2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* @(#)hfs_readwrite.c 1.0
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/vnode_internal.h>
49 #include <sys/vfs_context.h>
50 #include <sys/fsevents.h>
51 #include <kern/kalloc.h>
53 #include <sys/sysctl.h>
55 #include <miscfs/specfs/specdev.h>
58 #include <sys/ubc_internal.h>
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
63 #include <sys/kdebug.h>
66 #include "hfs_attrlist.h"
67 #include "hfs_endian.h"
68 #include "hfs_fsctl.h"
69 #include "hfs_quota.h"
70 #include "hfscommon/headers/FileMgrInternal.h"
71 #include "hfscommon/headers/BTreesInternal.h"
72 #include "hfs_cnode.h"
75 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
78 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
81 /* from bsd/vfs/vfs_cluster.c */
82 extern int is_file_clean(vnode_t vp
, off_t filesize
);
83 /* from bsd/hfs/hfs_vfsops.c */
84 extern int hfs_vfs_vget(struct mount
*mp
, ino64_t ino
, struct vnode
**vpp
, vfs_context_t context
);
86 static int hfs_clonelink(struct vnode
*, int, kauth_cred_t
, struct proc
*);
87 static int hfs_clonefile(struct vnode
*, int, int, int);
88 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
90 int flush_cache_on_write
= 0;
91 SYSCTL_INT (_kern
, OID_AUTO
, flush_cache_on_write
, CTLFLAG_RW
, &flush_cache_on_write
, 0, "always flush the drive cache on writes to uncached files");
95 * Read data from a file.
98 hfs_vnop_read(struct vnop_read_args
*ap
)
100 uio_t uio
= ap
->a_uio
;
101 struct vnode
*vp
= ap
->a_vp
;
104 struct hfsmount
*hfsmp
;
107 off_t start_resid
= uio_resid(uio
);
108 off_t offset
= uio_offset(uio
);
112 /* Preflight checks */
113 if (!vnode_isreg(vp
)) {
114 /* can only read regular files */
120 if (start_resid
== 0)
121 return (0); /* Nothing left to do */
123 return (EINVAL
); /* cant read from a negative offset */
129 /* Protect against a size change. */
130 hfs_lock_truncate(cp
, 0);
132 filesize
= fp
->ff_size
;
133 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
134 if (offset
> filesize
) {
135 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
136 (offset
> (off_t
)MAXHFSFILESIZE
)) {
142 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
143 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
145 retval
= cluster_read(vp
, uio
, filesize
, ap
->a_ioflag
);
147 cp
->c_touch_acctime
= TRUE
;
149 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
150 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
153 * Keep track blocks read
155 if (hfsmp
->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
156 int took_cnode_lock
= 0;
159 bytesread
= start_resid
- uio_resid(uio
);
161 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
162 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
163 hfs_lock(cp
, HFS_FORCE_LOCK
);
167 * If this file hasn't been seen since the start of
168 * the current sampling period then start over.
170 if (cp
->c_atime
< hfsmp
->hfc_timebase
) {
173 fp
->ff_bytesread
= bytesread
;
175 cp
->c_atime
= tv
.tv_sec
;
177 fp
->ff_bytesread
+= bytesread
;
183 hfs_unlock_truncate(cp
, 0);
188 * Write data to a file.
191 hfs_vnop_write(struct vnop_write_args
*ap
)
193 uio_t uio
= ap
->a_uio
;
194 struct vnode
*vp
= ap
->a_vp
;
197 struct hfsmount
*hfsmp
;
198 kauth_cred_t cred
= NULL
;
201 off_t bytesToAdd
= 0;
202 off_t actualBytesAdded
;
207 int ioflag
= ap
->a_ioflag
;
210 int cnode_locked
= 0;
211 int partialwrite
= 0;
212 int exclusive_lock
= 0;
214 // LP64todo - fix this! uio_resid may be 64-bit value
215 resid
= uio_resid(uio
);
216 offset
= uio_offset(uio
);
218 if (ioflag
& IO_APPEND
) {
226 if (!vnode_isreg(vp
))
227 return (EPERM
); /* Can only write regular files */
233 eflags
= kEFDeferMask
; /* defer file block allocations */
234 #ifdef HFS_SPARSE_DEV
236 * When the underlying device is sparse and space
237 * is low (< 8MB), stop doing delayed allocations
238 * and begin doing synchronous I/O.
240 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
241 (hfs_freeblks(hfsmp
, 0) < 2048)) {
242 eflags
&= ~kEFDeferMask
;
245 #endif /* HFS_SPARSE_DEV */
248 /* Protect against a size change. */
249 hfs_lock_truncate(cp
, exclusive_lock
);
251 if (ioflag
& IO_APPEND
) {
252 uio_setoffset(uio
, fp
->ff_size
);
253 offset
= fp
->ff_size
;
255 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
260 origFileSize
= fp
->ff_size
;
261 writelimit
= offset
+ resid
;
262 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
264 /* If the truncate lock is shared, and if we either have virtual
265 * blocks or will need to extend the file, upgrade the truncate
266 * to exclusive lock. If upgrade fails, we lose the lock and
267 * have to get exclusive lock again
269 if ((exclusive_lock
== 0) &&
270 ((fp
->ff_unallocblocks
!= 0) || (writelimit
> filebytes
))) {
272 /* Lock upgrade failed and we lost our shared lock, try again */
273 if (lck_rw_lock_shared_to_exclusive(&cp
->c_truncatelock
) == FALSE
) {
278 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
283 if (!exclusive_lock
) {
284 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
285 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
,
289 /* Check if we do not need to extend the file */
290 if (writelimit
<= filebytes
) {
294 cred
= vfs_context_ucred(ap
->a_context
);
295 bytesToAdd
= writelimit
- filebytes
;
298 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
304 if (hfs_start_transaction(hfsmp
) != 0) {
309 while (writelimit
> filebytes
) {
310 bytesToAdd
= writelimit
- filebytes
;
311 if (cred
&& suser(cred
, NULL
) != 0)
312 eflags
|= kEFReserveMask
;
314 /* Protect extents b-tree and allocation bitmap */
315 lockflags
= SFL_BITMAP
;
316 if (overflow_extents(fp
))
317 lockflags
|= SFL_EXTENTS
;
318 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
320 /* Files that are changing size are not hot file candidates. */
321 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
322 fp
->ff_bytesread
= 0;
324 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
325 0, eflags
, &actualBytesAdded
));
327 hfs_systemfile_unlock(hfsmp
, lockflags
);
329 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
331 if (retval
!= E_NONE
)
333 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
334 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
335 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
337 (void) hfs_update(vp
, TRUE
);
338 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
339 (void) hfs_end_transaction(hfsmp
);
342 * If we didn't grow the file enough try a partial write.
343 * POSIX expects this behavior.
345 if ((retval
== ENOSPC
) && (filebytes
> offset
)) {
348 uio_setresid(uio
, (uio_resid(uio
) - bytesToAdd
));
350 writelimit
= filebytes
;
353 if (retval
== E_NONE
) {
361 struct rl_entry
*invalid_range
;
363 if (writelimit
> fp
->ff_size
)
364 filesize
= writelimit
;
366 filesize
= fp
->ff_size
;
368 lflag
= ioflag
& ~(IO_TAILZEROFILL
| IO_HEADZEROFILL
| IO_NOZEROVALID
| IO_NOZERODIRTY
);
370 if (offset
<= fp
->ff_size
) {
371 zero_off
= offset
& ~PAGE_MASK_64
;
373 /* Check to see whether the area between the zero_offset and the start
374 of the transfer to see whether is invalid and should be zero-filled
375 as part of the transfer:
377 if (offset
> zero_off
) {
378 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
379 lflag
|= IO_HEADZEROFILL
;
382 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
384 /* The bytes between fp->ff_size and uio->uio_offset must never be
385 read without being zeroed. The current last block is filled with zeroes
386 if it holds valid data but in all cases merely do a little bookkeeping
387 to track the area from the end of the current last page to the start of
388 the area actually written. For the same reason only the bytes up to the
389 start of the page where this write will start is invalidated; any remainder
390 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
392 Note that inval_start, the start of the page after the current EOF,
393 may be past the start of the write, in which case the zeroing
394 will be handled by the cluser_write of the actual data.
396 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
397 inval_end
= offset
& ~PAGE_MASK_64
;
398 zero_off
= fp
->ff_size
;
400 if ((fp
->ff_size
& PAGE_MASK_64
) &&
401 (rl_scan(&fp
->ff_invalidranges
,
404 &invalid_range
) != RL_NOOVERLAP
)) {
405 /* The page containing the EOF is not valid, so the
406 entire page must be made inaccessible now. If the write
407 starts on a page beyond the page containing the eof
408 (inval_end > eof_page_base), add the
409 whole page to the range to be invalidated. Otherwise
410 (i.e. if the write starts on the same page), zero-fill
411 the entire page explicitly now:
413 if (inval_end
> eof_page_base
) {
414 inval_start
= eof_page_base
;
416 zero_off
= eof_page_base
;
420 if (inval_start
< inval_end
) {
422 /* There's some range of data that's going to be marked invalid */
424 if (zero_off
< inval_start
) {
425 /* The pages between inval_start and inval_end are going to be invalidated,
426 and the actual write will start on a page past inval_end. Now's the last
427 chance to zero-fill the page containing the EOF:
431 retval
= cluster_write(vp
, (uio_t
) 0,
432 fp
->ff_size
, inval_start
,
434 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
435 hfs_lock(cp
, HFS_FORCE_LOCK
);
437 if (retval
) goto ioerr_exit
;
438 offset
= uio_offset(uio
);
441 /* Mark the remaining area of the newly allocated space as invalid: */
442 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
444 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
445 zero_off
= fp
->ff_size
= inval_end
;
448 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
451 /* Check to see whether the area between the end of the write and the end of
452 the page it falls in is invalid and should be zero-filled as part of the transfer:
454 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
455 if (tail_off
> filesize
) tail_off
= filesize
;
456 if (tail_off
> writelimit
) {
457 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
458 lflag
|= IO_TAILZEROFILL
;
463 * if the write starts beyond the current EOF (possibly advanced in the
464 * zeroing of the last block, above), then we'll zero fill from the current EOF
465 * to where the write begins:
467 * NOTE: If (and ONLY if) the portion of the file about to be written is
468 * before the current EOF it might be marked as invalid now and must be
469 * made readable (removed from the invalid ranges) before cluster_write
472 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
473 if (io_start
< fp
->ff_size
) {
476 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
477 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
484 * We need to tell UBC the fork's new size BEFORE calling
485 * cluster_write, in case any of the new pages need to be
486 * paged out before cluster_write completes (which does happen
487 * in embedded systems due to extreme memory pressure).
488 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
489 * will be, so that it can pass that on to cluster_pageout, and
490 * allow those pageouts.
492 * We don't update ff_size yet since we don't want pageins to
493 * be able to see uninitialized data between the old and new
494 * EOF, until cluster_write has completed and initialized that
497 * The vnode pager relies on the file size last given to UBC via
498 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
499 * ff_size (whichever is larger). NOTE: ff_new_size is always
500 * zero, unless we are extending the file via write.
502 if (filesize
> fp
->ff_size
) {
503 fp
->ff_new_size
= filesize
;
504 ubc_setsize(vp
, filesize
);
506 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
507 tail_off
, lflag
| IO_NOZERODIRTY
);
509 fp
->ff_new_size
= 0; /* no longer extending; use ff_size */
510 if (filesize
> origFileSize
) {
511 ubc_setsize(vp
, origFileSize
);
516 if (filesize
> origFileSize
) {
517 fp
->ff_size
= filesize
;
519 /* Files that are changing size are not hot file candidates. */
520 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
521 fp
->ff_bytesread
= 0;
524 fp
->ff_new_size
= 0; /* ff_size now has the correct size */
526 /* If we wrote some bytes, then touch the change and mod times */
527 if (resid
> uio_resid(uio
)) {
528 cp
->c_touch_chgtime
= TRUE
;
529 cp
->c_touch_modtime
= TRUE
;
533 uio_setresid(uio
, (uio_resid(uio
) + bytesToAdd
));
537 // XXXdbg - see radar 4871353 for more info
539 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
540 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
543 HFS_KNOTE(vp
, NOTE_WRITE
);
547 * If we successfully wrote any data, and we are not the superuser
548 * we clear the setuid and setgid bits as a precaution against
551 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
552 cred
= vfs_context_ucred(ap
->a_context
);
553 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
555 hfs_lock(cp
, HFS_FORCE_LOCK
);
558 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
562 if (ioflag
& IO_UNIT
) {
564 hfs_lock(cp
, HFS_FORCE_LOCK
);
567 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
569 // LP64todo - fix this! resid needs to by user_ssize_t
570 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
571 uio_setresid(uio
, resid
);
572 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
574 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
576 hfs_lock(cp
, HFS_FORCE_LOCK
);
579 retval
= hfs_update(vp
, TRUE
);
581 /* Updating vcbWrCnt doesn't need to be atomic. */
584 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
585 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
589 hfs_unlock_truncate(cp
, exclusive_lock
);
593 /* support for the "bulk-access" fcntl */
595 #define CACHE_LEVELS 16
596 #define NUM_CACHE_ENTRIES (64*16)
597 #define PARENT_IDS_FLAG 0x100
599 struct access_cache
{
601 int cachehits
; /* these two for statistics gathering */
603 unsigned int *acache
;
604 unsigned char *haveaccess
;
608 uid_t uid
; /* IN: effective user id */
609 short flags
; /* IN: access requested (i.e. R_OK) */
610 short num_groups
; /* IN: number of groups user belongs to */
611 int num_files
; /* IN: number of files to process */
612 int *file_ids
; /* IN: array of file ids */
613 gid_t
*groups
; /* IN: array of groups */
614 short *access
; /* OUT: access info for each file (0 for 'has access') */
617 struct user_access_t
{
618 uid_t uid
; /* IN: effective user id */
619 short flags
; /* IN: access requested (i.e. R_OK) */
620 short num_groups
; /* IN: number of groups user belongs to */
621 int num_files
; /* IN: number of files to process */
622 user_addr_t file_ids
; /* IN: array of file ids */
623 user_addr_t groups
; /* IN: array of groups */
624 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
628 // these are the "extended" versions of the above structures
629 // note that it is crucial that they be different sized than
630 // the regular version
631 struct ext_access_t
{
632 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
633 uint32_t num_files
; /* IN: number of files to process */
634 uint32_t map_size
; /* IN: size of the bit map */
635 uint32_t *file_ids
; /* IN: Array of file ids */
636 char *bitmap
; /* OUT: hash-bitmap of interesting directory ids */
637 short *access
; /* OUT: access info for each file (0 for 'has access') */
638 uint32_t num_parents
; /* future use */
639 cnid_t
*parents
; /* future use */
642 struct ext_user_access_t
{
643 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
644 uint32_t num_files
; /* IN: number of files to process */
645 uint32_t map_size
; /* IN: size of the bit map */
646 user_addr_t file_ids
; /* IN: array of file ids */
647 user_addr_t bitmap
; /* IN: array of groups */
648 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
649 uint32_t num_parents
;/* future use */
650 user_addr_t parents
;/* future use */
655 * Perform a binary search for the given parent_id. Return value is
656 * the index if there is a match. If no_match_indexp is non-NULL it
657 * will be assigned with the index to insert the item (even if it was
660 static int cache_binSearch(cnid_t
*array
, unsigned int hi
, cnid_t parent_id
, int *no_match_indexp
)
666 unsigned int mid
= ((hi
- lo
)/2) + lo
;
667 unsigned int this_id
= array
[mid
];
669 if (parent_id
== this_id
) {
674 if (parent_id
< this_id
) {
679 if (parent_id
> this_id
) {
685 /* check if lo and hi converged on the match */
686 if (parent_id
== array
[hi
]) {
690 if (no_match_indexp
) {
691 *no_match_indexp
= hi
;
699 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
703 int index
, no_match_index
;
705 if (cache
->numcached
== 0) {
707 return 0; // table is empty, so insert at index=0 and report no match
710 if (cache
->numcached
> NUM_CACHE_ENTRIES
) {
711 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
712 cache->numcached, NUM_CACHE_ENTRIES);*/
713 cache
->numcached
= NUM_CACHE_ENTRIES
;
716 hi
= cache
->numcached
- 1;
718 index
= cache_binSearch(cache
->acache
, hi
, parent_id
, &no_match_index
);
720 /* if no existing entry found, find index for new one */
722 index
= no_match_index
;
733 * Add a node to the access_cache at the given index (or do a lookup first
734 * to find the index if -1 is passed in). We currently do a replace rather
735 * than an insert if the cache is full.
738 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
740 int lookup_index
= -1;
742 /* need to do a lookup first if -1 passed for index */
744 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
745 if (cache
->haveaccess
[lookup_index
] != access
&& cache
->haveaccess
[lookup_index
] == ESRCH
) {
746 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
747 cache
->haveaccess
[lookup_index
] = access
;
750 /* mission accomplished */
753 index
= lookup_index
;
758 /* if the cache is full, do a replace rather than an insert */
759 if (cache
->numcached
>= NUM_CACHE_ENTRIES
) {
760 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
761 cache
->numcached
= NUM_CACHE_ENTRIES
-1;
763 if (index
> cache
->numcached
) {
764 // printf("index %d pinned to %d\n", index, cache->numcached);
765 index
= cache
->numcached
;
769 if (index
< cache
->numcached
&& index
< NUM_CACHE_ENTRIES
&& nodeID
> cache
->acache
[index
]) {
773 if (index
>= 0 && index
< cache
->numcached
) {
774 /* only do bcopy if we're inserting */
775 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
776 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(unsigned char) );
779 cache
->acache
[index
] = nodeID
;
780 cache
->haveaccess
[index
] = access
;
794 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
796 struct cinfo
*cip
= (struct cinfo
*)arg
;
798 cip
->uid
= attrp
->ca_uid
;
799 cip
->gid
= attrp
->ca_gid
;
800 cip
->mode
= attrp
->ca_mode
;
801 cip
->parentcnid
= descp
->cd_parentcnid
;
802 cip
->recflags
= attrp
->ca_recflags
;
808 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
809 * isn't incore, then go to the catalog.
812 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, dev_t dev
, cnid_t cnid
,
813 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
)
817 /* if this id matches the one the fsctl was called with, skip the lookup */
818 if (cnid
== skip_cp
->c_cnid
) {
819 cnattrp
->ca_uid
= skip_cp
->c_uid
;
820 cnattrp
->ca_gid
= skip_cp
->c_gid
;
821 cnattrp
->ca_mode
= skip_cp
->c_mode
;
822 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
826 /* otherwise, check the cnode hash incase the file/dir is incore */
827 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
828 cnattrp
->ca_uid
= c_info
.uid
;
829 cnattrp
->ca_gid
= c_info
.gid
;
830 cnattrp
->ca_mode
= c_info
.mode
;
831 cnattrp
->ca_recflags
= c_info
.recflags
;
832 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
836 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
838 /* lookup this cnid in the catalog */
839 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
841 hfs_systemfile_unlock(hfsmp
, lockflags
);
852 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
853 * up to CACHE_LEVELS as we progress towards the root.
856 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
857 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
, dev_t dev
,
858 struct vfs_context
*my_context
,
862 uint32_t num_parents
)
866 HFSCatalogNodeID thisNodeID
;
867 unsigned int myPerms
;
868 struct cat_attr cnattr
;
869 int cache_index
= -1, scope_index
= -1, scope_idx_start
= -1;
872 int i
= 0, ids_to_cache
= 0;
873 int parent_ids
[CACHE_LEVELS
];
876 while (thisNodeID
>= kRootDirID
) {
877 myResult
= 0; /* default to "no access" */
879 /* check the cache before resorting to hitting the catalog */
881 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
882 * to look any further after hitting cached dir */
884 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
886 myErr
= cache
->haveaccess
[cache_index
];
887 if (scope_index
!= -1) {
888 if (myErr
== ESRCH
) {
892 scope_index
= 0; // so we'll just use the cache result
893 scope_idx_start
= ids_to_cache
;
895 myResult
= (myErr
== 0) ? 1 : 0;
896 goto ExitThisRoutine
;
902 tmp
= cache_binSearch(parents
, num_parents
-1, thisNodeID
, NULL
);
903 if (scope_index
== -1)
905 if (tmp
!= -1 && scope_idx_start
== -1 && ids_to_cache
< CACHE_LEVELS
) {
906 scope_idx_start
= ids_to_cache
;
910 /* remember which parents we want to cache */
911 if (ids_to_cache
< CACHE_LEVELS
) {
912 parent_ids
[ids_to_cache
] = thisNodeID
;
915 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
916 if (bitmap
&& map_size
) {
917 bitmap
[(thisNodeID
/8)%(map_size
)]|=(1<<(thisNodeID
&7));
921 /* do the lookup (checks the cnode hash, then the catalog) */
922 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
);
924 goto ExitThisRoutine
; /* no access */
927 /* Root always gets access. */
928 if (suser(myp_ucred
, NULL
) == 0) {
929 thisNodeID
= catkey
.hfsPlus
.parentID
;
934 // if the thing has acl's, do the full permission check
935 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
938 /* get the vnode for this cnid */
939 myErr
= hfs_vget(hfsmp
, thisNodeID
, &vp
, 0);
942 goto ExitThisRoutine
;
945 thisNodeID
= VTOC(vp
)->c_parentcnid
;
947 hfs_unlock(VTOC(vp
));
949 if (vnode_vtype(vp
) == VDIR
) {
950 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), my_context
);
952 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, my_context
);
958 goto ExitThisRoutine
;
963 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
964 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
965 myp_ucred
, theProcPtr
);
967 if (cnattr
.ca_mode
& S_IFDIR
) {
972 if ( (myPerms
& flags
) != flags
) {
975 goto ExitThisRoutine
; /* no access */
978 /* up the hierarchy we go */
979 thisNodeID
= catkey
.hfsPlus
.parentID
;
983 /* if here, we have access to this node */
987 if (parents
&& myErr
== 0 && scope_index
== -1) {
996 /* cache the parent directory(ies) */
997 for (i
= 0; i
< ids_to_cache
; i
++) {
998 if (myErr
== 0 && parents
&& (scope_idx_start
== -1 || i
> scope_idx_start
)) {
999 add_node(cache
, -1, parent_ids
[i
], ESRCH
);
1001 add_node(cache
, -1, parent_ids
[i
], myErr
);
1009 do_bulk_access_check(struct hfsmount
*hfsmp
, struct vnode
*vp
,
1010 struct vnop_ioctl_args
*ap
, int arg_size
, vfs_context_t context
)
1015 * NOTE: on entry, the vnode is locked. Incase this vnode
1016 * happens to be in our list of file_ids, we'll note it
1017 * avoid calling hfs_chashget_nowait() on that id as that
1018 * will cause a "locking against myself" panic.
1020 Boolean check_leaf
= true;
1022 struct ext_user_access_t
*user_access_structp
;
1023 struct ext_user_access_t tmp_user_access
;
1024 struct access_cache cache
;
1029 dev_t dev
= VTOC(vp
)->c_dev
;
1032 unsigned int num_files
= 0;
1034 int num_parents
= 0;
1038 cnid_t
*parents
=NULL
;
1042 cnid_t prevParent_cnid
= 0;
1043 unsigned int myPerms
;
1045 struct cat_attr cnattr
;
1047 struct cnode
*skip_cp
= VTOC(vp
);
1048 kauth_cred_t cred
= vfs_context_ucred(context
);
1049 proc_t p
= vfs_context_proc(context
);
1051 is64bit
= proc_is64bit(p
);
1053 /* initialize the local cache and buffers */
1054 cache
.numcached
= 0;
1055 cache
.cachehits
= 0;
1057 cache
.acache
= NULL
;
1058 cache
.haveaccess
= NULL
;
1060 /* struct copyin done during dispatch... need to copy file_id array separately */
1061 if (ap
->a_data
== NULL
) {
1063 goto err_exit_bulk_access
;
1067 if (arg_size
!= sizeof(struct ext_user_access_t
)) {
1069 goto err_exit_bulk_access
;
1072 user_access_structp
= (struct ext_user_access_t
*)ap
->a_data
;
1074 } else if (arg_size
== sizeof(struct access_t
)) {
1075 struct access_t
*accessp
= (struct access_t
*)ap
->a_data
;
1077 // convert an old style bulk-access struct to the new style
1078 tmp_user_access
.flags
= accessp
->flags
;
1079 tmp_user_access
.num_files
= accessp
->num_files
;
1080 tmp_user_access
.map_size
= 0;
1081 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1082 tmp_user_access
.bitmap
= USER_ADDR_NULL
;
1083 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1084 tmp_user_access
.num_parents
= 0;
1085 user_access_structp
= &tmp_user_access
;
1087 } else if (arg_size
== sizeof(struct ext_access_t
)) {
1088 struct ext_access_t
*accessp
= (struct ext_access_t
*)ap
->a_data
;
1090 // up-cast from a 32-bit version of the struct
1091 tmp_user_access
.flags
= accessp
->flags
;
1092 tmp_user_access
.num_files
= accessp
->num_files
;
1093 tmp_user_access
.map_size
= accessp
->map_size
;
1094 tmp_user_access
.num_parents
= accessp
->num_parents
;
1096 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1097 tmp_user_access
.bitmap
= CAST_USER_ADDR_T(accessp
->bitmap
);
1098 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1099 tmp_user_access
.parents
= CAST_USER_ADDR_T(accessp
->parents
);
1101 user_access_structp
= &tmp_user_access
;
1104 goto err_exit_bulk_access
;
1107 map_size
= user_access_structp
->map_size
;
1109 num_files
= user_access_structp
->num_files
;
1111 num_parents
= user_access_structp
->num_parents
;
1113 if (num_files
< 1) {
1114 goto err_exit_bulk_access
;
1116 if (num_files
> 1024) {
1118 goto err_exit_bulk_access
;
1121 if (num_parents
> 1024) {
1123 goto err_exit_bulk_access
;
1126 file_ids
= (int *) kalloc(sizeof(int) * num_files
);
1127 access
= (short *) kalloc(sizeof(short) * num_files
);
1129 bitmap
= (char *) kalloc(sizeof(char) * map_size
);
1133 parents
= (cnid_t
*) kalloc(sizeof(cnid_t
) * num_parents
);
1136 cache
.acache
= (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES
);
1137 cache
.haveaccess
= (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1139 if (file_ids
== NULL
|| access
== NULL
|| (map_size
!= 0 && bitmap
== NULL
) || cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1141 kfree(file_ids
, sizeof(int) * num_files
);
1144 kfree(bitmap
, sizeof(char) * map_size
);
1147 kfree(access
, sizeof(short) * num_files
);
1150 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1152 if (cache
.haveaccess
) {
1153 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1156 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1161 // make sure the bitmap is zero'ed out...
1163 bzero(bitmap
, (sizeof(char) * map_size
));
1166 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1167 num_files
* sizeof(int)))) {
1168 goto err_exit_bulk_access
;
1172 if ((error
= copyin(user_access_structp
->parents
, (caddr_t
)parents
,
1173 num_parents
* sizeof(cnid_t
)))) {
1174 goto err_exit_bulk_access
;
1178 flags
= user_access_structp
->flags
;
1179 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1183 /* check if we've been passed leaf node ids or parent ids */
1184 if (flags
& PARENT_IDS_FLAG
) {
1188 /* Check access to each file_id passed in */
1189 for (i
= 0; i
< num_files
; i
++) {
1191 cnid
= (cnid_t
) file_ids
[i
];
1193 /* root always has access */
1194 if ((!parents
) && (!suser(cred
, NULL
))) {
1200 /* do the lookup (checks the cnode hash, then the catalog) */
1201 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
);
1203 access
[i
] = (short) error
;
1208 // Check if the leaf matches one of the parent scopes
1209 leaf_index
= cache_binSearch(parents
, num_parents
-1, cnid
, NULL
);
1212 // if the thing has acl's, do the full permission check
1213 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1216 /* get the vnode for this cnid */
1217 myErr
= hfs_vget(hfsmp
, cnid
, &cvp
, 0);
1223 hfs_unlock(VTOC(cvp
));
1225 if (vnode_vtype(cvp
) == VDIR
) {
1226 myErr
= vnode_authorize(cvp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), context
);
1228 myErr
= vnode_authorize(cvp
, NULL
, KAUTH_VNODE_READ_DATA
, context
);
1237 /* before calling CheckAccess(), check the target file for read access */
1238 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1239 cnattr
.ca_mode
, hfsmp
->hfs_mp
, cred
, p
);
1241 /* fail fast if no access */
1242 if ((myPerms
& flags
) == 0) {
1248 /* we were passed an array of parent ids */
1249 catkey
.hfsPlus
.parentID
= cnid
;
1252 /* if the last guy had the same parent and had access, we're done */
1253 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1259 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1260 skip_cp
, p
, cred
, dev
, context
,bitmap
, map_size
, parents
, num_parents
);
1262 if (myaccess
|| (error
== ESRCH
&& leaf_index
!= -1)) {
1263 access
[i
] = 0; // have access.. no errors to report
1265 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1268 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1271 /* copyout the access array */
1272 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1273 num_files
* sizeof (short)))) {
1274 goto err_exit_bulk_access
;
1276 if (map_size
&& bitmap
) {
1277 if ((error
= copyout((caddr_t
)bitmap
, user_access_structp
->bitmap
,
1278 map_size
* sizeof (char)))) {
1279 goto err_exit_bulk_access
;
1284 err_exit_bulk_access
:
1286 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1289 kfree(file_ids
, sizeof(int) * num_files
);
1291 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1293 kfree(bitmap
, sizeof(char) * map_size
);
1295 kfree(access
, sizeof(short) * num_files
);
1297 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1298 if (cache
.haveaccess
)
1299 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1305 /* end "bulk-access" support */
1309 * Callback for use with freeze ioctl.
1312 hfs_freezewrite_callback(struct vnode
*vp
, __unused
void *cargs
)
1314 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
1320 * Control filesystem operating characteristics.
1323 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
1328 vfs_context_t a_context;
1331 struct vnode
* vp
= ap
->a_vp
;
1332 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1333 vfs_context_t context
= ap
->a_context
;
1334 kauth_cred_t cred
= vfs_context_ucred(context
);
1335 proc_t p
= vfs_context_proc(context
);
1336 struct vfsstatfs
*vfsp
;
1339 is64bit
= proc_is64bit(p
);
1341 switch (ap
->a_command
) {
1345 struct vnode
*file_vp
;
1351 /* Caller must be owner of file system. */
1352 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1353 if (suser(cred
, NULL
) &&
1354 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1357 /* Target vnode must be file system's root. */
1358 if (!vnode_isvroot(vp
)) {
1361 bufptr
= (char *)ap
->a_data
;
1362 cnid
= strtoul(bufptr
, NULL
, 10);
1364 /* We need to call hfs_vfs_vget to leverage the code that will fix the
1365 * origin list for us if needed, as opposed to calling hfs_vget, since
1366 * we will need it for the subsequent build_path call.
1368 if ((error
= hfs_vfs_vget(HFSTOVFS(hfsmp
), cnid
, &file_vp
, context
))) {
1371 error
= build_path(file_vp
, bufptr
, sizeof(pathname_t
), &outlen
, 0, context
);
1385 /* Caller must be owner of file system. */
1386 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1387 if (suser(cred
, NULL
) &&
1388 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1391 /* Target vnode must be file system's root. */
1392 if (!vnode_isvroot(vp
)) {
1395 linkfileid
= *(cnid_t
*)ap
->a_data
;
1396 if (linkfileid
< kHFSFirstUserCatalogNodeID
) {
1399 if ((error
= hfs_lookuplink(hfsmp
, linkfileid
, &prevlinkid
, &nextlinkid
))) {
1402 if (ap
->a_command
== HFS_NEXT_LINK
) {
1403 *(cnid_t
*)ap
->a_data
= nextlinkid
;
1405 *(cnid_t
*)ap
->a_data
= prevlinkid
;
1410 case HFS_RESIZE_PROGRESS
: {
1412 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1413 if (suser(cred
, NULL
) &&
1414 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1415 return (EACCES
); /* must be owner of file system */
1417 if (!vnode_isvroot(vp
)) {
1420 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
1423 case HFS_RESIZE_VOLUME
: {
1427 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1428 if (suser(cred
, NULL
) &&
1429 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1430 return (EACCES
); /* must be owner of file system */
1432 if (!vnode_isvroot(vp
)) {
1435 newsize
= *(u_int64_t
*)ap
->a_data
;
1436 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
1438 if (newsize
> cursize
) {
1439 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1440 } else if (newsize
< cursize
) {
1441 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1446 case HFS_CHANGE_NEXT_ALLOCATION
: {
1447 int error
= 0; /* Assume success */
1450 if (vnode_vfsisrdonly(vp
)) {
1453 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1454 if (suser(cred
, NULL
) &&
1455 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1456 return (EACCES
); /* must be owner of file system */
1458 if (!vnode_isvroot(vp
)) {
1461 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1462 location
= *(u_int32_t
*)ap
->a_data
;
1463 if ((location
>= hfsmp
->allocLimit
) &&
1464 (location
!= HFS_NO_UPDATE_NEXT_ALLOCATION
)) {
1466 goto fail_change_next_allocation
;
1468 /* Return previous value. */
1469 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
1470 if (location
== HFS_NO_UPDATE_NEXT_ALLOCATION
) {
1471 /* On magic value for location, set nextAllocation to next block
1472 * after metadata zone and set flag in mount structure to indicate
1473 * that nextAllocation should not be updated again.
1475 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, hfsmp
->hfs_metazone_end
+ 1);
1476 hfsmp
->hfs_flags
|= HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1478 hfsmp
->hfs_flags
&= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1479 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, location
);
1481 MarkVCBDirty(hfsmp
);
1482 fail_change_next_allocation
:
1483 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1487 #ifdef HFS_SPARSE_DEV
1488 case HFS_SETBACKINGSTOREINFO
: {
1489 struct vnode
* bsfs_rootvp
;
1490 struct vnode
* di_vp
;
1491 struct hfs_backingstoreinfo
*bsdata
;
1494 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
1497 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1498 if (suser(cred
, NULL
) &&
1499 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1500 return (EACCES
); /* must be owner of file system */
1502 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
1503 if (bsdata
== NULL
) {
1506 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
1509 if ((error
= vnode_getwithref(di_vp
))) {
1510 file_drop(bsdata
->backingfd
);
1514 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
1515 (void)vnode_put(di_vp
);
1516 file_drop(bsdata
->backingfd
);
1521 * Obtain the backing fs root vnode and keep a reference
1522 * on it. This reference will be dropped in hfs_unmount.
1524 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
1526 (void)vnode_put(di_vp
);
1527 file_drop(bsdata
->backingfd
);
1530 vnode_ref(bsfs_rootvp
);
1531 vnode_put(bsfs_rootvp
);
1533 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
1534 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
1535 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
1536 hfsmp
->hfs_sparsebandblks
*= 4;
1538 vfs_markdependency(hfsmp
->hfs_mp
);
1540 (void)vnode_put(di_vp
);
1541 file_drop(bsdata
->backingfd
);
1544 case HFS_CLRBACKINGSTOREINFO
: {
1545 struct vnode
* tmpvp
;
1547 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1548 if (suser(cred
, NULL
) &&
1549 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1550 return (EACCES
); /* must be owner of file system */
1552 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1553 hfsmp
->hfs_backingfs_rootvp
) {
1555 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1556 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
1557 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
1558 hfsmp
->hfs_sparsebandblks
= 0;
1563 #endif /* HFS_SPARSE_DEV */
1571 mp
= vnode_mount(vp
);
1572 hfsmp
= VFSTOHFS(mp
);
1577 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1579 // flush things before we get started to try and prevent
1580 // dirty data from being paged out while we're frozen.
1581 // note: can't do this after taking the lock as it will
1582 // deadlock against ourselves.
1583 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1584 hfs_global_exclusive_lock_acquire(hfsmp
);
1585 journal_flush(hfsmp
->jnl
);
1587 // don't need to iterate on all vnodes, we just need to
1588 // wait for writes to the system files and the device vnode
1589 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1590 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1591 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1592 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1593 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1594 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1595 if (hfsmp
->hfs_attribute_vp
)
1596 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1597 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1599 hfsmp
->hfs_freezing_proc
= current_proc();
1608 // if we're not the one who froze the fs then we
1610 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1614 // NOTE: if you add code here, also go check the
1615 // code that "thaws" the fs in hfs_vnop_close()
1617 hfsmp
->hfs_freezing_proc
= NULL
;
1618 hfs_global_exclusive_lock_release(hfsmp
);
1619 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1624 case HFS_BULKACCESS_FSCTL
: {
1627 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1632 size
= sizeof(struct user_access_t
);
1634 size
= sizeof(struct access_t
);
1637 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
1640 case HFS_EXT_BULKACCESS_FSCTL
: {
1643 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1648 size
= sizeof(struct ext_user_access_t
);
1650 size
= sizeof(struct ext_access_t
);
1653 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
1656 case HFS_SETACLSTATE
: {
1659 if (ap
->a_data
== NULL
) {
1663 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1664 state
= *(int *)ap
->a_data
;
1666 // super-user can enable or disable acl's on a volume.
1667 // the volume owner can only enable acl's
1668 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1671 if (state
== 0 || state
== 1)
1672 return hfs_set_volxattr(hfsmp
, HFS_SETACLSTATE
, state
);
1677 case HFS_SET_XATTREXTENTS_STATE
: {
1680 if (ap
->a_data
== NULL
) {
1684 state
= *(int *)ap
->a_data
;
1686 /* Super-user can enable or disable extent-based extended
1687 * attribute support on a volume
1692 if (state
== 0 || state
== 1)
1693 return hfs_set_volxattr(hfsmp
, HFS_SET_XATTREXTENTS_STATE
, state
);
1701 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1703 error
= hfs_fsync(vp
, MNT_WAIT
, TRUE
, p
);
1704 hfs_unlock(VTOC(vp
));
1711 register struct cnode
*cp
;
1714 if (!vnode_isreg(vp
))
1717 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1721 * used by regression test to determine if
1722 * all the dirty pages (via write) have been cleaned
1723 * after a call to 'fsysnc'.
1725 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1732 register struct radvisory
*ra
;
1733 struct filefork
*fp
;
1736 if (!vnode_isreg(vp
))
1739 ra
= (struct radvisory
*)(ap
->a_data
);
1742 /* Protect against a size change. */
1743 hfs_lock_truncate(VTOC(vp
), TRUE
);
1745 if (ra
->ra_offset
>= fp
->ff_size
) {
1748 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1751 hfs_unlock_truncate(VTOC(vp
), TRUE
);
1755 case F_READBOOTSTRAP
:
1756 case F_WRITEBOOTSTRAP
:
1758 struct vnode
*devvp
= NULL
;
1759 user_fbootstraptransfer_t
*user_bootstrapp
;
1763 daddr64_t blockNumber
;
1767 user_fbootstraptransfer_t user_bootstrap
;
1769 if (!vnode_isvroot(vp
))
1771 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1772 * to a user_fbootstraptransfer_t else we get a pointer to a
1773 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1776 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1779 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1780 user_bootstrapp
= &user_bootstrap
;
1781 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1782 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1783 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1785 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1788 devvp
= VTOHFS(vp
)->hfs_devvp
;
1789 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1790 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1791 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1792 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1794 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1796 while (uio_resid(auio
) > 0) {
1797 blockNumber
= uio_offset(auio
) / devBlockSize
;
1798 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1800 if (bp
) buf_brelse(bp
);
1805 blockOffset
= uio_offset(auio
) % devBlockSize
;
1806 xfersize
= devBlockSize
- blockOffset
;
1807 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1813 if (uio_rw(auio
) == UIO_WRITE
) {
1814 error
= VNOP_BWRITE(bp
);
1827 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1830 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1833 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1838 case HFS_GET_MOUNT_TIME
:
1840 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) hfsmp
->hfs_mount_time
;
1842 *(time_t *)(ap
->a_data
) = (time_t) hfsmp
->hfs_mount_time
;
1846 case HFS_GET_LAST_MTIME
:
1848 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) hfsmp
->hfs_last_mounted_mtime
;
1850 *(time_t *)(ap
->a_data
) = (time_t) hfsmp
->hfs_last_mounted_mtime
;
1854 case HFS_SET_BOOT_INFO
:
1855 if (!vnode_isvroot(vp
))
1857 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1858 return(EACCES
); /* must be superuser or owner of filesystem */
1859 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1860 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1861 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1862 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1865 case HFS_GET_BOOT_INFO
:
1866 if (!vnode_isvroot(vp
))
1868 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1869 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1870 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1873 case HFS_MARK_BOOT_CORRUPT
:
1874 /* Mark the boot volume corrupt by setting
1875 * kHFSVolumeInconsistentBit in the volume header. This will
1876 * force fsck_hfs on next mount.
1882 /* Allowed only on the root vnode of the boot volume */
1883 if (!(vfs_flags(HFSTOVFS(hfsmp
)) & MNT_ROOTFS
) ||
1884 !vnode_isvroot(vp
)) {
1888 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
1889 hfs_mark_volume_inconsistent(hfsmp
);
1896 /* Should never get here */
1904 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1906 struct vnop_select_args {
1911 vfs_context_t a_context;
1916 * We should really check to see if I/O is possible.
1922 * Converts a logical block number to a physical block, and optionally returns
1923 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1924 * The physical block number is based on the device block size, currently its 512.
1925 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1928 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, unsigned int *runp
)
1930 struct filefork
*fp
= VTOF(vp
);
1931 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1932 int retval
= E_NONE
;
1933 u_int32_t logBlockSize
;
1934 size_t bytesContAvail
= 0;
1935 off_t blockposition
;
1940 * Check for underlying vnode requests and ensure that logical
1941 * to physical mapping is requested.
1944 *vpp
= hfsmp
->hfs_devvp
;
1948 logBlockSize
= GetLogicalBlockSize(vp
);
1949 blockposition
= (off_t
)bn
* logBlockSize
;
1951 lockExtBtree
= overflow_extents(fp
);
1954 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_EXCLUSIVE_LOCK
);
1956 retval
= MacToVFSError(
1957 MapFileBlockC (HFSTOVCB(hfsmp
),
1965 hfs_systemfile_unlock(hfsmp
, lockflags
);
1967 if (retval
== E_NONE
) {
1968 /* Figure out how many read ahead blocks there are */
1970 if (can_cluster(logBlockSize
)) {
1971 /* Make sure this result never goes negative: */
1972 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
1982 * Convert logical block number to file offset.
1985 hfs_vnop_blktooff(struct vnop_blktooff_args
*ap
)
1987 struct vnop_blktooff_args {
1994 if (ap
->a_vp
== NULL
)
1996 *ap
->a_offset
= (off_t
)ap
->a_lblkno
* (off_t
)GetLogicalBlockSize(ap
->a_vp
);
2002 * Convert file offset to logical block number.
2005 hfs_vnop_offtoblk(struct vnop_offtoblk_args
*ap
)
2007 struct vnop_offtoblk_args {
2010 daddr64_t *a_lblkno;
2014 if (ap
->a_vp
== NULL
)
2016 *ap
->a_lblkno
= (daddr64_t
)(ap
->a_offset
/ (off_t
)GetLogicalBlockSize(ap
->a_vp
));
2022 * Map file offset to physical block number.
2024 * If this function is called for write operation, and if the file
2025 * had virtual blocks allocated (delayed allocation), real blocks
2026 * are allocated by calling ExtendFileC().
2028 * If this function is called for read operation, and if the file
2029 * had virtual blocks allocated (delayed allocation), no change
2030 * to the size of file is done, and if required, rangelist is
2031 * searched for mapping.
2033 * System file cnodes are expected to be locked (shared or exclusive).
2036 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
2038 struct vnop_blockmap_args {
2046 vfs_context_t a_context;
2050 struct vnode
*vp
= ap
->a_vp
;
2052 struct filefork
*fp
;
2053 struct hfsmount
*hfsmp
;
2054 size_t bytesContAvail
= 0;
2055 int retval
= E_NONE
;
2058 struct rl_entry
*invalid_range
;
2059 enum rl_overlaptype overlaptype
;
2063 /* Do not allow blockmap operation on a directory */
2064 if (vnode_isdir(vp
)) {
2069 * Check for underlying vnode requests and ensure that logical
2070 * to physical mapping is requested.
2072 if (ap
->a_bpn
== NULL
)
2075 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
2076 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
2077 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
2086 /* Check virtual blocks only when performing write operation */
2087 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2088 if (hfs_start_transaction(hfsmp
) != 0) {
2094 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
2096 } else if (overflow_extents(fp
)) {
2097 syslocks
= SFL_EXTENTS
;
2101 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
2104 * Check for any delayed allocations.
2106 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2108 u_int32_t loanedBlocks
;
2111 // Make sure we have a transaction. It's possible
2112 // that we came in and fp->ff_unallocblocks was zero
2113 // but during the time we blocked acquiring the extents
2114 // btree, ff_unallocblocks became non-zero and so we
2115 // will need to start a transaction.
2117 if (started_tr
== 0) {
2119 hfs_systemfile_unlock(hfsmp
, lockflags
);
2126 * Note: ExtendFileC will Release any blocks on loan and
2127 * aquire real blocks. So we ask to extend by zero bytes
2128 * since ExtendFileC will account for the virtual blocks.
2131 loanedBlocks
= fp
->ff_unallocblocks
;
2132 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
2133 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
2136 fp
->ff_unallocblocks
= loanedBlocks
;
2137 cp
->c_blocks
+= loanedBlocks
;
2138 fp
->ff_blocks
+= loanedBlocks
;
2140 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2141 hfsmp
->loanedBlocks
+= loanedBlocks
;
2142 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2144 hfs_systemfile_unlock(hfsmp
, lockflags
);
2145 cp
->c_flag
|= C_MODIFIED
;
2147 (void) hfs_update(vp
, TRUE
);
2148 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2150 hfs_end_transaction(hfsmp
);
2157 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
2158 ap
->a_bpn
, &bytesContAvail
);
2160 hfs_systemfile_unlock(hfsmp
, lockflags
);
2165 (void) hfs_update(vp
, TRUE
);
2166 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2167 hfs_end_transaction(hfsmp
);
2171 /* On write, always return error because virtual blocks, if any,
2172 * should have been allocated in ExtendFileC(). We do not
2173 * allocate virtual blocks on read, therefore return error
2174 * only if no virtual blocks are allocated. Otherwise we search
2175 * rangelist for zero-fills
2177 if ((MacToVFSError(retval
) != ERANGE
) ||
2178 (ap
->a_flags
& VNODE_WRITE
) ||
2179 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
2183 /* Validate if the start offset is within logical file size */
2184 if (ap
->a_foffset
> fp
->ff_size
) {
2188 /* Searching file extents has failed for read operation, therefore
2189 * search rangelist for any uncommitted holes in the file.
2191 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2192 ap
->a_foffset
+ (off_t
)(ap
->a_size
- 1),
2194 switch(overlaptype
) {
2195 case RL_OVERLAPISCONTAINED
:
2196 /* start_offset <= rl_start, end_offset >= rl_end */
2197 if (ap
->a_foffset
!= invalid_range
->rl_start
) {
2200 case RL_MATCHINGOVERLAP
:
2201 /* start_offset = rl_start, end_offset = rl_end */
2202 case RL_OVERLAPCONTAINSRANGE
:
2203 /* start_offset >= rl_start, end_offset <= rl_end */
2204 case RL_OVERLAPSTARTSBEFORE
:
2205 /* start_offset > rl_start, end_offset >= rl_start */
2206 if ((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) {
2207 bytesContAvail
= (invalid_range
->rl_end
+ 1) - ap
->a_foffset
;
2209 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
2211 if (bytesContAvail
> ap
->a_size
) {
2212 bytesContAvail
= ap
->a_size
;
2214 *ap
->a_bpn
= (daddr64_t
)-1;
2217 case RL_OVERLAPENDSAFTER
:
2218 /* start_offset < rl_start, end_offset < rl_end */
2225 /* MapFileC() found a valid extent in the filefork. Search the
2226 * mapping information further for invalid file ranges
2228 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2229 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
2231 if (overlaptype
!= RL_NOOVERLAP
) {
2232 switch(overlaptype
) {
2233 case RL_MATCHINGOVERLAP
:
2234 case RL_OVERLAPCONTAINSRANGE
:
2235 case RL_OVERLAPSTARTSBEFORE
:
2236 /* There's no valid block for this byte offset */
2237 *ap
->a_bpn
= (daddr64_t
)-1;
2238 /* There's no point limiting the amount to be returned
2239 * if the invalid range that was hit extends all the way
2240 * to the EOF (i.e. there's no valid bytes between the
2241 * end of this range and the file's EOF):
2243 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2244 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
2245 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2249 case RL_OVERLAPISCONTAINED
:
2250 case RL_OVERLAPENDSAFTER
:
2251 /* The range of interest hits an invalid block before the end: */
2252 if (invalid_range
->rl_start
== ap
->a_foffset
) {
2253 /* There's actually no valid information to be had starting here: */
2254 *ap
->a_bpn
= (daddr64_t
)-1;
2255 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2256 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
2257 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2260 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
2267 if (bytesContAvail
> ap
->a_size
)
2268 bytesContAvail
= ap
->a_size
;
2274 *ap
->a_run
= bytesContAvail
;
2277 *(int *)ap
->a_poff
= 0;
2283 return (MacToVFSError(retval
));
2288 * prepare and issue the I/O
2289 * buf_strategy knows how to deal
2290 * with requests that require
2294 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
2296 buf_t bp
= ap
->a_bp
;
2297 vnode_t vp
= buf_vnode(bp
);
2299 return (buf_strategy(VTOHFS(vp
)->hfs_devvp
, ap
));
static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval = E_NONE;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t zero_limit;
	u_long fileblocks;
	int blksize;
	int lockflags;
	struct timeval tv;
	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
	             (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))
		return (retval);

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
		                   cred, 0);
		if (retval)
			goto Err_Exit;

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
				                                   (FCB *)fp,
				                                   bytesToAdd,
				                                   blockHint,
				                                   eflags,
				                                   &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			}
			hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			if (retval)
				goto Err_Exit;
		}

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		             (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					             fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						hfs_unlock(cp);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
						                       fp->ff_size, (off_t)0,
						                       (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
				                                     (FCB *)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (retval == 0) {
				fp->ff_size = length;
			}
			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	cp->c_touch_chgtime = TRUE;	/* status changed */
	cp->c_touch_modtime = TRUE;	/* file data was modified */
	retval = hfs_update(vp, MNT_WAIT);
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		             -1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
	             (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}
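
/*
 * Worked example (illustrative, not part of the original code): when growing
 * a file, zero_limit above rounds the old EOF up to a page boundary.  With
 * ff_size = 5000 and a 4 KB page size, zero_limit = (5000 + 4095) & ~4095 =
 * 8192; assuming the new length is at least 8192, bytes 5000..8191 are
 * zero-filled via cluster_write() so the last valid page is fully
 * initialized, while any newly added space beyond 8192 is only recorded in
 * ff_invalidranges and zeroed lazily (or when c_zftimeout expires).
 */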
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_long fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	if (!skipsetsize)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}
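
/*
 * Illustrative note on the chunked loop above (assumption: HFS_BIGFILE_SIZE
 * is a per-step cap on the order of a gigabyte).  Shrinking a multi-gigabyte
 * file with overflow extents proceeds in several do_hfs_truncate() calls,
 * each moving the physical EOF by at most HFS_BIGFILE_SIZE in its own journal
 * transaction, so no single transaction has to record all of the extent and
 * allocation-bitmap changes at once.
 */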
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necesary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
		                   (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
		                   cred, 0);
		if (retval)
			goto Err_Exit;

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
			           (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
			                                   (FCB *)fp,
			                                   bytesRequested,
			                                   blockHint,
			                                   extendFlags,
			                                   &actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);
		}

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, TRUE);
	hfs_unlock(cp);
	return (MacToVFSError(retval));
}
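
/*
 * Usage note (illustrative, not from the original source): the a_flags word
 * changes how a_length is interpreted above.  With ALLOCATEFROMPEOF the
 * request is relative, so length += filebytes turns "add N bytes" into an
 * absolute physical EOF; with ALLOCATEFROMVOL, a_offset is only a placement
 * hint (converted to an allocation-block number in blockHint), and a request
 * that would shrink below the current physical EOF is rejected up front.
 */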
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}
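
/*
 * Illustrative note: the bytesread accounting above feeds the hot-file
 * clustering engine.  ff_bytesread is only reset when the cnode's access time
 * predates hfc_timebase (the start of the current sampling window), so a file
 * paged in repeatedly within one window accumulates a large count and becomes
 * a stronger candidate for migration into the hot-file area.
 */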
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval;
	off_t filesize;
	off_t end_of_range;

	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	if (!vnode_isswap(vp)) {
		int took_cnode_lock = 0;

		if (cp->c_lockowner != current_thread()) {
			if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
				if (!(ap->a_flags & UPL_NOCOMMIT)) {
					ubc_upl_abort_range(ap->a_pl,
					                    ap->a_pl_offset,
					                    ap->a_size,
					                    UPL_ABORT_FREE_ON_EMPTY);
				}
				return (retval);
			}
			took_cnode_lock = 1;
		}

		end_of_range = ap->a_f_offset + ap->a_size - 1;

		if (end_of_range >= filesize) {
			end_of_range = (off_t)(filesize - 1);
		}
		if (ap->a_f_offset < filesize) {
			rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
			cp->c_flag |= C_MODIFIED;  /* leof is dirty */
		}

		if (took_cnode_lock)
			hfs_unlock(cp);
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                         ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
		hfs_unlock(cp);
	}
	return (retval);
}
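
/*
 * Illustrative note: clearing S_ISUID/S_ISGID after a successful pageout by a
 * non-superuser mirrors the usual write-path precaution; a write made through
 * a memory mapping does not pass through hfs_vnop_write, so the pageout path
 * is where a tampered setuid binary loses its privilege bits.
 */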
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
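
/*
 * Illustrative note on the 0x000e test above (assumption: standard HFS+
 * B-tree node layout).  Record offsets are packed at the end of a node, and
 * the offset of the first record is always sizeof(BTNodeDescriptor), i.e.
 * 14 == 0x000e.  Reading the node's last two bytes as a host-order u_int16_t
 * therefore yields 0x000e only while the node is still in host (unswapped)
 * byte order, which is exactly the case that must be swapped and validated
 * here before the buffer reaches disk.
 */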
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               | STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////| STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////| STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		            hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
	goto exit;
}
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	off_t filesize;
	size_t offset;
	off_t writebase;
	uio_t auio;
	int error = 0;

	filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
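
/*
 * Design note (illustrative): the copy loop above reads with IO_NOCACHE and
 * writes with IO_NOCACHE | IO_SYNC on purpose.  IO_NOCACHE keeps up to
 * blkcnt * blksize bytes of one-shot clone traffic from evicting useful pages
 * from the UBC, and IO_SYNC ensures the new copy is on disk before
 * hfs_relocate() switches the extents over and head-truncates the old blocks;
 * that is also why no ubc_sync_range()/hfs_invalbuf() pass is needed after
 * the loop.
 */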
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char *offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}