2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* @(#)hfs_readwrite.c 1.0
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
48 #include <sys/vfs_context.h>
50 #include <sys/sysctl.h>
52 #include <miscfs/specfs/specdev.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
58 #include <sys/kdebug.h>
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
69 extern int overflow_extents(struct filefork
*fp
);
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
74 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
77 extern u_int32_t
GetLogicalBlockSize(struct vnode
*vp
);
79 extern int hfs_setextendedsecurity(struct hfsmount
*, int);
82 static int hfs_clonelink(struct vnode
*, int, kauth_cred_t
, struct proc
*);
83 static int hfs_clonefile(struct vnode
*, int, int, int);
84 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
87 int flush_cache_on_write
= 0;
88 SYSCTL_INT (_kern
, OID_AUTO
, flush_cache_on_write
, CTLFLAG_RW
, &flush_cache_on_write
, 0, "always flush the drive cache on writes to uncached files");
91 /*****************************************************************************
93 * I/O Operations on vnodes
95 *****************************************************************************/
96 int hfs_vnop_read(struct vnop_read_args
*);
97 int hfs_vnop_write(struct vnop_write_args
*);
98 int hfs_vnop_ioctl(struct vnop_ioctl_args
*);
99 int hfs_vnop_select(struct vnop_select_args
*);
100 int hfs_vnop_blktooff(struct vnop_blktooff_args
*);
101 int hfs_vnop_offtoblk(struct vnop_offtoblk_args
*);
102 int hfs_vnop_blockmap(struct vnop_blockmap_args
*);
103 int hfs_vnop_strategy(struct vnop_strategy_args
*);
104 int hfs_vnop_allocate(struct vnop_allocate_args
*);
105 int hfs_vnop_pagein(struct vnop_pagein_args
*);
106 int hfs_vnop_pageout(struct vnop_pageout_args
*);
107 int hfs_vnop_bwrite(struct vnop_bwrite_args
*);
111 * Read data from a file.
114 hfs_vnop_read(struct vnop_read_args
*ap
)
116 uio_t uio
= ap
->a_uio
;
117 struct vnode
*vp
= ap
->a_vp
;
120 struct hfsmount
*hfsmp
;
123 off_t start_resid
= uio_resid(uio
);
124 off_t offset
= uio_offset(uio
);
128 /* Preflight checks */
129 if (!vnode_isreg(vp
)) {
130 /* can only read regular files */
136 if (start_resid
== 0)
137 return (0); /* Nothing left to do */
139 return (EINVAL
); /* cant read from a negative offset */
145 /* Protect against a size change. */
146 hfs_lock_truncate(cp
, 0);
148 filesize
= fp
->ff_size
;
149 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
150 if (offset
> filesize
) {
151 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
152 (offset
> (off_t
)MAXHFSFILESIZE
)) {
158 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
159 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
161 retval
= cluster_read(vp
, uio
, filesize
, 0);
163 cp
->c_touch_acctime
= TRUE
;
165 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
166 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
169 * Keep track blocks read
171 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
172 int took_cnode_lock
= 0;
175 bytesread
= start_resid
- uio_resid(uio
);
177 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
178 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
179 hfs_lock(cp
, HFS_FORCE_LOCK
);
183 * If this file hasn't been seen since the start of
184 * the current sampling period then start over.
186 if (cp
->c_atime
< VTOHFS(vp
)->hfc_timebase
) {
189 fp
->ff_bytesread
= bytesread
;
191 cp
->c_atime
= tv
.tv_sec
;
193 fp
->ff_bytesread
+= bytesread
;
199 hfs_unlock_truncate(cp
);
204 * Write data to a file.
207 hfs_vnop_write(struct vnop_write_args
*ap
)
209 uio_t uio
= ap
->a_uio
;
210 struct vnode
*vp
= ap
->a_vp
;
213 struct hfsmount
*hfsmp
;
214 kauth_cred_t cred
= NULL
;
218 off_t actualBytesAdded
;
223 int ioflag
= ap
->a_ioflag
;
226 int cnode_locked
= 0;
228 // LP64todo - fix this! uio_resid may be 64-bit value
229 resid
= uio_resid(uio
);
230 offset
= uio_offset(uio
);
236 if (!vnode_isreg(vp
))
237 return (EPERM
); /* Can only write regular files */
239 /* Protect against a size change. */
240 hfs_lock_truncate(VTOC(vp
), TRUE
);
242 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
243 hfs_unlock_truncate(VTOC(vp
));
250 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
252 if (ioflag
& IO_APPEND
) {
253 uio_setoffset(uio
, fp
->ff_size
);
254 offset
= fp
->ff_size
;
256 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
261 origFileSize
= fp
->ff_size
;
262 eflags
= kEFDeferMask
; /* defer file block allocations */
264 #ifdef HFS_SPARSE_DEV
266 * When the underlying device is sparse and space
267 * is low (< 8MB), stop doing delayed allocations
268 * and begin doing synchronous I/O.
270 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
271 (hfs_freeblks(hfsmp
, 0) < 2048)) {
272 eflags
&= ~kEFDeferMask
;
275 #endif /* HFS_SPARSE_DEV */
277 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
278 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
280 /* Now test if we need to extend the file */
281 /* Doing so will adjust the filebytes for us */
283 writelimit
= offset
+ resid
;
284 if (writelimit
<= filebytes
)
287 cred
= vfs_context_ucred(ap
->a_context
);
289 bytesToAdd
= writelimit
- filebytes
;
290 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
296 if (hfs_start_transaction(hfsmp
) != 0) {
301 while (writelimit
> filebytes
) {
302 bytesToAdd
= writelimit
- filebytes
;
303 if (cred
&& suser(cred
, NULL
) != 0)
304 eflags
|= kEFReserveMask
;
306 /* Protect extents b-tree and allocation bitmap */
307 lockflags
= SFL_BITMAP
;
308 if (overflow_extents(fp
))
309 lockflags
|= SFL_EXTENTS
;
310 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
312 /* Files that are changing size are not hot file candidates. */
313 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
314 fp
->ff_bytesread
= 0;
316 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
317 0, eflags
, &actualBytesAdded
));
319 hfs_systemfile_unlock(hfsmp
, lockflags
);
321 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
323 if (retval
!= E_NONE
)
325 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
326 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
327 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
329 (void) hfs_update(vp
, TRUE
);
330 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
331 (void) hfs_end_transaction(hfsmp
);
334 if (retval
== E_NONE
) {
342 struct rl_entry
*invalid_range
;
344 if (writelimit
> fp
->ff_size
)
345 filesize
= writelimit
;
347 filesize
= fp
->ff_size
;
349 lflag
= (ioflag
& IO_SYNC
);
351 if (offset
<= fp
->ff_size
) {
352 zero_off
= offset
& ~PAGE_MASK_64
;
354 /* Check to see whether the area between the zero_offset and the start
355 of the transfer to see whether is invalid and should be zero-filled
356 as part of the transfer:
358 if (offset
> zero_off
) {
359 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
360 lflag
|= IO_HEADZEROFILL
;
363 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
365 /* The bytes between fp->ff_size and uio->uio_offset must never be
366 read without being zeroed. The current last block is filled with zeroes
367 if it holds valid data but in all cases merely do a little bookkeeping
368 to track the area from the end of the current last page to the start of
369 the area actually written. For the same reason only the bytes up to the
370 start of the page where this write will start is invalidated; any remainder
371 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
373 Note that inval_start, the start of the page after the current EOF,
374 may be past the start of the write, in which case the zeroing
375 will be handled by the cluser_write of the actual data.
377 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
378 inval_end
= offset
& ~PAGE_MASK_64
;
379 zero_off
= fp
->ff_size
;
381 if ((fp
->ff_size
& PAGE_MASK_64
) &&
382 (rl_scan(&fp
->ff_invalidranges
,
385 &invalid_range
) != RL_NOOVERLAP
)) {
386 /* The page containing the EOF is not valid, so the
387 entire page must be made inaccessible now. If the write
388 starts on a page beyond the page containing the eof
389 (inval_end > eof_page_base), add the
390 whole page to the range to be invalidated. Otherwise
391 (i.e. if the write starts on the same page), zero-fill
392 the entire page explicitly now:
394 if (inval_end
> eof_page_base
) {
395 inval_start
= eof_page_base
;
397 zero_off
= eof_page_base
;
401 if (inval_start
< inval_end
) {
403 /* There's some range of data that's going to be marked invalid */
405 if (zero_off
< inval_start
) {
406 /* The pages between inval_start and inval_end are going to be invalidated,
407 and the actual write will start on a page past inval_end. Now's the last
408 chance to zero-fill the page containing the EOF:
412 retval
= cluster_write(vp
, (uio_t
) 0,
413 fp
->ff_size
, inval_start
,
415 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
416 hfs_lock(cp
, HFS_FORCE_LOCK
);
418 if (retval
) goto ioerr_exit
;
419 offset
= uio_offset(uio
);
422 /* Mark the remaining area of the newly allocated space as invalid: */
423 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
425 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
426 zero_off
= fp
->ff_size
= inval_end
;
429 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
432 /* Check to see whether the area between the end of the write and the end of
433 the page it falls in is invalid and should be zero-filled as part of the transfer:
435 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
436 if (tail_off
> filesize
) tail_off
= filesize
;
437 if (tail_off
> writelimit
) {
438 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
439 lflag
|= IO_TAILZEROFILL
;
444 * if the write starts beyond the current EOF (possibly advanced in the
445 * zeroing of the last block, above), then we'll zero fill from the current EOF
446 * to where the write begins:
448 * NOTE: If (and ONLY if) the portion of the file about to be written is
449 * before the current EOF it might be marked as invalid now and must be
450 * made readable (removed from the invalid ranges) before cluster_write
453 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
454 if (io_start
< fp
->ff_size
) {
457 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
458 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
463 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
464 tail_off
, lflag
| IO_NOZERODIRTY
);
465 offset
= uio_offset(uio
);
466 if (offset
> fp
->ff_size
) {
467 fp
->ff_size
= offset
;
469 ubc_setsize(vp
, fp
->ff_size
); /* XXX check errors */
470 /* Files that are changing size are not hot file candidates. */
471 if (hfsmp
->hfc_stage
== HFC_RECORDING
)
472 fp
->ff_bytesread
= 0;
474 if (resid
> uio_resid(uio
)) {
475 cp
->c_touch_chgtime
= TRUE
;
476 cp
->c_touch_modtime
= TRUE
;
480 // XXXdbg - testing for vivek and paul lambert
482 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
483 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
486 HFS_KNOTE(vp
, NOTE_WRITE
);
490 * If we successfully wrote any data, and we are not the superuser
491 * we clear the setuid and setgid bits as a precaution against
494 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
495 cred
= vfs_context_ucred(ap
->a_context
);
496 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
498 hfs_lock(cp
, HFS_FORCE_LOCK
);
501 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
505 if (ioflag
& IO_UNIT
) {
507 hfs_lock(cp
, HFS_FORCE_LOCK
);
510 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
512 // LP64todo - fix this! resid needs to by user_ssize_t
513 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
514 uio_setresid(uio
, resid
);
515 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
517 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
519 hfs_lock(cp
, HFS_FORCE_LOCK
);
522 retval
= hfs_update(vp
, TRUE
);
524 /* Updating vcbWrCnt doesn't need to be atomic. */
527 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
528 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
532 hfs_unlock_truncate(cp
);
536 /* support for the "bulk-access" fcntl */
538 #define CACHE_ELEMS 64
539 #define CACHE_LEVELS 16
540 #define PARENT_IDS_FLAG 0x100
542 /* from hfs_attrlist.c */
543 extern unsigned long DerivePermissionSummary(uid_t obj_uid
, gid_t obj_gid
,
544 mode_t obj_mode
, struct mount
*mp
,
545 kauth_cred_t cred
, struct proc
*p
);
547 /* from vfs/vfs_fsevents.c */
548 extern char *get_pathbuff(void);
549 extern void release_pathbuff(char *buff
);
551 struct access_cache
{
553 int cachehits
; /* these two for statistics gathering */
555 unsigned int *acache
;
560 uid_t uid
; /* IN: effective user id */
561 short flags
; /* IN: access requested (i.e. R_OK) */
562 short num_groups
; /* IN: number of groups user belongs to */
563 int num_files
; /* IN: number of files to process */
564 int *file_ids
; /* IN: array of file ids */
565 gid_t
*groups
; /* IN: array of groups */
566 short *access
; /* OUT: access info for each file (0 for 'has access') */
569 struct user_access_t
{
570 uid_t uid
; /* IN: effective user id */
571 short flags
; /* IN: access requested (i.e. R_OK) */
572 short num_groups
; /* IN: number of groups user belongs to */
573 int num_files
; /* IN: number of files to process */
574 user_addr_t file_ids
; /* IN: array of file ids */
575 user_addr_t groups
; /* IN: array of groups */
576 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
580 * Perform a binary search for the given parent_id. Return value is
581 * found/not found boolean, and indexp will be the index of the item
582 * or the index at which to insert the item if it's not found.
585 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
588 int index
, matches
= 0;
590 if (cache
->numcached
== 0) {
592 return 0; // table is empty, so insert at index=0 and report no match
595 if (cache
->numcached
> CACHE_ELEMS
) {
596 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
597 cache->numcached, CACHE_ELEMS);*/
598 cache
->numcached
= CACHE_ELEMS
;
602 hi
= cache
->numcached
- 1;
605 /* perform binary search for parent_id */
607 unsigned int mid
= (hi
- lo
)/2 + lo
;
608 unsigned int this_id
= cache
->acache
[mid
];
610 if (parent_id
== this_id
) {
615 if (parent_id
< this_id
) {
620 if (parent_id
> this_id
) {
626 /* check if lo and hi converged on the match */
627 if (parent_id
== cache
->acache
[hi
]) {
631 /* if no existing entry found, find index for new one */
633 index
= (parent_id
< cache
->acache
[hi
]) ? hi
: hi
+ 1;
644 * Add a node to the access_cache at the given index (or do a lookup first
645 * to find the index if -1 is passed in). We currently do a replace rather
646 * than an insert if the cache is full.
649 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
651 int lookup_index
= -1;
653 /* need to do a lookup first if -1 passed for index */
655 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
656 if (cache
->haveaccess
[lookup_index
] != access
) {
657 /* change access info for existing entry... should never happen */
658 cache
->haveaccess
[lookup_index
] = access
;
661 /* mission accomplished */
664 index
= lookup_index
;
669 /* if the cache is full, do a replace rather than an insert */
670 if (cache
->numcached
>= CACHE_ELEMS
) {
671 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
672 cache
->numcached
= CACHE_ELEMS
-1;
674 if (index
> cache
->numcached
) {
675 // printf("index %d pinned to %d\n", index, cache->numcached);
676 index
= cache
->numcached
;
678 } else if (index
>= 0 && index
< cache
->numcached
) {
679 /* only do bcopy if we're inserting */
680 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
681 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(Boolean
) );
684 cache
->acache
[index
] = nodeID
;
685 cache
->haveaccess
[index
] = access
;
698 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
700 struct cinfo
*cip
= (struct cinfo
*)arg
;
702 cip
->uid
= attrp
->ca_uid
;
703 cip
->gid
= attrp
->ca_gid
;
704 cip
->mode
= attrp
->ca_mode
;
705 cip
->parentcnid
= descp
->cd_parentcnid
;
711 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
712 * isn't incore, then go to the catalog.
715 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, dev_t dev
, cnid_t cnid
,
716 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
, struct proc
*p
)
720 /* if this id matches the one the fsctl was called with, skip the lookup */
721 if (cnid
== skip_cp
->c_cnid
) {
722 cnattrp
->ca_uid
= skip_cp
->c_uid
;
723 cnattrp
->ca_gid
= skip_cp
->c_gid
;
724 cnattrp
->ca_mode
= skip_cp
->c_mode
;
725 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
729 /* otherwise, check the cnode hash incase the file/dir is incore */
730 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
731 cnattrp
->ca_uid
= c_info
.uid
;
732 cnattrp
->ca_gid
= c_info
.gid
;
733 cnattrp
->ca_mode
= c_info
.mode
;
734 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
738 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
740 /* lookup this cnid in the catalog */
741 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
743 hfs_systemfile_unlock(hfsmp
, lockflags
);
753 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
754 * up to CACHE_LEVELS as we progress towards the root.
757 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
758 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
, dev_t dev
)
762 HFSCatalogNodeID thisNodeID
;
763 unsigned long myPerms
;
764 struct cat_attr cnattr
;
765 int cache_index
= -1;
768 int i
= 0, ids_to_cache
= 0;
769 int parent_ids
[CACHE_LEVELS
];
771 /* root always has access */
772 if (!suser(myp_ucred
, NULL
)) {
777 while (thisNodeID
>= kRootDirID
) {
778 myResult
= 0; /* default to "no access" */
780 /* check the cache before resorting to hitting the catalog */
782 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
783 * to look any further after hitting cached dir */
785 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
787 myResult
= cache
->haveaccess
[cache_index
];
788 goto ExitThisRoutine
;
791 /* remember which parents we want to cache */
792 if (ids_to_cache
< CACHE_LEVELS
) {
793 parent_ids
[ids_to_cache
] = thisNodeID
;
797 /* do the lookup (checks the cnode hash, then the catalog) */
798 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
, theProcPtr
);
800 goto ExitThisRoutine
; /* no access */
803 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
804 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
805 myp_ucred
, theProcPtr
);
807 if ( (myPerms
& X_OK
) == 0 ) {
809 goto ExitThisRoutine
; /* no access */
812 /* up the hierarchy we go */
813 thisNodeID
= catkey
.hfsPlus
.parentID
;
816 /* if here, we have access to this node */
821 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
826 /* cache the parent directory(ies) */
827 for (i
= 0; i
< ids_to_cache
; i
++) {
828 /* small optimization: get rid of double-lookup for all these */
829 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
830 add_node(cache
, -1, parent_ids
[i
], myResult
);
835 /* end "bulk-access" support */
840 * Callback for use with freeze ioctl.
843 hfs_freezewrite_callback(struct vnode
*vp
, void *cargs
)
845 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
851 * Control filesystem operating characteristics.
854 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
859 vfs_context_t a_context;
862 struct vnode
* vp
= ap
->a_vp
;
863 struct hfsmount
*hfsmp
= VTOHFS(vp
);
864 vfs_context_t context
= ap
->a_context
;
865 kauth_cred_t cred
= vfs_context_ucred(context
);
866 proc_t p
= vfs_context_proc(context
);
867 struct vfsstatfs
*vfsp
;
870 is64bit
= proc_is64bit(p
);
872 switch (ap
->a_command
) {
874 case HFS_RESIZE_VOLUME
: {
878 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
879 if (suser(cred
, NULL
) &&
880 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
881 return (EACCES
); /* must be owner of file system */
883 if (!vnode_isvroot(vp
)) {
886 newsize
= *(u_int64_t
*)ap
->a_data
;
887 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
889 if (newsize
> cursize
) {
890 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
891 } else if (newsize
< cursize
) {
892 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
897 case HFS_CHANGE_NEXT_ALLOCATION
: {
900 if (vnode_vfsisrdonly(vp
)) {
903 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
904 if (suser(cred
, NULL
) &&
905 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
906 return (EACCES
); /* must be owner of file system */
908 if (!vnode_isvroot(vp
)) {
911 location
= *(u_int32_t
*)ap
->a_data
;
912 if (location
> hfsmp
->totalBlocks
- 1) {
915 /* Return previous value. */
916 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
917 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
918 hfsmp
->nextAllocation
= location
;
919 hfsmp
->vcbFlags
|= 0xFF00;
920 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
924 #ifdef HFS_SPARSE_DEV
925 case HFS_SETBACKINGSTOREINFO
: {
926 struct vnode
* bsfs_rootvp
;
927 struct vnode
* di_vp
;
928 struct hfs_backingstoreinfo
*bsdata
;
931 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
934 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
935 if (suser(cred
, NULL
) &&
936 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
937 return (EACCES
); /* must be owner of file system */
939 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
940 if (bsdata
== NULL
) {
943 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
946 if ((error
= vnode_getwithref(di_vp
))) {
947 file_drop(bsdata
->backingfd
);
951 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
952 (void)vnode_put(di_vp
);
953 file_drop(bsdata
->backingfd
);
958 * Obtain the backing fs root vnode and keep a reference
959 * on it. This reference will be dropped in hfs_unmount.
961 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
963 (void)vnode_put(di_vp
);
964 file_drop(bsdata
->backingfd
);
967 vnode_ref(bsfs_rootvp
);
968 vnode_put(bsfs_rootvp
);
970 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
971 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
972 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
973 hfsmp
->hfs_sparsebandblks
*= 4;
975 (void)vnode_put(di_vp
);
976 file_drop(bsdata
->backingfd
);
979 case HFS_CLRBACKINGSTOREINFO
: {
980 struct vnode
* tmpvp
;
982 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
983 if (suser(cred
, NULL
) &&
984 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
985 return (EACCES
); /* must be owner of file system */
987 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
988 hfsmp
->hfs_backingfs_rootvp
) {
990 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
991 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
992 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
993 hfsmp
->hfs_sparsebandblks
= 0;
998 #endif /* HFS_SPARSE_DEV */
1007 mp
= vnode_mount(vp
);
1008 hfsmp
= VFSTOHFS(mp
);
1013 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1015 task
= current_task();
1016 task_working_set_disable(task
);
1018 // flush things before we get started to try and prevent
1019 // dirty data from being paged out while we're frozen.
1020 // note: can't do this after taking the lock as it will
1021 // deadlock against ourselves.
1022 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1023 hfs_global_exclusive_lock_acquire(hfsmp
);
1024 journal_flush(hfsmp
->jnl
);
1026 // don't need to iterate on all vnodes, we just need to
1027 // wait for writes to the system files and the device vnode
1028 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1029 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1030 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1031 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1032 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1033 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1034 if (hfsmp
->hfs_attribute_vp
)
1035 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1036 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1038 hfsmp
->hfs_freezing_proc
= current_proc();
1047 // if we're not the one who froze the fs then we
1049 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1053 // NOTE: if you add code here, also go check the
1054 // code that "thaws" the fs in hfs_vnop_close()
1056 hfsmp
->hfs_freezing_proc
= NULL
;
1057 hfs_global_exclusive_lock_release(hfsmp
);
1058 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1063 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1064 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1066 case HFS_BULKACCESS_FSCTL
:
1067 case HFS_BULKACCESS
: {
1069 * NOTE: on entry, the vnode is locked. Incase this vnode
1070 * happens to be in our list of file_ids, we'll note it
1071 * avoid calling hfs_chashget_nowait() on that id as that
1072 * will cause a "locking against myself" panic.
1074 Boolean check_leaf
= true;
1076 struct user_access_t
*user_access_structp
;
1077 struct user_access_t tmp_user_access_t
;
1078 struct access_cache cache
;
1082 dev_t dev
= VTOC(vp
)->c_dev
;
1085 struct ucred myucred
; /* XXX ILLEGAL */
1087 int *file_ids
= NULL
;
1088 short *access
= NULL
;
1091 cnid_t prevParent_cnid
= 0;
1092 unsigned long myPerms
;
1094 struct cat_attr cnattr
;
1096 struct cnode
*skip_cp
= VTOC(vp
);
1097 struct vfs_context my_context
;
1099 /* first, return error if not run as root */
1100 if (cred
->cr_ruid
!= 0) {
1104 /* initialize the local cache and buffers */
1105 cache
.numcached
= 0;
1106 cache
.cachehits
= 0;
1109 file_ids
= (int *) get_pathbuff();
1110 access
= (short *) get_pathbuff();
1111 cache
.acache
= (int *) get_pathbuff();
1112 cache
.haveaccess
= (Boolean
*) get_pathbuff();
1114 if (file_ids
== NULL
|| access
== NULL
|| cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1115 release_pathbuff((char *) file_ids
);
1116 release_pathbuff((char *) access
);
1117 release_pathbuff((char *) cache
.acache
);
1118 release_pathbuff((char *) cache
.haveaccess
);
1123 /* struct copyin done during dispatch... need to copy file_id array separately */
1124 if (ap
->a_data
== NULL
) {
1126 goto err_exit_bulk_access
;
1130 user_access_structp
= (struct user_access_t
*)ap
->a_data
;
1133 struct access_t
* accessp
= (struct access_t
*)ap
->a_data
;
1134 tmp_user_access_t
.uid
= accessp
->uid
;
1135 tmp_user_access_t
.flags
= accessp
->flags
;
1136 tmp_user_access_t
.num_groups
= accessp
->num_groups
;
1137 tmp_user_access_t
.num_files
= accessp
->num_files
;
1138 tmp_user_access_t
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1139 tmp_user_access_t
.groups
= CAST_USER_ADDR_T(accessp
->groups
);
1140 tmp_user_access_t
.access
= CAST_USER_ADDR_T(accessp
->access
);
1141 user_access_structp
= &tmp_user_access_t
;
1144 num_files
= user_access_structp
->num_files
;
1145 if (num_files
< 1) {
1146 goto err_exit_bulk_access
;
1148 if (num_files
> 256) {
1150 goto err_exit_bulk_access
;
1153 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1154 num_files
* sizeof(int)))) {
1155 goto err_exit_bulk_access
;
1158 /* fill in the ucred structure */
1159 flags
= user_access_structp
->flags
;
1160 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1164 /* check if we've been passed leaf node ids or parent ids */
1165 if (flags
& PARENT_IDS_FLAG
) {
1169 memset(&myucred
, 0, sizeof(myucred
));
1171 myucred
.cr_uid
= myucred
.cr_ruid
= myucred
.cr_svuid
= user_access_structp
->uid
;
1172 myucred
.cr_ngroups
= user_access_structp
->num_groups
;
1173 if (myucred
.cr_ngroups
< 1 || myucred
.cr_ngroups
> 16) {
1174 myucred
.cr_ngroups
= 0;
1175 } else if ((error
= copyin(user_access_structp
->groups
, (caddr_t
)myucred
.cr_groups
,
1176 myucred
.cr_ngroups
* sizeof(gid_t
)))) {
1177 goto err_exit_bulk_access
;
1179 myucred
.cr_rgid
= myucred
.cr_svgid
= myucred
.cr_groups
[0];
1180 myucred
.cr_gmuid
= myucred
.cr_uid
;
1182 my_context
.vc_proc
= p
;
1183 my_context
.vc_ucred
= &myucred
;
1185 /* Check access to each file_id passed in */
1186 for (i
= 0; i
< num_files
; i
++) {
1188 cnid
= (cnid_t
) file_ids
[i
];
1190 /* root always has access */
1191 if (!suser(&myucred
, NULL
)) {
1198 /* do the lookup (checks the cnode hash, then the catalog) */
1199 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
, p
);
1201 access
[i
] = (short) error
;
1205 /* before calling CheckAccess(), check the target file for read access */
1206 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1207 cnattr
.ca_mode
, hfsmp
->hfs_mp
, &myucred
, p
);
1210 /* fail fast if no access */
1211 if ((myPerms
& flags
) == 0) {
1216 /* we were passed an array of parent ids */
1217 catkey
.hfsPlus
.parentID
= cnid
;
1220 /* if the last guy had the same parent and had access, we're done */
1221 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1227 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1228 skip_cp
, p
, &myucred
, dev
);
1231 access
[i
] = 0; // have access.. no errors to report
1233 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1236 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1240 cnid
= (cnid_t
)file_ids
[i
];
1242 while (cnid
>= kRootDirID
) {
1243 /* get the vnode for this cnid */
1244 myErr
= hfs_vget(hfsmp
, cnid
, &vp
, 0);
1250 cnid
= VTOC(vp
)->c_parentcnid
;
1252 hfs_unlock(VTOC(vp
));
1253 if (vnode_vtype(vp
) == VDIR
) {
1254 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), &my_context
);
1256 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, &my_context
);
1267 /* copyout the access array */
1268 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1269 num_files
* sizeof (short)))) {
1270 goto err_exit_bulk_access
;
1273 err_exit_bulk_access
:
1275 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1277 release_pathbuff((char *) cache
.acache
);
1278 release_pathbuff((char *) cache
.haveaccess
);
1279 release_pathbuff((char *) file_ids
);
1280 release_pathbuff((char *) access
);
1283 } /* HFS_BULKACCESS */
1285 case HFS_SETACLSTATE
: {
1288 if (ap
->a_data
== NULL
) {
1292 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1293 state
= *(int *)ap
->a_data
;
1295 // super-user can enable or disable acl's on a volume.
1296 // the volume owner can only enable acl's
1297 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1300 if (state
== 0 || state
== 1)
1301 return hfs_setextendedsecurity(hfsmp
, state
);
1309 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1311 error
= hfs_fsync(vp
, MNT_NOWAIT
, TRUE
, p
);
1312 hfs_unlock(VTOC(vp
));
1319 register struct cnode
*cp
;
1322 if (!vnode_isreg(vp
))
1325 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1329 * used by regression test to determine if
1330 * all the dirty pages (via write) have been cleaned
1331 * after a call to 'fsysnc'.
1333 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1340 register struct radvisory
*ra
;
1341 struct filefork
*fp
;
1344 if (!vnode_isreg(vp
))
1347 ra
= (struct radvisory
*)(ap
->a_data
);
1350 /* Protect against a size change. */
1351 hfs_lock_truncate(VTOC(vp
), TRUE
);
1353 if (ra
->ra_offset
>= fp
->ff_size
) {
1356 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1359 hfs_unlock_truncate(VTOC(vp
));
1363 case F_READBOOTSTRAP
:
1364 case F_WRITEBOOTSTRAP
:
1366 struct vnode
*devvp
= NULL
;
1367 user_fbootstraptransfer_t
*user_bootstrapp
;
1371 daddr64_t blockNumber
;
1375 user_fbootstraptransfer_t user_bootstrap
;
1377 if (!vnode_isvroot(vp
))
1379 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1380 * to a user_fbootstraptransfer_t else we get a pointer to a
1381 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1384 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1387 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1388 user_bootstrapp
= &user_bootstrap
;
1389 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1390 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1391 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1393 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1396 devvp
= VTOHFS(vp
)->hfs_devvp
;
1397 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1398 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1399 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1400 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1402 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1404 while (uio_resid(auio
) > 0) {
1405 blockNumber
= uio_offset(auio
) / devBlockSize
;
1406 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1408 if (bp
) buf_brelse(bp
);
1413 blockOffset
= uio_offset(auio
) % devBlockSize
;
1414 xfersize
= devBlockSize
- blockOffset
;
1415 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1421 if (uio_rw(auio
) == UIO_WRITE
) {
1422 error
= VNOP_BWRITE(bp
);
1435 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1438 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1441 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1446 case HFS_GET_MOUNT_TIME
:
1447 return copyout(&hfsmp
->hfs_mount_time
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_mount_time
));
1450 case HFS_GET_LAST_MTIME
:
1451 return copyout(&hfsmp
->hfs_last_mounted_mtime
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_last_mounted_mtime
));
1454 case HFS_SET_BOOT_INFO
:
1455 if (!vnode_isvroot(vp
))
1457 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1458 return(EACCES
); /* must be superuser or owner of filesystem */
1459 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1460 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1461 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1462 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1465 case HFS_GET_BOOT_INFO
:
1466 if (!vnode_isvroot(vp
))
1468 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1469 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1470 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1477 /* Should never get here */
1485 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1487 struct vnop_select_args {
1492 vfs_context_t a_context;
1497 * We should really check to see if I/O is possible.
1503 * Converts a logical block number to a physical block, and optionally returns
1504 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1505 * The physical block number is based on the device block size, currently its 512.
1506 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1509 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, int *runp
)
1511 struct cnode
*cp
= VTOC(vp
);
1512 struct filefork
*fp
= VTOF(vp
);
1513 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1514 int retval
= E_NONE
;
1515 daddr_t logBlockSize
;
1516 size_t bytesContAvail
= 0;
1517 off_t blockposition
;
1522 * Check for underlying vnode requests and ensure that logical
1523 * to physical mapping is requested.
1530 logBlockSize
= GetLogicalBlockSize(vp
);
1531 blockposition
= (off_t
)bn
* (off_t
)logBlockSize
;
1533 lockExtBtree
= overflow_extents(fp
);
1536 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_SHARED_LOCK
);
1538 retval
= MacToVFSError(
1539 MapFileBlockC (HFSTOVCB(hfsmp
),
1547 hfs_systemfile_unlock(hfsmp
, lockflags
);
1549 if (retval
== E_NONE
) {
1550 /* Figure out how many read ahead blocks there are */
1552 if (can_cluster(logBlockSize
)) {
1553 /* Make sure this result never goes negative: */
1554 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
1564 * Convert logical block number to file offset.
1567 hfs_vnop_blktooff(struct vnop_blktooff_args
*ap
)
1569 struct vnop_blktooff_args {
1576 if (ap
->a_vp
== NULL
)
1578 *ap
->a_offset
= (off_t
)ap
->a_lblkno
* (off_t
)GetLogicalBlockSize(ap
->a_vp
);
1584 * Convert file offset to logical block number.
1587 hfs_vnop_offtoblk(struct vnop_offtoblk_args
*ap
)
1589 struct vnop_offtoblk_args {
1592 daddr64_t *a_lblkno;
1596 if (ap
->a_vp
== NULL
)
1598 *ap
->a_lblkno
= (daddr64_t
)(ap
->a_offset
/ (off_t
)GetLogicalBlockSize(ap
->a_vp
));
1604 * Map file offset to physical block number.
1606 * System file cnodes are expected to be locked (shared or exclusive).
1609 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
1611 struct vnop_blockmap_args {
1619 vfs_context_t a_context;
1623 struct vnode
*vp
= ap
->a_vp
;
1625 struct filefork
*fp
;
1626 struct hfsmount
*hfsmp
;
1627 size_t bytesContAvail
= 0;
1628 int retval
= E_NONE
;
1631 struct rl_entry
*invalid_range
;
1632 enum rl_overlaptype overlaptype
;
1636 /* Do not allow blockmap operation on a directory */
1637 if (vnode_isdir(vp
)) {
1642 * Check for underlying vnode requests and ensure that logical
1643 * to physical mapping is requested.
1645 if (ap
->a_bpn
== NULL
)
1648 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
)) {
1649 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
1650 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
1654 panic("blockmap: %s cnode lock already held!\n",
1655 cp
->c_desc
.cd_nameptr
? cp
->c_desc
.cd_nameptr
: "");
1663 if (fp
->ff_unallocblocks
) {
1664 if (hfs_start_transaction(hfsmp
) != 0) {
1670 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
1672 } else if (overflow_extents(fp
)) {
1673 syslocks
= SFL_EXTENTS
;
1677 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
1680 * Check for any delayed allocations.
1682 if (fp
->ff_unallocblocks
) {
1684 u_int32_t loanedBlocks
;
1687 // Make sure we have a transaction. It's possible
1688 // that we came in and fp->ff_unallocblocks was zero
1689 // but during the time we blocked acquiring the extents
1690 // btree, ff_unallocblocks became non-zero and so we
1691 // will need to start a transaction.
1693 if (started_tr
== 0) {
1695 hfs_systemfile_unlock(hfsmp
, lockflags
);
1702 * Note: ExtendFileC will Release any blocks on loan and
1703 * aquire real blocks. So we ask to extend by zero bytes
1704 * since ExtendFileC will account for the virtual blocks.
1707 loanedBlocks
= fp
->ff_unallocblocks
;
1708 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
1709 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
1712 fp
->ff_unallocblocks
= loanedBlocks
;
1713 cp
->c_blocks
+= loanedBlocks
;
1714 fp
->ff_blocks
+= loanedBlocks
;
1716 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1717 hfsmp
->loanedBlocks
+= loanedBlocks
;
1718 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1722 hfs_systemfile_unlock(hfsmp
, lockflags
);
1723 cp
->c_flag
|= C_MODIFIED
;
1725 (void) hfs_update(vp
, TRUE
);
1726 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1728 hfs_end_transaction(hfsmp
);
1734 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
1735 ap
->a_bpn
, &bytesContAvail
);
1737 hfs_systemfile_unlock(hfsmp
, lockflags
);
1742 (void) hfs_update(vp
, TRUE
);
1743 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1744 hfs_end_transaction(hfsmp
);
1751 /* Adjust the mapping information for invalid file ranges: */
1752 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
1753 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
1755 if (overlaptype
!= RL_NOOVERLAP
) {
1756 switch(overlaptype
) {
1757 case RL_MATCHINGOVERLAP
:
1758 case RL_OVERLAPCONTAINSRANGE
:
1759 case RL_OVERLAPSTARTSBEFORE
:
1760 /* There's no valid block for this byte offset: */
1761 *ap
->a_bpn
= (daddr64_t
)-1;
1762 /* There's no point limiting the amount to be returned
1763 * if the invalid range that was hit extends all the way
1764 * to the EOF (i.e. there's no valid bytes between the
1765 * end of this range and the file's EOF):
1767 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1768 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1769 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1773 case RL_OVERLAPISCONTAINED
:
1774 case RL_OVERLAPENDSAFTER
:
1775 /* The range of interest hits an invalid block before the end: */
1776 if (invalid_range
->rl_start
== ap
->a_foffset
) {
1777 /* There's actually no valid information to be had starting here: */
1778 *ap
->a_bpn
= (daddr64_t
)-1;
1779 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1780 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1781 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1784 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
1791 if (bytesContAvail
> ap
->a_size
)
1792 bytesContAvail
= ap
->a_size
;
1795 *ap
->a_run
= bytesContAvail
;
1798 *(int *)ap
->a_poff
= 0;
1803 return (MacToVFSError(retval
));
1808 * prepare and issue the I/O
1809 * buf_strategy knows how to deal
1810 * with requests that require
1814 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
1816 buf_t bp
= ap
->a_bp
;
1817 vnode_t vp
= buf_vnode(bp
);
1818 struct cnode
*cp
= VTOC(vp
);
1820 return (buf_strategy(cp
->c_devvp
, ap
));
1825 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
, vfs_context_t context
)
1827 register struct cnode
*cp
= VTOC(vp
);
1828 struct filefork
*fp
= VTOF(vp
);
1829 struct proc
*p
= vfs_context_proc(context
);;
1830 kauth_cred_t cred
= vfs_context_ucred(context
);
1833 off_t actualBytesAdded
;
1835 u_int64_t old_filesize
;
1838 struct hfsmount
*hfsmp
;
1841 blksize
= VTOVCB(vp
)->blockSize
;
1842 fileblocks
= fp
->ff_blocks
;
1843 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
1844 old_filesize
= fp
->ff_size
;
1846 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
1847 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1852 /* This should only happen with a corrupt filesystem */
1853 if ((off_t
)fp
->ff_size
< 0)
1856 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
1863 /* Files that are changing size are not hot file candidates. */
1864 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
1865 fp
->ff_bytesread
= 0;
1869 * We cannot just check if fp->ff_size == length (as an optimization)
1870 * since there may be extra physical blocks that also need truncation.
1873 if ((retval
= hfs_getinoquota(cp
)))
1878 * Lengthen the size of the file. We must ensure that the
1879 * last byte of the file is allocated. Since the smallest
1880 * value of ff_size is 0, length will be at least 1.
1882 if (length
> (off_t
)fp
->ff_size
) {
1884 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
1890 * If we don't have enough physical space then
1891 * we need to extend the physical size.
1893 if (length
> filebytes
) {
1895 u_long blockHint
= 0;
1897 /* All or nothing and don't round up to clumpsize. */
1898 eflags
= kEFAllMask
| kEFNoClumpMask
;
1900 if (cred
&& suser(cred
, NULL
) != 0)
1901 eflags
|= kEFReserveMask
; /* keep a reserve */
1904 * Allocate Journal and Quota files in metadata zone.
1906 if (filebytes
== 0 &&
1907 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
1908 hfs_virtualmetafile(cp
)) {
1909 eflags
|= kEFMetadataMask
;
1910 blockHint
= hfsmp
->hfs_metazone_start
;
1912 if (hfs_start_transaction(hfsmp
) != 0) {
1917 /* Protect extents b-tree and allocation bitmap */
1918 lockflags
= SFL_BITMAP
;
1919 if (overflow_extents(fp
))
1920 lockflags
|= SFL_EXTENTS
;
1921 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
1923 while ((length
> filebytes
) && (retval
== E_NONE
)) {
1924 bytesToAdd
= length
- filebytes
;
1925 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
1930 &actualBytesAdded
));
1932 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
1933 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
1934 if (length
> filebytes
)
1940 hfs_systemfile_unlock(hfsmp
, lockflags
);
1943 (void) hfs_update(vp
, TRUE
);
1944 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1947 hfs_end_transaction(hfsmp
);
1952 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
1953 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1956 if (!(flags
& IO_NOZEROFILL
)) {
1957 if (UBCINFOEXISTS(vp
) && retval
== E_NONE
) {
1958 struct rl_entry
*invalid_range
;
1961 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
1962 if (length
< zero_limit
) zero_limit
= length
;
1964 if (length
> (off_t
)fp
->ff_size
) {
1967 /* Extending the file: time to fill out the current last page w. zeroes? */
1968 if ((fp
->ff_size
& PAGE_MASK_64
) &&
1969 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
1970 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
1972 /* There's some valid data at the start of the (current) last page
1973 of the file, so zero out the remainder of that page to ensure the
1974 entire page contains valid data. Since there is no invalid range
1975 possible past the (current) eof, there's no need to remove anything
1976 from the invalid range list before calling cluster_write(): */
1978 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
1979 fp
->ff_size
, (off_t
)0,
1980 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
1981 hfs_lock(cp
, HFS_FORCE_LOCK
);
1982 if (retval
) goto Err_Exit
;
1984 /* Merely invalidate the remaining area, if necessary: */
1985 if (length
> zero_limit
) {
1987 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
1988 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
1991 /* The page containing the (current) eof is invalid: just add the
1992 remainder of the page to the invalid list, along with the area
1993 being newly allocated:
1996 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
1997 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
2001 panic("hfs_truncate: invoked on non-UBC object?!");
2004 cp
->c_touch_modtime
= TRUE
;
2005 fp
->ff_size
= length
;
2007 /* Nested transactions will do their own ubc_setsize. */
2010 * ubc_setsize can cause a pagein here
2011 * so we need to drop cnode lock.
2014 ubc_setsize(vp
, length
);
2015 hfs_lock(cp
, HFS_FORCE_LOCK
);
2018 } else { /* Shorten the size of the file */
2020 if ((off_t
)fp
->ff_size
> length
) {
2022 * Any buffers that are past the truncation point need to be
2023 * invalidated (to maintain buffer cache consistency).
2026 /* Nested transactions will do their own ubc_setsize. */
2029 * ubc_setsize can cause a pageout here
2030 * so we need to drop cnode lock.
2033 ubc_setsize(vp
, length
);
2034 hfs_lock(cp
, HFS_FORCE_LOCK
);
2037 /* Any space previously marked as invalid is now irrelevant: */
2038 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2042 * Account for any unmapped blocks. Note that the new
2043 * file length can still end up with unmapped blocks.
2045 if (fp
->ff_unallocblocks
> 0) {
2046 u_int32_t finalblks
;
2047 u_int32_t loanedBlocks
;
2049 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2051 loanedBlocks
= fp
->ff_unallocblocks
;
2052 cp
->c_blocks
-= loanedBlocks
;
2053 fp
->ff_blocks
-= loanedBlocks
;
2054 fp
->ff_unallocblocks
= 0;
2056 hfsmp
->loanedBlocks
-= loanedBlocks
;
2058 finalblks
= (length
+ blksize
- 1) / blksize
;
2059 if (finalblks
> fp
->ff_blocks
) {
2060 /* calculate required unmapped blocks */
2061 loanedBlocks
= finalblks
- fp
->ff_blocks
;
2062 hfsmp
->loanedBlocks
+= loanedBlocks
;
2064 fp
->ff_unallocblocks
= loanedBlocks
;
2065 cp
->c_blocks
+= loanedBlocks
;
2066 fp
->ff_blocks
+= loanedBlocks
;
2068 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2072 * For a TBE process the deallocation of the file blocks is
2073 * delayed until the file is closed. And hfs_close calls
2074 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2075 * isn't set, we make sure this isn't a TBE process.
2077 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
2079 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
2081 if (hfs_start_transaction(hfsmp
) != 0) {
2086 if (fp
->ff_unallocblocks
== 0) {
2087 /* Protect extents b-tree and allocation bitmap */
2088 lockflags
= SFL_BITMAP
;
2089 if (overflow_extents(fp
))
2090 lockflags
|= SFL_EXTENTS
;
2091 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2093 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
),
2094 (FCB
*)fp
, length
, false));
2096 hfs_systemfile_unlock(hfsmp
, lockflags
);
2100 fp
->ff_size
= length
;
2102 (void) hfs_update(vp
, TRUE
);
2103 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2106 hfs_end_transaction(hfsmp
);
2108 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2112 /* These are bytesreleased */
2113 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
2116 /* Only set update flag if the logical length changes */
2117 if (old_filesize
!= length
)
2118 cp
->c_touch_modtime
= TRUE
;
2119 fp
->ff_size
= length
;
2121 cp
->c_touch_chgtime
= TRUE
;
2122 retval
= hfs_update(vp
, MNT_WAIT
);
2124 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2125 -1, -1, -1, retval
, 0);
2130 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
2131 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
2139 * Truncate a cnode to at most length size, freeing (or adding) the
2144 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
,
2145 vfs_context_t context
)
2147 struct filefork
*fp
= VTOF(vp
);
2150 int blksize
, error
= 0;
2151 struct cnode
*cp
= VTOC(vp
);
2153 if (vnode_isdir(vp
))
2154 return (EISDIR
); /* cannot truncate an HFS directory! */
2156 blksize
= VTOVCB(vp
)->blockSize
;
2157 fileblocks
= fp
->ff_blocks
;
2158 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
2160 // have to loop truncating or growing files that are
2161 // really big because otherwise transactions can get
2162 // enormous and consume too many kernel resources.
2164 if (length
< filebytes
) {
2165 while (filebytes
> length
) {
2166 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
2167 filebytes
-= HFS_BIGFILE_SIZE
;
2171 cp
->c_flag
|= C_FORCEUPDATE
;
2172 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2176 } else if (length
> filebytes
) {
2177 while (filebytes
< length
) {
2178 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
2179 filebytes
+= HFS_BIGFILE_SIZE
;
2183 cp
->c_flag
|= C_FORCEUPDATE
;
2184 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2188 } else /* Same logical size */ {
2190 error
= do_hfs_truncate(vp
, length
, flags
, skipsetsize
, context
);
2192 /* Files that are changing size are not hot file candidates. */
2193 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
2194 fp
->ff_bytesread
= 0;
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args
    /* {
        vnode_t a_vp;
        off_t a_length;
        u_int32_t a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    ExtendedVCB *vcb;
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    int retval, retval2;
    UInt32 blockHint;
    UInt32 extendFlags;   /* For call to ExtendFileC */
    struct hfsmount *hfsmp;
    kauth_cred_t cred = vfs_context_ucred(ap->a_context);
    int lockflags;

    *(ap->a_bytesallocated) = 0;

    if (!vnode_isreg(vp))
        return (EISDIR);
    if (length < (off_t)0)
        return (EINVAL);

    if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
        return (retval);
    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);
    vcb = VTOVCB(vp);

    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
        retval = EINVAL;
        goto Err_Exit;
    }

    /* Fill in the flags word for the call to Extend the file */

    extendFlags = kEFNoClumpMask;
    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;
    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;
    if (cred && suser(cred, NULL) != 0)
        extendFlags |= kEFReserveMask;

    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                extendFlags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                       (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }

        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto Err_Exit;
        }

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        retval = MacToVFSError(ExtendFileC(vcb,
                        (FCB *)fp,
                        moreBytesRequested,
                        blockHint,
                        extendFlags,
                        &actualBytesAdded));

        *(ap->a_bytesallocated) = actualBytesAdded;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        hfs_systemfile_unlock(hfsmp, lockflags);

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

        hfs_end_transaction(hfsmp);

        /*
         * if we get an error and no changes were made then exit
         * otherwise we must do the hfs_update to reflect the changes
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
            *(ap->a_bytesallocated) =
                roundup(moreBytesRequested, (off_t)vcb->blockSize);

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).
             */
        }

        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto Err_Exit;
        }

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        retval = MacToVFSError(TruncateFileC(vcb, (FCB *)fp, length, false));

        hfs_systemfile_unlock(hfsmp, lockflags);

        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

        hfs_end_transaction(hfsmp);

        /*
         * if we get an error and no changes were made then exit
         * otherwise we must do the hfs_update to reflect the changes
         */
        if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
        /* These are bytes released */
        (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

        if (fp->ff_size > filebytes) {
            fp->ff_size = filebytes;

            hfs_unlock(cp);
            ubc_setsize(vp, fp->ff_size);
            hfs_lock(cp, HFS_FORCE_LOCK);
        }
    }

Std_Exit:
    cp->c_touch_chgtime = TRUE;
    cp->c_touch_modtime = TRUE;
    retval2 = hfs_update(vp, MNT_WAIT);

    if (retval == 0)
        retval = retval2;
Err_Exit:
    hfs_unlock(cp);
    return (retval);
}
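/*
 * Illustrative sketch (not part of the original source): user space
 * typically reaches hfs_vnop_allocate() through fcntl(F_PREALLOCATE).
 * Assuming fd refers to a regular file on an HFS+ volume, a caller
 * could reserve 16MB of contiguous space past the physical EOF with:
 *
 *      fstore_t fst;
 *      fst.fst_flags   = F_ALLOCATECONTIG | F_ALLOCATEALL;
 *      fst.fst_posmode = F_PEOFPOSMODE;
 *      fst.fst_offset  = 0;
 *      fst.fst_length  = 16 * 1024 * 1024;
 *      (void) fcntl(fd, F_PREALLOCATE, &fst);
 *
 * The space actually reserved is returned through *a_bytesallocated
 * (fst.fst_bytesalloc), rounded to allocation blocks as done above.
 */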
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
    struct vnop_pagein_args {
        vnode_t a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        int a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    int error;

    error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                           ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
    /*
     * Keep track of blocks read.
     */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
        struct cnode *cp;
        struct filefork *fp;
        int bytesread;
        int took_cnode_lock = 0;

        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
            bytesread = fp->ff_size;
        else
            bytesread = ap->a_size;

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        cp->c_touch_acctime = TRUE;
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
    return (error);
}
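/*
 * Added commentary (not original): the common pagein path bumps
 * ff_bytesread without taking the cnode lock to stay cheap; the lock
 * is only taken once the 64-bit counter would grow past 32 bits
 * (0xffffffff), presumably because a torn update below that boundary
 * is harmless to the hot-file recording heuristics.
 */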
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
        vnode_t a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        int a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval;
    off_t end_of_range;
    off_t filesize;

    cp = VTOC(vp);
    if (cp->c_lockowner == current_thread()) {
        panic("pageout: %s cnode lock already held!\n",
              cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
    }
    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
        if (!(ap->a_flags & UPL_NOCOMMIT)) {
            ubc_upl_abort_range(ap->a_pl,
                                ap->a_pl_offset,
                                ap->a_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        return (retval);
    }
    fp = VTOF(vp);

    filesize = fp->ff_size;
    end_of_range = ap->a_f_offset + ap->a_size - 1;

    if (end_of_range >= filesize) {
        end_of_range = (off_t)(filesize - 1);
    }
    if (ap->a_f_offset < filesize) {
        rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
        cp->c_flag |= C_MODIFIED;  /* leof is dirty */
    }
    hfs_unlock(cp);

    retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                             ap->a_size, filesize, ap->a_flags);

    /*
     * If data was written, and setuid or setgid bits are set and
     * this process is not the superuser then clear the setuid and
     * setgid bits as a precaution against tampering.
     */
    if ((retval == 0) &&
        (cp->c_mode & (S_ISUID | S_ISGID)) &&
        (vfs_context_suser(ap->a_context) != 0)) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        cp->c_mode &= ~(S_ISUID | S_ISGID);
        cp->c_touch_chgtime = TRUE;
        hfs_unlock(cp);
    }
    return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here. On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
        if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore but if it is clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite(ap);

    return (retval);
}
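/*
 * Added commentary (not original): the 0x000e test above reads the
 * last two bytes of the node, which hold the offset of the node's
 * first record.  That offset is always 14 (sizeof(BTNodeDescriptor)),
 * so seeing 0x000e as a host-order UInt16 means the node is still in
 * host byte order and must be validated and swapped to big-endian
 * before it reaches the disk.
 */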
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int disabled_caching = 0;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    int started_tr = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG && vnodetype != VLNK) {
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);
    blksize = hfsmp->blockSize;
    if (blockHint == 0)
        blockHint = hfsmp->nextAllocation;

    if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
        ((fp->ff_size > blksize) && vnodetype == VLNK)) {
        return (EFBIG);
    }

    //
    // We do not believe that this call to hfs_fsync() is
    // necessary and it causes a journal transaction
    // deadlock so we are removing it.
    //
    //if (vnodetype == VREG && !vnode_issystem(vp)) {
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);
    //    if (retval)
    //        return (retval);
    //}

    if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
        hfs_unlock(cp);
        hfs_lock_truncate(cp, TRUE);
        if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
            hfs_unlock_truncate(cp);
            return (retval);
        }
        took_trunc_lock = 1;
    }
    headblks = fp->ff_blocks;
    datablks = howmany(fp->ff_size, blksize);
    growsize = datablks * blksize;
    eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
    if (blockHint >= hfsmp->hfs_metazone_start &&
        blockHint <= hfsmp->hfs_metazone_end)
        eflags |= kEFMetadataMask;

    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp);
        return (EINVAL);
    }
    started_tr = 1;
    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
    if (!vnode_isnocache(vp)) {
        vnode_setnocache(vp);
        disabled_caching = 1;
    }
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        hfsmp->nextAllocation = nextallocsave;
        hfsmp->vcbFlags |= 0xFF00;
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed");
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
                    hfsmp->hfs_metazone_end)) {
            printf("hfs_relocate: didn't move into metadata zone\n");
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);
    started_tr = 0;

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */
    if (vnodetype == VLNK)
        retval = hfs_clonelink(vp, blksize, cred, p);
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    started_tr = 1;
    if (retval)
        goto restore;

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;
out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    // See comment up above about calls to hfs_fsync()
    //
    //if (retval == 0)
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);

    if (hfsmp->jnl) {
        if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
            (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        else
            (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
    }
exit:
    if (disabled_caching) {
        vnode_clearnocache(vp);
    }
    if (started_tr)
        hfs_end_transaction(hfsmp);

    return (retval);

restore:
    if (fp->ff_blocks == headblks)
        goto exit;
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp);
    goto exit;
}
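/*
 * Worked example for the steps above (illustrative only): for a file
 * of 5 allocation blocks, headblks = 5, datablks = 5 and growsize =
 * 5 * blksize.  STEP 1 extends the fork to 10 blocks via ExtendFileC,
 * STEP 2 copies blocks 0..4 into blocks 5..9 with one of the clone
 * routines below, and STEP 3's HeadTruncateFile(hfsmp, (FCB *)fp, 5)
 * drops blocks 0..4, leaving the clone as the file's data.
 */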
/*
 * Clone a symlink.
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
    struct buf *head_bp = NULL;
    struct buf *tail_bp = NULL;
    int error;

    error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
    if (error)
        goto out;

    tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
    if (tail_bp == NULL) {
        error = EIO;
        goto out;
    }
    bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
    error = (int)buf_bwrite(tail_bp);
out:
    if (head_bp) {
        buf_markinvalid(head_bp);
        buf_brelse(head_bp);
    }
    (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

    return (error);
}
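/*
 * Added commentary (not original): a symlink small enough to relocate
 * fits in one allocation block (larger links are rejected with EFBIG
 * in hfs_relocate), so after STEP 1 the fork holds exactly two blocks
 * and the copy from meta block 0 to meta block 1 above places the
 * link target in the newly allocated half before HeadTruncateFile
 * discards block 0.
 */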
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t writebase;
    size_t bufsize;
    size_t copysize;
    size_t iosize;
    off_t filesize;
    size_t offset;
    uio_t auio;
    int error = 0;

    filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 4096 * 16);
    offset = 0;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    hfs_unlock(VTOC(vp));

    auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, 0);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, filesize + offset,
                              filesize + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    /*
     * No need to call ubc_sync_range or hfs_invalbuf
     * since the file was copied using IO_NOCACHE.
     */

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
    return (error);
}
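/*
 * Added commentary (not original): the staging buffer is capped at
 * 4096 * 16 = 64KB, so cloning e.g. 1MB of data takes sixteen
 * read/write passes.  Because the writes use IO_NOCACHE | IO_SYNC,
 * nothing is left behind in the UBC, which is why no ubc_sync_range()
 * or hfs_invalbuf() call is needed afterwards (see comment above).
 */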
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t bufp;
    char *offset;
    size_t bufsize;
    size_t iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;

    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}