/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
extern int overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
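/*
 * can_cluster() only approves transfer sizes that are an exact multiple of
 * 4K and no larger than MAXPHYSIO/2; for example an 8192-byte request
 * qualifies while a 6000-byte request does not.
 */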
enum {
	MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

extern int  hfs_setextendedsecurity(struct hfsmount *, int);


static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);


/*****************************************************************************
*
*	I/O Operations on vnodes
*
*****************************************************************************/
int hfs_vnop_read(struct vnop_read_args *);
int hfs_vnop_write(struct vnop_write_args *);
int hfs_vnop_ioctl(struct vnop_ioctl_args *);
int hfs_vnop_select(struct vnop_select_args *);
int hfs_vnop_blktooff(struct vnop_blktooff_args *);
int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int hfs_vnop_blockmap(struct vnop_blockmap_args *);
int hfs_vnop_strategy(struct vnop_strategy_args *);
int hfs_vnop_allocate(struct vnop_allocate_args *);
int hfs_vnop_pagein(struct vnop_pagein_args *);
int hfs_vnop_pageout(struct vnop_pageout_args *);
int hfs_vnop_bwrite(struct vnop_bwrite_args *);
/*
 * Read data from a file.
 */
hfs_vnop_read(struct vnop_read_args *ap)
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */

	if (start_resid == 0)
		return (0);		/* Nothing left to do */
		return (EINVAL);	/* cant read from a negative offset */

	/* Protect against a size change. */
	hfs_lock_truncate(cp, 0);

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, 0);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track blocks read
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = bytesread;
		cp->c_atime = tv.tv_sec;
			fp->ff_bytesread += bytesread;
	hfs_unlock_truncate(cp);
/*
 * Write data to a file.
 */
hfs_vnop_write(struct vnop_write_args *ap)
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t actualBytesAdded;
	int ioflag = ap->a_ioflag;
	int cnode_locked = 0;

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	/* Protect against a size change. */
	hfs_lock_truncate(VTOC(vp), TRUE);

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
		hfs_unlock_truncate(VTOC(vp));

	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	if ((cp->c_flags & APPEND) && offset != fp->ff_size) {

	origFileSize = fp->ff_size;
	eflags = kEFDeferMask;	/* defer file block allocations */

#ifdef HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
#endif /* HFS_SPARSE_DEV */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	/* Now test if we need to extend the file */
	/* Doing so will adjust the filebytes for us */

	writelimit = offset + resid;
	if (writelimit <= filebytes)

	cred = vfs_context_ucred(ap->a_context);

	bytesToAdd = writelimit - filebytes;
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),

	if (hfs_start_transaction(hfsmp) != 0) {

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
		if (retval != E_NONE)
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	if (retval == E_NONE) {
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
			filesize = fp->ff_size;

		lflag = (ioflag & IO_SYNC);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check to see whether the area between the zero_offset and the start
			   of the transfer to see whether is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;

			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
					&invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
					zero_off = eof_page_base;

			if (inval_start < inval_end) {
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);

		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY);
		offset = uio_offset(uio);
		if (offset > fp->ff_size) {
			fp->ff_size = offset;

			ubc_setsize(vp, fp->ff_size);	/* XXX check errors */
			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING)
				fp->ff_bytesread = 0;
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;

	HFS_KNOTE(vp, NOTE_WRITE);

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
				hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);

	if (ioflag & IO_UNIT) {
			hfs_lock(cp, HFS_FORCE_LOCK);
		(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
		// LP64todo - fix this! resid needs to be user_ssize_t
		uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
		uio_setresid(uio, resid);
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
			hfs_lock(cp, HFS_FORCE_LOCK);
		retval = hfs_update(vp, TRUE);

	/* Updating vcbWrCnt doesn't need to be atomic. */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	hfs_unlock_truncate(cp);
/* support for the "bulk-access" fcntl */

#define CACHE_ELEMS	64
#define CACHE_LEVELS	16
#define PARENT_IDS_FLAG	0x100

/* from hfs_attrlist.c */
extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
			mode_t obj_mode, struct mount *mp,
			kauth_cred_t cred, struct proc *p);

/* from vfs/vfs_fsevents.c */
extern char *get_pathbuff(void);
extern void release_pathbuff(char *buff);

struct access_cache {
	int numcached;
	int cachehits;		/* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	Boolean *haveaccess;
};

struct access_t {
	uid_t     uid;			/* IN: effective user id */
	short     flags;		/* IN: access requested (i.e. R_OK) */
	short     num_groups;		/* IN: number of groups user belongs to */
	int       num_files;		/* IN: number of files to process */
	int       *file_ids;		/* IN: array of file ids */
	gid_t     *groups;		/* IN: array of groups */
	short     *access;		/* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
	uid_t       uid;		/* IN: effective user id */
	short       flags;		/* IN: access requested (i.e. R_OK) */
	short       num_groups;		/* IN: number of groups user belongs to */
	int         num_files;		/* IN: number of files to process */
	user_addr_t file_ids;		/* IN: array of file ids */
	user_addr_t groups;		/* IN: array of groups */
	user_addr_t access;		/* OUT: access info for each file (0 for 'has access') */
};
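
/*
 * Illustrative sketch (not part of the kernel build): roughly how a caller
 * could fill in a 32-bit access_t request for the bulk-access fsctl handled
 * further below (HFS_BULKACCESS).  The uid, gid and file IDs here are
 * made-up values for demonstration only.
 */
#if 0
	struct access_t req;
	int   ids[2]     = { 1234, 5678 };	/* hypothetical catalog node IDs */
	gid_t gids[1]    = { 20 };
	short results[2];

	req.uid        = 501;		/* effective uid to evaluate access as */
	req.flags      = R_OK;		/* access being requested */
	req.num_groups = 1;
	req.num_files  = 2;
	req.file_ids   = ids;
	req.groups     = gids;
	req.access     = results;	/* filled out per file: 0 means access granted */
	/* the request is then issued through the HFS_BULKACCESS fsctl */
#endif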
/*
 * Perform a binary search for the given parent_id. Return value is
 * found/not found boolean, and indexp will be the index of the item
 * or the index at which to insert the item if it's not found.
 */
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
	int index, matches = 0;

	if (cache->numcached == 0) {
		return 0; // table is empty, so insert at index=0 and report no match

	if (cache->numcached > CACHE_ELEMS) {
		/*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
		  cache->numcached, CACHE_ELEMS);*/
		cache->numcached = CACHE_ELEMS;

	hi = cache->numcached - 1;

	/* perform binary search for parent_id */
		unsigned int mid = (hi - lo)/2 + lo;
		unsigned int this_id = cache->acache[mid];

		if (parent_id == this_id) {
		if (parent_id < this_id) {
		if (parent_id > this_id) {

	/* check if lo and hi converged on the match */
	if (parent_id == cache->acache[hi]) {

	/* if no existing entry found, find index for new one */
	index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access) {
				/* change access info for existing entry... should never happen */
				cache->haveaccess[lookup_index] = access;

			/* mission accomplished */
		index = lookup_index;

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= CACHE_ELEMS) {
		//printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = CACHE_ELEMS-1;

		if (index > cache->numcached) {
			// printf("index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;
	} else if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
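
/*
 * Illustrative sketch (not part of the original source): the intended pattern
 * is to consult lookup_bucket() before doing an expensive catalog/permission
 * check and to record the verdict with add_node(), which is what
 * do_access_check() below does.
 */
#if 0
	int idx = -1;
	if (lookup_bucket(&cache, &idx, parent_id)) {
		myResult = cache.haveaccess[idx];	/* cache hit */
	} else {
		/* ... derive myResult from the catalog and permission checks ... */
		add_node(&cache, -1, parent_id, myResult);
	}
#endif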
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
	struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;

	/* otherwise, check the cnode hash incase the file/dir is incore */
	if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
		cnattrp->ca_uid = c_info.uid;
		cnattrp->ca_gid = c_info.gid;
		cnattrp->ca_mode = c_info.mode;
		keyp->hfsPlus.parentID = c_info.parentcnid;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

		/* lookup this cnid in the catalog */
		error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

		hfs_systemfile_unlock(hfsmp, lockflags);
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
	struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev)
	HFSCatalogNodeID thisNodeID;
	unsigned long myPerms;
	struct cat_attr cnattr;
	int cache_index = -1;
	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	/* root always has access */
	if (!suser(myp_ucred, NULL)) {

	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */

		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			myResult = cache->haveaccess[cache_index];
			goto ExitThisRoutine;

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;

		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
			goto ExitThisRoutine; /* no access */

		myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
					cnattr.ca_mode, hfsmp->hfs_mp,
					myp_ucred, theProcPtr);

		if ( (myPerms & X_OK) == 0 ) {
			goto ExitThisRoutine; /* no access */

		/* up the hierarchy we go */
		thisNodeID = catkey.hfsPlus.parentID;

	/* if here, we have access to this node */

	//printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		/* small optimization: get rid of double-lookup for all these */
		// printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
		add_node(cache, -1, parent_ids[i], myResult);

/* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
hfs_freezewrite_callback(struct vnode *vp, void *cargs)
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
/*
 * Control filesystem operating characteristics.
 */
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vfs_context_t a_context;
	} */ *ap)
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;

	is64bit = proc_is64bit(p);

	switch (ap->a_command) {

	case HFS_RESIZE_PROGRESS: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		if (!vnode_isvroot(vp)) {
		return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
	case HFS_RESIZE_VOLUME: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		if (!vnode_isvroot(vp)) {
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
	case HFS_CHANGE_NEXT_ALLOCATION: {

		if (vnode_vfsisrdonly(vp)) {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		if (!vnode_isvroot(vp)) {
		location = *(u_int32_t *)ap->a_data;
		if (location > hfsmp->totalBlocks - 1) {
		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		hfsmp->nextAllocation = location;
		hfsmp->vcbFlags |= 0xFF00;
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);

#ifdef HFS_SPARSE_DEV
	case HFS_SETBACKINGSTOREINFO: {
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct hfs_backingstoreinfo *bsdata;

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {
		if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
		if ((error = vnode_getwithref(di_vp))) {
			file_drop(bsdata->backingfd);

		if (vnode_mount(vp) == vnode_mount(di_vp)) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it.  This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL);	/* XXX use context! */
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
		vnode_ref(bsfs_rootvp);
		vnode_put(bsfs_rootvp);

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
		hfsmp->hfs_sparsebandblks *= 4;

		(void)vnode_put(di_vp);
		file_drop(bsdata->backingfd);
	case HFS_CLRBACKINGSTOREINFO: {
		struct vnode * tmpvp;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
#endif /* HFS_SPARSE_DEV */

		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		task = current_task();
		task_working_set_disable(task);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_global_exclusive_lock_acquire(hfsmp);
		journal_flush(hfsmp->jnl);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		// if we're not the one who froze the fs then we
		if (hfsmp->hfs_freezing_proc != current_proc()) {

		// NOTE: if you add code here, also go check the
		// code that "thaws" the fs in hfs_vnop_close()
		hfsmp->hfs_freezing_proc = NULL;
		hfs_global_exclusive_lock_release(hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)

	case HFS_BULKACCESS_FSCTL:
	case HFS_BULKACCESS: {
		/*
		 * NOTE: on entry, the vnode is locked. In case this vnode
		 * happens to be in our list of file_ids, we'll note it
		 * avoid calling hfs_chashget_nowait() on that id as that
		 * will cause a "locking against myself" panic.
		 */
		Boolean check_leaf = true;

		struct user_access_t *user_access_structp;
		struct user_access_t tmp_user_access_t;
		struct access_cache cache;

		dev_t dev = VTOC(vp)->c_dev;

		struct ucred myucred;	/* XXX ILLEGAL */
		int *file_ids = NULL;
		short *access = NULL;

		cnid_t prevParent_cnid = 0;
		unsigned long myPerms;
		struct cat_attr cnattr;
		struct cnode *skip_cp = VTOC(vp);
		struct vfs_context my_context;

		/* first, return error if not run as root */
		if (cred->cr_ruid != 0) {

		/* initialize the local cache and buffers */
		cache.numcached = 0;
		cache.cachehits = 0;

		file_ids = (int *) get_pathbuff();
		access = (short *) get_pathbuff();
		cache.acache = (int *) get_pathbuff();
		cache.haveaccess = (Boolean *) get_pathbuff();

		if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
			release_pathbuff((char *) file_ids);
			release_pathbuff((char *) access);
			release_pathbuff((char *) cache.acache);
			release_pathbuff((char *) cache.haveaccess);

		/* struct copyin done during dispatch... need to copy file_id array separately */
		if (ap->a_data == NULL) {
			goto err_exit_bulk_access;

			user_access_structp = (struct user_access_t *)ap->a_data;

			struct access_t * accessp = (struct access_t *)ap->a_data;
			tmp_user_access_t.uid = accessp->uid;
			tmp_user_access_t.flags = accessp->flags;
			tmp_user_access_t.num_groups = accessp->num_groups;
			tmp_user_access_t.num_files = accessp->num_files;
			tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
			tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
			tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
			user_access_structp = &tmp_user_access_t;

		num_files = user_access_structp->num_files;
		if (num_files < 1) {
			goto err_exit_bulk_access;
		if (num_files > 256) {
			goto err_exit_bulk_access;

		if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
				num_files * sizeof(int)))) {
			goto err_exit_bulk_access;

		/* fill in the ucred structure */
		flags = user_access_structp->flags;
		if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {

		/* check if we've been passed leaf node ids or parent ids */
		if (flags & PARENT_IDS_FLAG) {

		memset(&myucred, 0, sizeof(myucred));

		myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
		myucred.cr_ngroups = user_access_structp->num_groups;
		if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
			myucred.cr_ngroups = 0;
		} else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
				myucred.cr_ngroups * sizeof(gid_t)))) {
			goto err_exit_bulk_access;
		myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
		myucred.cr_gmuid = myucred.cr_uid;

		my_context.vc_proc = p;
		my_context.vc_ucred = &myucred;

		/* Check access to each file_id passed in */
		for (i = 0; i < num_files; i++) {

			cnid = (cnid_t) file_ids[i];

			/* root always has access */
			if (!suser(&myucred, NULL)) {

				/* do the lookup (checks the cnode hash, then the catalog) */
				error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
					access[i] = (short) error;

				/* before calling CheckAccess(), check the target file for read access */
				myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
						cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p);

				/* fail fast if no access */
				if ((myPerms & flags) == 0) {

				/* we were passed an array of parent ids */
				catkey.hfsPlus.parentID = cnid;

				/* if the last guy had the same parent and had access, we're done */
				if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {

				myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
						skip_cp, p, &myucred, dev);

					access[i] = 0; // have access.. no errors to report
					access[i] = (error != 0 ? (short) error : EACCES);

				prevParent_cnid = catkey.hfsPlus.parentID;

				cnid = (cnid_t)file_ids[i];

				while (cnid >= kRootDirID) {
					/* get the vnode for this cnid */
					myErr = hfs_vget(hfsmp, cnid, &vp, 0);

					cnid = VTOC(vp)->c_parentcnid;

					hfs_unlock(VTOC(vp));
					if (vnode_vtype(vp) == VDIR) {
						myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
						myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);

		/* copyout the access array */
		if ((error = copyout((caddr_t)access, user_access_structp->access,
				num_files * sizeof (short)))) {
			goto err_exit_bulk_access;

	err_exit_bulk_access:

		//printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

		release_pathbuff((char *) cache.acache);
		release_pathbuff((char *) cache.haveaccess);
		release_pathbuff((char *) file_ids);
		release_pathbuff((char *) access);

	} /* HFS_BULKACCESS */

	case HFS_SETACLSTATE: {

		if (ap->a_data == NULL) {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		state = *(int *)ap->a_data;

		// super-user can enable or disable acl's on a volume.
		// the volume owner can only enable acl's
		if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
		if (state == 0 || state == 1)
			return hfs_setextendedsecurity(hfsmp, state);

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
			error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
		hfs_unlock(VTOC(vp));

		register struct cnode *cp;

		if (!vnode_isreg(vp))
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		/*
		 * used by regression test to determine if
		 * all the dirty pages (via write) have been cleaned
		 * after a call to 'fsync'.
		 */
		error = is_file_clean(vp, VTOF(vp)->ff_size);

		register struct radvisory *ra;
		struct filefork *fp;

		if (!vnode_isreg(vp))

		ra = (struct radvisory *)(ap->a_data);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), TRUE);

		if (ra->ra_offset >= fp->ff_size) {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);

		hfs_unlock_truncate(VTOC(vp));

	case F_READBOOTSTRAP:
	case F_WRITEBOOTSTRAP:
	{
		struct vnode *devvp = NULL;
		user_fbootstraptransfer_t *user_bootstrapp;
		daddr64_t blockNumber;
		user_fbootstraptransfer_t user_bootstrap;

		if (!vnode_isvroot(vp))

		/* LP64 - when caller is a 64 bit process then we are passed a pointer
		 * to a user_fbootstraptransfer_t else we get a pointer to a
		 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
		 */
			user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;

			fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
			user_bootstrapp = &user_bootstrap;
			user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
			user_bootstrap.fbt_length = bootstrapp->fbt_length;
			user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);

		if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)

		devvp = VTOHFS(vp)->hfs_devvp;
		auio = uio_create(1, user_bootstrapp->fbt_offset,
				is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
				(ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
		uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

		devBlockSize = vfs_devblocksize(vnode_mount(vp));

		while (uio_resid(auio) > 0) {
			blockNumber = uio_offset(auio) / devBlockSize;
			error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
				if (bp) buf_brelse(bp);

			blockOffset = uio_offset(auio) % devBlockSize;
			xfersize = devBlockSize - blockOffset;
			error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);

			if (uio_rw(auio) == UIO_WRITE) {
				error = VNOP_BWRITE(bp);

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
			*(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);

	case HFS_GET_MOUNT_TIME:
		return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));

	case HFS_GET_LAST_MTIME:
		return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* Should never get here */
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vfs_context_t a_context;
	};
*/
	/*
	 * We should really check to see if I/O is possible.
	 */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently its 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	daddr_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * (off_t)logBlockSize;

	lockExtBtree = overflow_extents(fp);

		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),

		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (can_cluster(logBlockSize)) {
			/* Make sure this result never goes negative: */
			*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
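			/*
			 * Worked example of the read-ahead computation above: with a 4K
			 * logical block size and bytesContAvail == 20480 contiguous bytes,
			 * *runp becomes (20480 / 4096) - 1 = 4 remaining blocks in the run.
			 */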
/*
 * Convert logical block number to file offset.
 */
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
	};
*/
	if (ap->a_vp == NULL)
	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

/*
 * Convert file offset to logical block number.
 */
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		daddr64_t *a_lblkno;
	};
*/
	if (ap->a_vp == NULL)
	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
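
/*
 * Worked example for the two conversions above: with a logical block size of
 * 4096 bytes, logical block 3 maps to file offset 3 * 4096 = 12288, and an
 * offset of 12288 maps back to logical block 12288 / 4096 = 3.
 */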
/*
 * Map file offset to physical block number.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vfs_context_t a_context;
	};
*/
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)

	if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			panic("blockmap: %s cnode lock already held!\n",
				cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");

	if (fp->ff_unallocblocks) {
		if (hfs_start_transaction(hfsmp) != 0) {

		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;

		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if (fp->ff_unallocblocks) {
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
				hfs_systemfile_unlock(hfsmp, lockflags);

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				kEFAllMask | kEFNoClumpMask, &actbytes);

			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);

		hfs_systemfile_unlock(hfsmp, lockflags);

		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);

	/* Adjust the mapping information for invalid file ranges: */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset: */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;

		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;

		*ap->a_run = bytesContAvail;
		*(int *)ap->a_poff = 0;

	return (MacToVFSError(retval));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 */
hfs_vnop_strategy(struct vnop_strategy_args *ap)
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);
	struct cnode *cp = VTOC(vp);

	return (buf_strategy(cp->c_devvp, ap));
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	off_t actualBytesAdded;
	u_int64_t old_filesize;
	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;
	old_filesize = fp->ff_size;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
						&actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && retval == E_NONE) {
				struct rl_entry *invalid_range;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write():	*/
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;

					/* The page containing the (current) eof is invalid: just add the
					   remainder of the page to the invalid list, along with the area
					   being newly allocated:
					 */
					rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
					cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				panic("hfs_truncate: invoked on non-UBC object?!");
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

		/* Nested transactions will do their own ubc_setsize. */
			/*
			 * ubc_setsize can cause a pagein here
			 * so we need to drop cnode lock.
			 */
			ubc_setsize(vp, length);
			hfs_lock(cp, HFS_FORCE_LOCK);

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

			/* Nested transactions will do their own ubc_setsize. */
				/*
				 * ubc_setsize can cause a pageout here
				 * so we need to drop cnode lock.
				 */
				ubc_setsize(vp, length);
				hfs_lock(cp, HFS_FORCE_LOCK);

			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed. And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {

			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			fp->ff_size = length;
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);

		/* Only set update flag if the logical length changes */
		if (old_filesize != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	cp->c_touch_chgtime = TRUE;
	retval = hfs_update(vp, MNT_WAIT);
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             vfs_context_t context)
	struct filefork *fp = VTOF(vp);
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	if (vnode_isdir(vp))
		return (EISDIR);	/* cannot truncate an HFS directory! */

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE) {
				filebytes -= HFS_BIGFILE_SIZE;
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE) {
				filebytes += HFS_BIGFILE_SIZE;
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipsetsize, context);

	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
/*
 * Preallocate file storage space.
 */
hfs_vnop_allocate(struct vnop_allocate_args /* {
		off_t *a_bytesallocated;
		vfs_context_t a_context;
	} */ *ap)
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	off_t length = ap->a_length;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	int retval, retval2;
	UInt32 extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
	if (length < (off_t)0)

	if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necesary, then we're done */
	if (filebytes == length)

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {

		moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;

		if (hfs_start_transaction(hfsmp) != 0) {

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		retval = MacToVFSError(ExtendFileC(vcb,
					&actualBytesAdded));

		*(ap->a_bytesallocated) = actualBytesAdded;
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

		hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 */
		if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
			*(ap->a_bytesallocated) =
				roundup(moreBytesRequested, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

		if (hfs_start_transaction(hfsmp) != 0) {

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));

		hfs_systemfile_unlock(hfsmp, lockflags);

		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);

	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		struct timeval tv;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}
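
/*
 * In outline: the ff_bytesread accounting above feeds HFS hot-file recording.
 * While hfc_stage is HFC_RECORDING, per-file read traffic is sampled, and the
 * sample restarts whenever c_atime predates the current hfc_timebase period.
 * Frequently read files found this way are candidates for relocation (see
 * hfs_relocate() below).
 */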
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval;
	off_t end_of_range;
	off_t filesize;

	cp = VTOC(vp);
	if (cp->c_lockowner == current_thread()) {
		panic("pageout: %s cnode lock already held!\n",
			cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
	}
	if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		if (!(ap->a_flags & UPL_NOCOMMIT)) {
			ubc_upl_abort_range(ap->a_pl,
					    ap->a_pl_offset,
					    ap->a_size,
					    UPL_ABORT_FREE_ON_EMPTY);
		}
		return (retval);
	}
	fp = VTOF(vp);

	filesize = fp->ff_size;
	end_of_range = ap->a_f_offset + ap->a_size - 1;

	if (end_of_range >= filesize) {
		end_of_range = (off_t)(filesize - 1);
	}
	if (ap->a_f_offset < filesize) {
		rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
		cp->c_flag |= C_MODIFIED;  /* leof is dirty */
	}
	hfs_unlock(cp);

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                         ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
		hfs_unlock(cp);
	}
	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {
		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
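		/*
		 * The test below relies on B-tree node layout: the last two bytes
		 * of a node hold the offset of record 0, which is always
		 * sizeof(BTNodeDescriptor) == 14 (0x000e).  Reading 0x000e as a
		 * native UInt16 therefore means the node is still in host byte
		 * order and must be swapped to big-endian before going to disk.
		 */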
		if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
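/*
 * In terms of the routines below: step 1 is the ExtendFileC() call that grows
 * the fork past its current physical EOF, step 2 is hfs_clonelink(),
 * hfs_clonesysfile() or hfs_clonefile() copying the data into the new blocks,
 * and step 3 is HeadTruncateFile() dropping the original blocks so the fork's
 * extents begin at the cloned copy.
 */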
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int disabled_caching = 0;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
			hfs_unlock_truncate(cp);
			return (retval);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp);
		return (EINVAL);
	}

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	if (!vnode_isnocache(vp)) {
		vnode_setnocache(vp);
		disabled_caching = 1;
	}
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		hfsmp->nextAllocation = nextallocsave;
		hfsmp->vcbFlags |= 0xFF00;
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		    ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
		      hfsmp->hfs_metazone_end)) {
			printf("hfs_relocate: didn't move into metadata zone\n");
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
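	/*
	 * Note that once HFS_FRAGMENTED_FREESPACE is set here, the check near
	 * the top of this function short-circuits later relocation attempts,
	 * so a badly fragmented volume is not rescanned on every call.
	 */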
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */
	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp);
		return (EINVAL);
	}
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	(void) hfs_update(vp, MNT_WAIT);

	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	else
		(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
exit:
	if (disabled_caching) {
		vnode_clearnocache(vp);
	}
	hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks)
		goto exit;
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp);
	goto exit;
}
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t  bufp;
	size_t   bufsize;
	size_t   copysize;
	size_t   iosize;
	off_t    filesize;
	off_t    writebase;
	uio_t    auio;
	size_t   offset = 0;
	int      error = 0;

	filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, 0);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */
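	/*
	 * A likely rationale for IO_NOCACHE | IO_SYNC in the loop above: it
	 * keeps the clone pass from filling the buffer cache with pages backed
	 * by the soon-to-be-freed source blocks, and it ensures the new copy is
	 * on disk before hfs_relocate() switches the fork's extents over to it.
	 */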
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t  bufp;
	char *   offset;
	size_t   bufsize;
	size_t   iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);