2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
22 /* @(#)hfs_readwrite.c 1.0
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
42 #include <sys/vfs_context.h>
44 #include <miscfs/specfs/specdev.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
50 #include <sys/kdebug.h>
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
61 extern int overflow_extents(struct filefork
*fp
);
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
66 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
69 extern u_int32_t
GetLogicalBlockSize(struct vnode
*vp
);
71 extern int hfs_setextendedsecurity(struct hfsmount
*, int);
74 static int hfs_clonelink(struct vnode
*, int, kauth_cred_t
, struct proc
*);
75 static int hfs_clonefile(struct vnode
*, int, int, int);
76 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
79 /*****************************************************************************
81 * I/O Operations on vnodes
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args
*);
85 int hfs_vnop_write(struct vnop_write_args
*);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args
*);
87 int hfs_vnop_select(struct vnop_select_args
*);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args
*);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args
*);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args
*);
91 int hfs_vnop_strategy(struct vnop_strategy_args
*);
92 int hfs_vnop_allocate(struct vnop_allocate_args
*);
93 int hfs_vnop_pagein(struct vnop_pagein_args
*);
94 int hfs_vnop_pageout(struct vnop_pageout_args
*);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args
*);
99 * Read data from a file.
102 hfs_vnop_read(struct vnop_read_args
*ap
)
104 uio_t uio
= ap
->a_uio
;
105 struct vnode
*vp
= ap
->a_vp
;
108 struct hfsmount
*hfsmp
;
111 off_t start_resid
= uio_resid(uio
);
112 off_t offset
= uio_offset(uio
);
116 /* Preflight checks */
117 if (!vnode_isreg(vp
)) {
118 /* can only read regular files */
124 if (start_resid
== 0)
125 return (0); /* Nothing left to do */
127 return (EINVAL
); /* cant read from a negative offset */
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp
, 0);
136 filesize
= fp
->ff_size
;
137 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
138 if (offset
> filesize
) {
139 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
140 (offset
> (off_t
)MAXHFSFILESIZE
)) {
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
147 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
149 retval
= cluster_read(vp
, uio
, filesize
, 0);
151 cp
->c_touch_acctime
= TRUE
;
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
154 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
157 * Keep track blocks read
159 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
160 int took_cnode_lock
= 0;
163 bytesread
= start_resid
- uio_resid(uio
);
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
167 hfs_lock(cp
, HFS_FORCE_LOCK
);
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
174 if (cp
->c_atime
< VTOHFS(vp
)->hfc_timebase
) {
177 fp
->ff_bytesread
= bytesread
;
179 cp
->c_atime
= tv
.tv_sec
;
181 fp
->ff_bytesread
+= bytesread
;
187 hfs_unlock_truncate(cp
);
192 * Write data to a file.
195 hfs_vnop_write(struct vnop_write_args
*ap
)
197 uio_t uio
= ap
->a_uio
;
198 struct vnode
*vp
= ap
->a_vp
;
201 struct hfsmount
*hfsmp
;
202 kauth_cred_t cred
= NULL
;
206 off_t actualBytesAdded
;
211 int ioflag
= ap
->a_ioflag
;
214 int cnode_locked
= 0;
216 // LP64todo - fix this! uio_resid may be 64-bit value
217 resid
= uio_resid(uio
);
218 offset
= uio_offset(uio
);
224 if (!vnode_isreg(vp
))
225 return (EPERM
); /* Can only write regular files */
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp
), TRUE
);
230 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
231 hfs_unlock_truncate(VTOC(vp
));
238 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
240 if (ioflag
& IO_APPEND
) {
241 uio_setoffset(uio
, fp
->ff_size
);
242 offset
= fp
->ff_size
;
244 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
249 origFileSize
= fp
->ff_size
;
250 eflags
= kEFDeferMask
; /* defer file block allocations */
252 #ifdef HFS_SPARSE_DEV
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
258 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
259 (hfs_freeblks(hfsmp
, 0) < 2048)) {
260 eflags
&= ~kEFDeferMask
;
263 #endif /* HFS_SPARSE_DEV */
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
266 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
271 writelimit
= offset
+ resid
;
272 if (writelimit
<= filebytes
)
275 cred
= vfs_context_ucred(ap
->a_context
);
277 bytesToAdd
= writelimit
- filebytes
;
278 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
284 if (hfs_start_transaction(hfsmp
) != 0) {
289 while (writelimit
> filebytes
) {
290 bytesToAdd
= writelimit
- filebytes
;
291 if (cred
&& suser(cred
, NULL
) != 0)
292 eflags
|= kEFReserveMask
;
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags
= SFL_BITMAP
;
296 if (overflow_extents(fp
))
297 lockflags
|= SFL_EXTENTS
;
298 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
302 fp
->ff_bytesread
= 0;
304 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
305 0, eflags
, &actualBytesAdded
));
307 hfs_systemfile_unlock(hfsmp
, lockflags
);
309 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
311 if (retval
!= E_NONE
)
313 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
315 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
317 (void) hfs_update(vp
, TRUE
);
318 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
319 (void) hfs_end_transaction(hfsmp
);
322 if (retval
== E_NONE
) {
330 struct rl_entry
*invalid_range
;
332 if (writelimit
> fp
->ff_size
)
333 filesize
= writelimit
;
335 filesize
= fp
->ff_size
;
337 lflag
= (ioflag
& IO_SYNC
);
339 if (offset
<= fp
->ff_size
) {
340 zero_off
= offset
& ~PAGE_MASK_64
;
342 /* Check to see whether the area between the zero_offset and the start
343 of the transfer to see whether is invalid and should be zero-filled
344 as part of the transfer:
346 if (offset
> zero_off
) {
347 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
348 lflag
|= IO_HEADZEROFILL
;
351 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data but in all cases merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start is invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluser_write of the actual data.
365 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
366 inval_end
= offset
& ~PAGE_MASK_64
;
367 zero_off
= fp
->ff_size
;
369 if ((fp
->ff_size
& PAGE_MASK_64
) &&
370 (rl_scan(&fp
->ff_invalidranges
,
373 &invalid_range
) != RL_NOOVERLAP
)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
382 if (inval_end
> eof_page_base
) {
383 inval_start
= eof_page_base
;
385 zero_off
= eof_page_base
;
389 if (inval_start
< inval_end
) {
391 /* There's some range of data that's going to be marked invalid */
393 if (zero_off
< inval_start
) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
400 retval
= cluster_write(vp
, (uio_t
) 0,
401 fp
->ff_size
, inval_start
,
403 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
404 hfs_lock(cp
, HFS_FORCE_LOCK
);
406 if (retval
) goto ioerr_exit
;
407 offset
= uio_offset(uio
);
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
413 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
414 zero_off
= fp
->ff_size
= inval_end
;
417 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
423 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
424 if (tail_off
> filesize
) tail_off
= filesize
;
425 if (tail_off
> writelimit
) {
426 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
427 lflag
|= IO_TAILZEROFILL
;
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
441 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
442 if (io_start
< fp
->ff_size
) {
445 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
446 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
451 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
452 tail_off
, lflag
| IO_NOZERODIRTY
);
453 offset
= uio_offset(uio
);
454 if (offset
> fp
->ff_size
) {
455 fp
->ff_size
= offset
;
457 ubc_setsize(vp
, fp
->ff_size
); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp
->hfc_stage
== HFC_RECORDING
)
460 fp
->ff_bytesread
= 0;
462 if (resid
> uio_resid(uio
)) {
463 cp
->c_touch_chgtime
= TRUE
;
464 cp
->c_touch_modtime
= TRUE
;
467 HFS_KNOTE(vp
, NOTE_WRITE
);
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
475 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
476 cred
= vfs_context_ucred(ap
->a_context
);
477 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
479 hfs_lock(cp
, HFS_FORCE_LOCK
);
482 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
486 if (ioflag
& IO_UNIT
) {
488 hfs_lock(cp
, HFS_FORCE_LOCK
);
491 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
493 // LP64todo - fix this! resid needs to by user_ssize_t
494 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
495 uio_setresid(uio
, resid
);
496 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
498 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
500 hfs_lock(cp
, HFS_FORCE_LOCK
);
503 retval
= hfs_update(vp
, TRUE
);
505 /* Updating vcbWrCnt doesn't need to be atomic. */
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
509 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
513 hfs_unlock_truncate(cp
);
517 /* support for the "bulk-access" fcntl */
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid
, gid_t obj_gid
,
525 mode_t obj_mode
, struct mount
*mp
,
526 kauth_cred_t cred
, struct proc
*p
);
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff
);
532 struct access_cache
{
534 int cachehits
; /* these two for statistics gathering */
536 unsigned int *acache
;
541 uid_t uid
; /* IN: effective user id */
542 short flags
; /* IN: access requested (i.e. R_OK) */
543 short num_groups
; /* IN: number of groups user belongs to */
544 int num_files
; /* IN: number of files to process */
545 int *file_ids
; /* IN: array of file ids */
546 gid_t
*groups
; /* IN: array of groups */
547 short *access
; /* OUT: access info for each file (0 for 'has access') */
550 struct user_access_t
{
551 uid_t uid
; /* IN: effective user id */
552 short flags
; /* IN: access requested (i.e. R_OK) */
553 short num_groups
; /* IN: number of groups user belongs to */
554 int num_files
; /* IN: number of files to process */
555 user_addr_t file_ids
; /* IN: array of file ids */
556 user_addr_t groups
; /* IN: array of groups */
557 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
566 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
569 int index
, matches
= 0;
571 if (cache
->numcached
== 0) {
573 return 0; // table is empty, so insert at index=0 and report no match
576 if (cache
->numcached
> CACHE_ELEMS
) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache
->numcached
= CACHE_ELEMS
;
583 hi
= cache
->numcached
- 1;
586 /* perform binary search for parent_id */
588 unsigned int mid
= (hi
- lo
)/2 + lo
;
589 unsigned int this_id
= cache
->acache
[mid
];
591 if (parent_id
== this_id
) {
596 if (parent_id
< this_id
) {
601 if (parent_id
> this_id
) {
607 /* check if lo and hi converged on the match */
608 if (parent_id
== cache
->acache
[hi
]) {
612 /* if no existing entry found, find index for new one */
614 index
= (parent_id
< cache
->acache
[hi
]) ? hi
: hi
+ 1;
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
630 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
632 int lookup_index
= -1;
634 /* need to do a lookup first if -1 passed for index */
636 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
637 if (cache
->haveaccess
[lookup_index
] != access
) {
638 /* change access info for existing entry... should never happen */
639 cache
->haveaccess
[lookup_index
] = access
;
642 /* mission accomplished */
645 index
= lookup_index
;
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache
->numcached
>= CACHE_ELEMS
) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache
->numcached
= CACHE_ELEMS
-1;
655 if (index
> cache
->numcached
) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index
= cache
->numcached
;
659 } else if (index
>= 0 && index
< cache
->numcached
) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
662 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(Boolean
) );
665 cache
->acache
[index
] = nodeID
;
666 cache
->haveaccess
[index
] = access
;
679 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
681 struct cinfo
*cip
= (struct cinfo
*)arg
;
683 cip
->uid
= attrp
->ca_uid
;
684 cip
->gid
= attrp
->ca_gid
;
685 cip
->mode
= attrp
->ca_mode
;
686 cip
->parentcnid
= descp
->cd_parentcnid
;
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
696 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, dev_t dev
, cnid_t cnid
,
697 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
, struct proc
*p
)
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid
== skip_cp
->c_cnid
) {
703 cnattrp
->ca_uid
= skip_cp
->c_uid
;
704 cnattrp
->ca_gid
= skip_cp
->c_gid
;
705 cnattrp
->ca_mode
= skip_cp
->c_mode
;
706 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
710 /* otherwise, check the cnode hash incase the file/dir is incore */
711 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
712 cnattrp
->ca_uid
= c_info
.uid
;
713 cnattrp
->ca_gid
= c_info
.gid
;
714 cnattrp
->ca_mode
= c_info
.mode
;
715 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
719 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
721 /* lookup this cnid in the catalog */
722 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
724 hfs_systemfile_unlock(hfsmp
, lockflags
);
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
738 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
739 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
, dev_t dev
)
743 HFSCatalogNodeID thisNodeID
;
744 unsigned long myPerms
;
745 struct cat_attr cnattr
;
746 int cache_index
= -1;
749 int i
= 0, ids_to_cache
= 0;
750 int parent_ids
[CACHE_LEVELS
];
752 /* root always has access */
753 if (!suser(myp_ucred
, NULL
)) {
758 while (thisNodeID
>= kRootDirID
) {
759 myResult
= 0; /* default to "no access" */
761 /* check the cache before resorting to hitting the catalog */
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
766 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
768 myResult
= cache
->haveaccess
[cache_index
];
769 goto ExitThisRoutine
;
772 /* remember which parents we want to cache */
773 if (ids_to_cache
< CACHE_LEVELS
) {
774 parent_ids
[ids_to_cache
] = thisNodeID
;
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
, theProcPtr
);
781 goto ExitThisRoutine
; /* no access */
784 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
785 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
786 myp_ucred
, theProcPtr
);
788 if ( (myPerms
& X_OK
) == 0 ) {
790 goto ExitThisRoutine
; /* no access */
793 /* up the hierarchy we go */
794 thisNodeID
= catkey
.hfsPlus
.parentID
;
797 /* if here, we have access to this node */
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
807 /* cache the parent directory(ies) */
808 for (i
= 0; i
< ids_to_cache
; i
++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache
, -1, parent_ids
[i
], myResult
);
816 /* end "bulk-access" support */
821 * Callback for use with freeze ioctl.
824 hfs_freezewrite_callback(struct vnode
*vp
, void *cargs
)
826 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
832 * Control filesystem operating characteristics.
835 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
840 vfs_context_t a_context;
843 struct vnode
* vp
= ap
->a_vp
;
844 struct hfsmount
*hfsmp
= VTOHFS(vp
);
845 vfs_context_t context
= ap
->a_context
;
846 kauth_cred_t cred
= vfs_context_ucred(context
);
847 proc_t p
= vfs_context_proc(context
);
848 struct vfsstatfs
*vfsp
;
851 is64bit
= proc_is64bit(p
);
853 switch (ap
->a_command
) {
855 case HFS_RESIZE_PROGRESS
: {
857 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
858 if (suser(cred
, NULL
) &&
859 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
860 return (EACCES
); /* must be owner of file system */
862 if (!vnode_isvroot(vp
)) {
865 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
867 case HFS_RESIZE_VOLUME
: {
871 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
872 if (suser(cred
, NULL
) &&
873 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
874 return (EACCES
); /* must be owner of file system */
876 if (!vnode_isvroot(vp
)) {
879 newsize
= *(u_int64_t
*)ap
->a_data
;
880 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
882 if (newsize
> cursize
) {
883 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
884 } else if (newsize
< cursize
) {
885 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
890 case HFS_CHANGE_NEXT_ALLOCATION
: {
893 if (vnode_vfsisrdonly(vp
)) {
896 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
897 if (suser(cred
, NULL
) &&
898 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
899 return (EACCES
); /* must be owner of file system */
901 if (!vnode_isvroot(vp
)) {
904 location
= *(u_int32_t
*)ap
->a_data
;
905 if (location
> hfsmp
->totalBlocks
- 1) {
908 /* Return previous value. */
909 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
910 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
911 hfsmp
->nextAllocation
= location
;
912 hfsmp
->vcbFlags
|= 0xFF00;
913 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
917 #ifdef HFS_SPARSE_DEV
918 case HFS_SETBACKINGSTOREINFO
: {
919 struct vnode
* bsfs_rootvp
;
920 struct vnode
* di_vp
;
921 struct hfs_backingstoreinfo
*bsdata
;
924 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
927 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
928 if (suser(cred
, NULL
) &&
929 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
930 return (EACCES
); /* must be owner of file system */
932 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
933 if (bsdata
== NULL
) {
936 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
939 if ((error
= vnode_getwithref(di_vp
))) {
940 file_drop(bsdata
->backingfd
);
944 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
945 (void)vnode_put(di_vp
);
946 file_drop(bsdata
->backingfd
);
951 * Obtain the backing fs root vnode and keep a reference
952 * on it. This reference will be dropped in hfs_unmount.
954 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
956 (void)vnode_put(di_vp
);
957 file_drop(bsdata
->backingfd
);
960 vnode_ref(bsfs_rootvp
);
961 vnode_put(bsfs_rootvp
);
963 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
964 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
965 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
966 hfsmp
->hfs_sparsebandblks
*= 4;
968 (void)vnode_put(di_vp
);
969 file_drop(bsdata
->backingfd
);
972 case HFS_CLRBACKINGSTOREINFO
: {
973 struct vnode
* tmpvp
;
975 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
976 if (suser(cred
, NULL
) &&
977 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
978 return (EACCES
); /* must be owner of file system */
980 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
981 hfsmp
->hfs_backingfs_rootvp
) {
983 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
984 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
985 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
986 hfsmp
->hfs_sparsebandblks
= 0;
991 #endif /* HFS_SPARSE_DEV */
1000 mp
= vnode_mount(vp
);
1001 hfsmp
= VFSTOHFS(mp
);
1006 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1008 task
= current_task();
1009 task_working_set_disable(task
);
1011 // flush things before we get started to try and prevent
1012 // dirty data from being paged out while we're frozen.
1013 // note: can't do this after taking the lock as it will
1014 // deadlock against ourselves.
1015 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1016 hfs_global_exclusive_lock_acquire(hfsmp
);
1017 journal_flush(hfsmp
->jnl
);
1019 // don't need to iterate on all vnodes, we just need to
1020 // wait for writes to the system files and the device vnode
1021 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1022 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1023 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1024 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1025 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1026 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1027 if (hfsmp
->hfs_attribute_vp
)
1028 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1029 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1031 hfsmp
->hfs_freezing_proc
= current_proc();
1040 // if we're not the one who froze the fs then we
1042 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1046 // NOTE: if you add code here, also go check the
1047 // code that "thaws" the fs in hfs_vnop_close()
1049 hfsmp
->hfs_freezing_proc
= NULL
;
1050 hfs_global_exclusive_lock_release(hfsmp
);
1051 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1056 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1057 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1059 case HFS_BULKACCESS_FSCTL
:
1060 case HFS_BULKACCESS
: {
1062 * NOTE: on entry, the vnode is locked. Incase this vnode
1063 * happens to be in our list of file_ids, we'll note it
1064 * avoid calling hfs_chashget_nowait() on that id as that
1065 * will cause a "locking against myself" panic.
1067 Boolean check_leaf
= true;
1069 struct user_access_t
*user_access_structp
;
1070 struct user_access_t tmp_user_access_t
;
1071 struct access_cache cache
;
1075 dev_t dev
= VTOC(vp
)->c_dev
;
1078 struct ucred myucred
; /* XXX ILLEGAL */
1080 int *file_ids
= NULL
;
1081 short *access
= NULL
;
1084 cnid_t prevParent_cnid
= 0;
1085 unsigned long myPerms
;
1087 struct cat_attr cnattr
;
1089 struct cnode
*skip_cp
= VTOC(vp
);
1090 struct vfs_context my_context
;
1092 /* first, return error if not run as root */
1093 if (cred
->cr_ruid
!= 0) {
1097 /* initialize the local cache and buffers */
1098 cache
.numcached
= 0;
1099 cache
.cachehits
= 0;
1102 file_ids
= (int *) get_pathbuff();
1103 access
= (short *) get_pathbuff();
1104 cache
.acache
= (int *) get_pathbuff();
1105 cache
.haveaccess
= (Boolean
*) get_pathbuff();
1107 if (file_ids
== NULL
|| access
== NULL
|| cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1108 release_pathbuff((char *) file_ids
);
1109 release_pathbuff((char *) access
);
1110 release_pathbuff((char *) cache
.acache
);
1111 release_pathbuff((char *) cache
.haveaccess
);
1116 /* struct copyin done during dispatch... need to copy file_id array separately */
1117 if (ap
->a_data
== NULL
) {
1119 goto err_exit_bulk_access
;
1123 user_access_structp
= (struct user_access_t
*)ap
->a_data
;
1126 struct access_t
* accessp
= (struct access_t
*)ap
->a_data
;
1127 tmp_user_access_t
.uid
= accessp
->uid
;
1128 tmp_user_access_t
.flags
= accessp
->flags
;
1129 tmp_user_access_t
.num_groups
= accessp
->num_groups
;
1130 tmp_user_access_t
.num_files
= accessp
->num_files
;
1131 tmp_user_access_t
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1132 tmp_user_access_t
.groups
= CAST_USER_ADDR_T(accessp
->groups
);
1133 tmp_user_access_t
.access
= CAST_USER_ADDR_T(accessp
->access
);
1134 user_access_structp
= &tmp_user_access_t
;
1137 num_files
= user_access_structp
->num_files
;
1138 if (num_files
< 1) {
1139 goto err_exit_bulk_access
;
1141 if (num_files
> 256) {
1143 goto err_exit_bulk_access
;
1146 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1147 num_files
* sizeof(int)))) {
1148 goto err_exit_bulk_access
;
1151 /* fill in the ucred structure */
1152 flags
= user_access_structp
->flags
;
1153 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1157 /* check if we've been passed leaf node ids or parent ids */
1158 if (flags
& PARENT_IDS_FLAG
) {
1162 memset(&myucred
, 0, sizeof(myucred
));
1164 myucred
.cr_uid
= myucred
.cr_ruid
= myucred
.cr_svuid
= user_access_structp
->uid
;
1165 myucred
.cr_ngroups
= user_access_structp
->num_groups
;
1166 if (myucred
.cr_ngroups
< 1 || myucred
.cr_ngroups
> 16) {
1167 myucred
.cr_ngroups
= 0;
1168 } else if ((error
= copyin(user_access_structp
->groups
, (caddr_t
)myucred
.cr_groups
,
1169 myucred
.cr_ngroups
* sizeof(gid_t
)))) {
1170 goto err_exit_bulk_access
;
1172 myucred
.cr_rgid
= myucred
.cr_svgid
= myucred
.cr_groups
[0];
1173 myucred
.cr_gmuid
= myucred
.cr_uid
;
1175 my_context
.vc_proc
= p
;
1176 my_context
.vc_ucred
= &myucred
;
1178 /* Check access to each file_id passed in */
1179 for (i
= 0; i
< num_files
; i
++) {
1181 cnid
= (cnid_t
) file_ids
[i
];
1183 /* root always has access */
1184 if (!suser(&myucred
, NULL
)) {
1191 /* do the lookup (checks the cnode hash, then the catalog) */
1192 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
, p
);
1194 access
[i
] = (short) error
;
1198 /* before calling CheckAccess(), check the target file for read access */
1199 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1200 cnattr
.ca_mode
, hfsmp
->hfs_mp
, &myucred
, p
);
1203 /* fail fast if no access */
1204 if ((myPerms
& flags
) == 0) {
1209 /* we were passed an array of parent ids */
1210 catkey
.hfsPlus
.parentID
= cnid
;
1213 /* if the last guy had the same parent and had access, we're done */
1214 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1220 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1221 skip_cp
, p
, &myucred
, dev
);
1224 access
[i
] = 0; // have access.. no errors to report
1226 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1229 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1233 cnid
= (cnid_t
)file_ids
[i
];
1235 while (cnid
>= kRootDirID
) {
1236 /* get the vnode for this cnid */
1237 myErr
= hfs_vget(hfsmp
, cnid
, &vp
, 0);
1243 cnid
= VTOC(vp
)->c_parentcnid
;
1245 hfs_unlock(VTOC(vp
));
1246 if (vnode_vtype(vp
) == VDIR
) {
1247 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), &my_context
);
1249 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, &my_context
);
1260 /* copyout the access array */
1261 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1262 num_files
* sizeof (short)))) {
1263 goto err_exit_bulk_access
;
1266 err_exit_bulk_access
:
1268 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1270 release_pathbuff((char *) cache
.acache
);
1271 release_pathbuff((char *) cache
.haveaccess
);
1272 release_pathbuff((char *) file_ids
);
1273 release_pathbuff((char *) access
);
1276 } /* HFS_BULKACCESS */
1278 case HFS_SETACLSTATE
: {
1281 if (ap
->a_data
== NULL
) {
1285 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1286 state
= *(int *)ap
->a_data
;
1288 // super-user can enable or disable acl's on a volume.
1289 // the volume owner can only enable acl's
1290 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1293 if (state
== 0 || state
== 1)
1294 return hfs_setextendedsecurity(hfsmp
, state
);
1302 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1304 error
= hfs_fsync(vp
, MNT_NOWAIT
, TRUE
, p
);
1305 hfs_unlock(VTOC(vp
));
1312 register struct cnode
*cp
;
1315 if (!vnode_isreg(vp
))
1318 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1322 * used by regression test to determine if
1323 * all the dirty pages (via write) have been cleaned
1324 * after a call to 'fsysnc'.
1326 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1333 register struct radvisory
*ra
;
1334 struct filefork
*fp
;
1337 if (!vnode_isreg(vp
))
1340 ra
= (struct radvisory
*)(ap
->a_data
);
1343 /* Protect against a size change. */
1344 hfs_lock_truncate(VTOC(vp
), TRUE
);
1346 if (ra
->ra_offset
>= fp
->ff_size
) {
1349 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1352 hfs_unlock_truncate(VTOC(vp
));
1356 case F_READBOOTSTRAP
:
1357 case F_WRITEBOOTSTRAP
:
1359 struct vnode
*devvp
= NULL
;
1360 user_fbootstraptransfer_t
*user_bootstrapp
;
1364 daddr64_t blockNumber
;
1368 user_fbootstraptransfer_t user_bootstrap
;
1370 if (!vnode_isvroot(vp
))
1372 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1373 * to a user_fbootstraptransfer_t else we get a pointer to a
1374 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1377 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1380 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1381 user_bootstrapp
= &user_bootstrap
;
1382 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1383 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1384 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1386 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1389 devvp
= VTOHFS(vp
)->hfs_devvp
;
1390 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1391 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1392 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1393 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1395 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1397 while (uio_resid(auio
) > 0) {
1398 blockNumber
= uio_offset(auio
) / devBlockSize
;
1399 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1401 if (bp
) buf_brelse(bp
);
1406 blockOffset
= uio_offset(auio
) % devBlockSize
;
1407 xfersize
= devBlockSize
- blockOffset
;
1408 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1414 if (uio_rw(auio
) == UIO_WRITE
) {
1415 error
= VNOP_BWRITE(bp
);
1428 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1431 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1434 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1439 case HFS_GET_MOUNT_TIME
:
1440 return copyout(&hfsmp
->hfs_mount_time
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_mount_time
));
1443 case HFS_GET_LAST_MTIME
:
1444 return copyout(&hfsmp
->hfs_last_mounted_mtime
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_last_mounted_mtime
));
1447 case HFS_SET_BOOT_INFO
:
1448 if (!vnode_isvroot(vp
))
1450 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1451 return(EACCES
); /* must be superuser or owner of filesystem */
1452 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1453 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1454 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1455 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1458 case HFS_GET_BOOT_INFO
:
1459 if (!vnode_isvroot(vp
))
1461 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1462 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1463 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1470 /* Should never get here */
1478 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1480 struct vnop_select_args {
1485 vfs_context_t a_context;
1490 * We should really check to see if I/O is possible.
1496 * Converts a logical block number to a physical block, and optionally returns
1497 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1498 * The physical block number is based on the device block size, currently its 512.
1499 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1502 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, int *runp
)
1504 struct cnode
*cp
= VTOC(vp
);
1505 struct filefork
*fp
= VTOF(vp
);
1506 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1507 int retval
= E_NONE
;
1508 daddr_t logBlockSize
;
1509 size_t bytesContAvail
= 0;
1510 off_t blockposition
;
1515 * Check for underlying vnode requests and ensure that logical
1516 * to physical mapping is requested.
1523 logBlockSize
= GetLogicalBlockSize(vp
);
1524 blockposition
= (off_t
)bn
* (off_t
)logBlockSize
;
1526 lockExtBtree
= overflow_extents(fp
);
1529 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_SHARED_LOCK
);
1531 retval
= MacToVFSError(
1532 MapFileBlockC (HFSTOVCB(hfsmp
),
1540 hfs_systemfile_unlock(hfsmp
, lockflags
);
1542 if (retval
== E_NONE
) {
1543 /* Figure out how many read ahead blocks there are */
1545 if (can_cluster(logBlockSize
)) {
1546 /* Make sure this result never goes negative: */
1547 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
1557 * Convert logical block number to file offset.
1560 hfs_vnop_blktooff(struct vnop_blktooff_args
*ap
)
1562 struct vnop_blktooff_args {
1569 if (ap
->a_vp
== NULL
)
1571 *ap
->a_offset
= (off_t
)ap
->a_lblkno
* (off_t
)GetLogicalBlockSize(ap
->a_vp
);
1577 * Convert file offset to logical block number.
1580 hfs_vnop_offtoblk(struct vnop_offtoblk_args
*ap
)
1582 struct vnop_offtoblk_args {
1585 daddr64_t *a_lblkno;
1589 if (ap
->a_vp
== NULL
)
1591 *ap
->a_lblkno
= (daddr64_t
)(ap
->a_offset
/ (off_t
)GetLogicalBlockSize(ap
->a_vp
));
1597 * Map file offset to physical block number.
1599 * System file cnodes are expected to be locked (shared or exclusive).
1602 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
1604 struct vnop_blockmap_args {
1612 vfs_context_t a_context;
1616 struct vnode
*vp
= ap
->a_vp
;
1618 struct filefork
*fp
;
1619 struct hfsmount
*hfsmp
;
1620 size_t bytesContAvail
= 0;
1621 int retval
= E_NONE
;
1624 struct rl_entry
*invalid_range
;
1625 enum rl_overlaptype overlaptype
;
1629 /* Do not allow blockmap operation on a directory */
1630 if (vnode_isdir(vp
)) {
1635 * Check for underlying vnode requests and ensure that logical
1636 * to physical mapping is requested.
1638 if (ap
->a_bpn
== NULL
)
1641 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
)) {
1642 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
1643 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
1647 panic("blockmap: %s cnode lock already held!\n",
1648 cp
->c_desc
.cd_nameptr
? cp
->c_desc
.cd_nameptr
: "");
1656 if (fp
->ff_unallocblocks
) {
1657 if (hfs_start_transaction(hfsmp
) != 0) {
1663 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
1665 } else if (overflow_extents(fp
)) {
1666 syslocks
= SFL_EXTENTS
;
1670 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
1673 * Check for any delayed allocations.
1675 if (fp
->ff_unallocblocks
) {
1677 u_int32_t loanedBlocks
;
1680 // Make sure we have a transaction. It's possible
1681 // that we came in and fp->ff_unallocblocks was zero
1682 // but during the time we blocked acquiring the extents
1683 // btree, ff_unallocblocks became non-zero and so we
1684 // will need to start a transaction.
1686 if (started_tr
== 0) {
1688 hfs_systemfile_unlock(hfsmp
, lockflags
);
1695 * Note: ExtendFileC will Release any blocks on loan and
1696 * aquire real blocks. So we ask to extend by zero bytes
1697 * since ExtendFileC will account for the virtual blocks.
1700 loanedBlocks
= fp
->ff_unallocblocks
;
1701 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
1702 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
1705 fp
->ff_unallocblocks
= loanedBlocks
;
1706 cp
->c_blocks
+= loanedBlocks
;
1707 fp
->ff_blocks
+= loanedBlocks
;
1709 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1710 hfsmp
->loanedBlocks
+= loanedBlocks
;
1711 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1715 hfs_systemfile_unlock(hfsmp
, lockflags
);
1716 cp
->c_flag
|= C_MODIFIED
;
1718 (void) hfs_update(vp
, TRUE
);
1719 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1721 hfs_end_transaction(hfsmp
);
1727 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
1728 ap
->a_bpn
, &bytesContAvail
);
1730 hfs_systemfile_unlock(hfsmp
, lockflags
);
1735 (void) hfs_update(vp
, TRUE
);
1736 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1737 hfs_end_transaction(hfsmp
);
1744 /* Adjust the mapping information for invalid file ranges: */
1745 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
1746 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
1748 if (overlaptype
!= RL_NOOVERLAP
) {
1749 switch(overlaptype
) {
1750 case RL_MATCHINGOVERLAP
:
1751 case RL_OVERLAPCONTAINSRANGE
:
1752 case RL_OVERLAPSTARTSBEFORE
:
1753 /* There's no valid block for this byte offset: */
1754 *ap
->a_bpn
= (daddr64_t
)-1;
1755 /* There's no point limiting the amount to be returned
1756 * if the invalid range that was hit extends all the way
1757 * to the EOF (i.e. there's no valid bytes between the
1758 * end of this range and the file's EOF):
1760 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1761 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1762 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1766 case RL_OVERLAPISCONTAINED
:
1767 case RL_OVERLAPENDSAFTER
:
1768 /* The range of interest hits an invalid block before the end: */
1769 if (invalid_range
->rl_start
== ap
->a_foffset
) {
1770 /* There's actually no valid information to be had starting here: */
1771 *ap
->a_bpn
= (daddr64_t
)-1;
1772 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1773 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1774 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1777 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
1784 if (bytesContAvail
> ap
->a_size
)
1785 bytesContAvail
= ap
->a_size
;
1788 *ap
->a_run
= bytesContAvail
;
1791 *(int *)ap
->a_poff
= 0;
1796 return (MacToVFSError(retval
));
1801 * prepare and issue the I/O
1802 * buf_strategy knows how to deal
1803 * with requests that require
1807 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
1809 buf_t bp
= ap
->a_bp
;
1810 vnode_t vp
= buf_vnode(bp
);
1811 struct cnode
*cp
= VTOC(vp
);
1813 return (buf_strategy(cp
->c_devvp
, ap
));
1818 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
, vfs_context_t context
)
1820 register struct cnode
*cp
= VTOC(vp
);
1821 struct filefork
*fp
= VTOF(vp
);
1822 struct proc
*p
= vfs_context_proc(context
);;
1823 kauth_cred_t cred
= vfs_context_ucred(context
);
1826 off_t actualBytesAdded
;
1828 u_int64_t old_filesize
;
1831 struct hfsmount
*hfsmp
;
1834 blksize
= VTOVCB(vp
)->blockSize
;
1835 fileblocks
= fp
->ff_blocks
;
1836 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
1837 old_filesize
= fp
->ff_size
;
1839 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
1840 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1845 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
1852 /* Files that are changing size are not hot file candidates. */
1853 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
1854 fp
->ff_bytesread
= 0;
1858 * We cannot just check if fp->ff_size == length (as an optimization)
1859 * since there may be extra physical blocks that also need truncation.
1862 if ((retval
= hfs_getinoquota(cp
)))
1867 * Lengthen the size of the file. We must ensure that the
1868 * last byte of the file is allocated. Since the smallest
1869 * value of ff_size is 0, length will be at least 1.
1871 if (length
> (off_t
)fp
->ff_size
) {
1873 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
1879 * If we don't have enough physical space then
1880 * we need to extend the physical size.
1882 if (length
> filebytes
) {
1884 u_long blockHint
= 0;
1886 /* All or nothing and don't round up to clumpsize. */
1887 eflags
= kEFAllMask
| kEFNoClumpMask
;
1889 if (cred
&& suser(cred
, NULL
) != 0)
1890 eflags
|= kEFReserveMask
; /* keep a reserve */
1893 * Allocate Journal and Quota files in metadata zone.
1895 if (filebytes
== 0 &&
1896 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
1897 hfs_virtualmetafile(cp
)) {
1898 eflags
|= kEFMetadataMask
;
1899 blockHint
= hfsmp
->hfs_metazone_start
;
1901 if (hfs_start_transaction(hfsmp
) != 0) {
1906 /* Protect extents b-tree and allocation bitmap */
1907 lockflags
= SFL_BITMAP
;
1908 if (overflow_extents(fp
))
1909 lockflags
|= SFL_EXTENTS
;
1910 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
1912 while ((length
> filebytes
) && (retval
== E_NONE
)) {
1913 bytesToAdd
= length
- filebytes
;
1914 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
1919 &actualBytesAdded
));
1921 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
1922 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
1923 if (length
> filebytes
)
1929 hfs_systemfile_unlock(hfsmp
, lockflags
);
1932 (void) hfs_update(vp
, TRUE
);
1933 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1936 hfs_end_transaction(hfsmp
);
1941 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
1942 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1945 if (!(flags
& IO_NOZEROFILL
)) {
1946 if (UBCINFOEXISTS(vp
) && retval
== E_NONE
) {
1947 struct rl_entry
*invalid_range
;
1950 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
1951 if (length
< zero_limit
) zero_limit
= length
;
1953 if (length
> (off_t
)fp
->ff_size
) {
1956 /* Extending the file: time to fill out the current last page w. zeroes? */
1957 if ((fp
->ff_size
& PAGE_MASK_64
) &&
1958 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
1959 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
1961 /* There's some valid data at the start of the (current) last page
1962 of the file, so zero out the remainder of that page to ensure the
1963 entire page contains valid data. Since there is no invalid range
1964 possible past the (current) eof, there's no need to remove anything
1965 from the invalid range list before calling cluster_write(): */
1967 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
1968 fp
->ff_size
, (off_t
)0,
1969 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
1970 hfs_lock(cp
, HFS_FORCE_LOCK
);
1971 if (retval
) goto Err_Exit
;
1973 /* Merely invalidate the remaining area, if necessary: */
1974 if (length
> zero_limit
) {
1976 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
1977 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
1980 /* The page containing the (current) eof is invalid: just add the
1981 remainder of the page to the invalid list, along with the area
1982 being newly allocated:
1985 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
1986 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
1990 panic("hfs_truncate: invoked on non-UBC object?!");
1993 cp
->c_touch_modtime
= TRUE
;
1994 fp
->ff_size
= length
;
1996 /* Nested transactions will do their own ubc_setsize. */
1999 * ubc_setsize can cause a pagein here
2000 * so we need to drop cnode lock.
2003 ubc_setsize(vp
, length
);
2004 hfs_lock(cp
, HFS_FORCE_LOCK
);
2007 } else { /* Shorten the size of the file */
2009 if ((off_t
)fp
->ff_size
> length
) {
2011 * Any buffers that are past the truncation point need to be
2012 * invalidated (to maintain buffer cache consistency).
2015 /* Nested transactions will do their own ubc_setsize. */
2018 * ubc_setsize can cause a pageout here
2019 * so we need to drop cnode lock.
2022 ubc_setsize(vp
, length
);
2023 hfs_lock(cp
, HFS_FORCE_LOCK
);
2026 /* Any space previously marked as invalid is now irrelevant: */
2027 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2031 * Account for any unmapped blocks. Note that the new
2032 * file length can still end up with unmapped blocks.
2034 if (fp
->ff_unallocblocks
> 0) {
2035 u_int32_t finalblks
;
2036 u_int32_t loanedBlocks
;
2038 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2040 loanedBlocks
= fp
->ff_unallocblocks
;
2041 cp
->c_blocks
-= loanedBlocks
;
2042 fp
->ff_blocks
-= loanedBlocks
;
2043 fp
->ff_unallocblocks
= 0;
2045 hfsmp
->loanedBlocks
-= loanedBlocks
;
2047 finalblks
= (length
+ blksize
- 1) / blksize
;
2048 if (finalblks
> fp
->ff_blocks
) {
2049 /* calculate required unmapped blocks */
2050 loanedBlocks
= finalblks
- fp
->ff_blocks
;
2051 hfsmp
->loanedBlocks
+= loanedBlocks
;
2053 fp
->ff_unallocblocks
= loanedBlocks
;
2054 cp
->c_blocks
+= loanedBlocks
;
2055 fp
->ff_blocks
+= loanedBlocks
;
2057 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2061 * For a TBE process the deallocation of the file blocks is
2062 * delayed until the file is closed. And hfs_close calls
2063 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2064 * isn't set, we make sure this isn't a TBE process.
2066 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
2068 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
2070 if (hfs_start_transaction(hfsmp
) != 0) {
2075 if (fp
->ff_unallocblocks
== 0) {
2076 /* Protect extents b-tree and allocation bitmap */
2077 lockflags
= SFL_BITMAP
;
2078 if (overflow_extents(fp
))
2079 lockflags
|= SFL_EXTENTS
;
2080 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2082 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
),
2083 (FCB
*)fp
, length
, false));
2085 hfs_systemfile_unlock(hfsmp
, lockflags
);
2089 fp
->ff_size
= length
;
2091 (void) hfs_update(vp
, TRUE
);
2092 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2095 hfs_end_transaction(hfsmp
);
2097 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2101 /* These are bytesreleased */
2102 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
2105 /* Only set update flag if the logical length changes */
2106 if (old_filesize
!= length
)
2107 cp
->c_touch_modtime
= TRUE
;
2108 fp
->ff_size
= length
;
2110 cp
->c_touch_chgtime
= TRUE
;
2111 retval
= hfs_update(vp
, MNT_WAIT
);
2113 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2114 -1, -1, -1, retval
, 0);
2119 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
2120 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
2128 * Truncate a cnode to at most length size, freeing (or adding) the
2133 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
,
2134 vfs_context_t context
)
2136 struct filefork
*fp
= VTOF(vp
);
2139 int blksize
, error
= 0;
2140 struct cnode
*cp
= VTOC(vp
);
2142 if (vnode_isdir(vp
))
2143 return (EISDIR
); /* cannot truncate an HFS directory! */
2145 blksize
= VTOVCB(vp
)->blockSize
;
2146 fileblocks
= fp
->ff_blocks
;
2147 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
2149 // have to loop truncating or growing files that are
2150 // really big because otherwise transactions can get
2151 // enormous and consume too many kernel resources.
2153 if (length
< filebytes
) {
2154 while (filebytes
> length
) {
2155 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
2156 filebytes
-= HFS_BIGFILE_SIZE
;
2160 cp
->c_flag
|= C_FORCEUPDATE
;
2161 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2165 } else if (length
> filebytes
) {
2166 while (filebytes
< length
) {
2167 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
2168 filebytes
+= HFS_BIGFILE_SIZE
;
2172 cp
->c_flag
|= C_FORCEUPDATE
;
2173 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2177 } else /* Same logical size */ {
2179 error
= do_hfs_truncate(vp
, length
, flags
, skipsetsize
, context
);
2181 /* Files that are changing size are not hot file candidates. */
2182 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
2183 fp
->ff_bytesread
= 0;
2192 * Preallocate file storage space.
2195 hfs_vnop_allocate(struct vnop_allocate_args
/* {
2199 off_t *a_bytesallocated;
2201 vfs_context_t a_context;
2204 struct vnode
*vp
= ap
->a_vp
;
2206 struct filefork
*fp
;
2208 off_t length
= ap
->a_length
;
2210 off_t moreBytesRequested
;
2211 off_t actualBytesAdded
;
2214 int retval
, retval2
;
2216 UInt32 extendFlags
; /* For call to ExtendFileC */
2217 struct hfsmount
*hfsmp
;
2218 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
2221 *(ap
->a_bytesallocated
) = 0;
2223 if (!vnode_isreg(vp
))
2225 if (length
< (off_t
)0)
2228 if ((retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
)))
2235 fileblocks
= fp
->ff_blocks
;
2236 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
2238 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
2243 /* Fill in the flags word for the call to Extend the file */
2245 extendFlags
= kEFNoClumpMask
;
2246 if (ap
->a_flags
& ALLOCATECONTIG
)
2247 extendFlags
|= kEFContigMask
;
2248 if (ap
->a_flags
& ALLOCATEALL
)
2249 extendFlags
|= kEFAllMask
;
2250 if (cred
&& suser(cred
, NULL
) != 0)
2251 extendFlags
|= kEFReserveMask
;
2255 startingPEOF
= filebytes
;
2257 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
2258 length
+= filebytes
;
2259 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
2260 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
2262 /* If no changes are necesary, then we're done */
2263 if (filebytes
== length
)
2267 * Lengthen the size of the file. We must ensure that the
2268 * last byte of the file is allocated. Since the smallest
2269 * value of filebytes is 0, length will be at least 1.
2271 if (length
> filebytes
) {
2272 moreBytesRequested
= length
- filebytes
;
2275 retval
= hfs_chkdq(cp
,
2276 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
2283 * Metadata zone checks.
2285 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
2287 * Allocate Journal and Quota files in metadata zone.
2289 if (hfs_virtualmetafile(cp
)) {
2290 extendFlags
|= kEFMetadataMask
;
2291 blockHint
= hfsmp
->hfs_metazone_start
;
2292 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
2293 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
2295 * Move blockHint outside metadata zone.
2297 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
2301 if (hfs_start_transaction(hfsmp
) != 0) {
2306 /* Protect extents b-tree and allocation bitmap */
2307 lockflags
= SFL_BITMAP
;
2308 if (overflow_extents(fp
))
2309 lockflags
|= SFL_EXTENTS
;
2310 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2312 retval
= MacToVFSError(ExtendFileC(vcb
,
2317 &actualBytesAdded
));
2319 *(ap
->a_bytesallocated
) = actualBytesAdded
;
2320 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
2322 hfs_systemfile_unlock(hfsmp
, lockflags
);
2325 (void) hfs_update(vp
, TRUE
);
2326 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2329 hfs_end_transaction(hfsmp
);
2332 * if we get an error and no changes were made then exit
2333 * otherwise we must do the hfs_update to reflect the changes
2335 if (retval
&& (startingPEOF
== filebytes
))
2339 * Adjust actualBytesAdded to be allocation block aligned, not
2340 * clump size aligned.
2341 * NOTE: So what we are reporting does not affect reality
2342 * until the file is closed, when we truncate the file to allocation
2345 if ((actualBytesAdded
!= 0) && (moreBytesRequested
< actualBytesAdded
))
2346 *(ap
->a_bytesallocated
) =
2347 roundup(moreBytesRequested
, (off_t
)vcb
->blockSize
);
	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto Err_Exit;
		}

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		retval = MacToVFSError(TruncateFileC(vcb, (FCB *)fp, length, false));

		hfs_systemfile_unlock(hfsmp, lockflags);

		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

		hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock(cp);
	return (retval);
}
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;
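		/*
		 * Illustrative note (added for clarity, not in the original
		 * source): a 1 KB file paged in at offset 0 charges only 1 KB
		 * to the hot-file recorder rather than the full PAGE_SIZE
		 * transfer that the pager actually performed.
		 */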
		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval;
	off_t end_of_range;
	off_t filesize;

	cp = VTOC(vp);
	if (cp->c_lockowner == current_thread()) {
		panic("pageout: %s cnode lock already held!\n",
		      cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
	}
	if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		if (!(ap->a_flags & UPL_NOCOMMIT)) {
			ubc_upl_abort_range(ap->a_pl,
			                    ap->a_pl_offset,
			                    ap->a_size,
			                    UPL_ABORT_FREE_ON_EMPTY);
		}
		return (retval);
	}
	fp = VTOF(vp);

	filesize = fp->ff_size;
	end_of_range = ap->a_f_offset + ap->a_size - 1;

	if (end_of_range >= filesize) {
		end_of_range = (off_t)(filesize - 1);
	}
	if (ap->a_f_offset < filesize) {
		rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
		cp->c_flag |= C_MODIFIED;  /* leof is dirty */
	}
	hfs_unlock(cp);

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                         ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
		hfs_unlock(cp);
	}
	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here. On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
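			/*
			 * Note (added for clarity, not in the original source):
			 * the record-offset table at the end of a B-tree node
			 * ends with the offset of the first record, which sits
			 * just past the 14-byte (0x000E) node descriptor, so
			 * reading 0x000e here in host order suggests the node
			 * is still in native byte order and must be swapped to
			 * big endian before it goes to disk.
			 */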
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);

	return (retval);
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------   -----------------
 * |///////////////|   |               |   STEP 1 (acquire new blocks)
 * -----------------   -----------------
 *
 * -----------------   -----------------
 * |///////////////|   |///////////////|   STEP 2 (clone data)
 * -----------------   -----------------
 *
 *                     -----------------
 *                     |///////////////|   STEP 3 (head truncate blocks)
 *                     -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int disabled_caching = 0;
	int eflags;
	off_t newbytes;
	int retval = 0;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
			hfs_unlock_truncate(cp);
			return (retval);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
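	/*
	 * Illustrative note (added for clarity, not in the original source):
	 * a 10,000-byte fork on a volume with 4 KB allocation blocks gives
	 * datablks = howmany(10000, 4096) = 3 and growsize = 12288 bytes of
	 * new space to request from ExtendFileC below.
	 */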
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp);
		return (EINVAL);
	}
	started_tr = 1;

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	if (!vnode_isnocache(vp)) {
		vnode_setnocache(vp);
		disabled_caching = 1;
	}
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		hfsmp->nextAllocation = nextallocsave;
		hfsmp->vcbFlags |= 0xFF00;
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			printf("hfs_relocate: didn't move into metadata zone\n");
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);
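	/*
	 * Note (added for clarity, not in the original source): symlinks are
	 * copied block-for-block through the buffer cache (hfs_clonelink),
	 * system files go through hfs_clonesysfile, and ordinary regular
	 * files are cloned with cluster I/O in hfs_clonefile; all three
	 * helpers appear later in this file.
	 */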
	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;

	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}

	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	else
		(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
exit:
	if (disabled_caching) {
		vnode_clearnocache(vp);
	}
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks)
		goto exit;
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp);
	goto exit;
}
/*
 * Clone a symlink.
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	off_t writebase;
	off_t filesize;
	off_t offset;
	uio_t auio;
	int error = 0;

	filesize = VTOF(vp)->ff_blocks * blksize;	/* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
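	/*
	 * Illustrative note (added for clarity, not in the original source):
	 * cloning three 4 KB blocks copies all 12 KB in a single pass of the
	 * loop below, while a 1 MB fork is copied in eight 128 KB read/write
	 * passes.
	 */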
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, 0);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);

	return (error);
}
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char *offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;
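	/*
	 * Illustrative note (added for clarity, not in the original source):
	 * assuming the usual power-of-two logical block size, e.g. 512 bytes,
	 * the & ~(iosize - 1) above rounds the staging buffer down to a whole
	 * number of logical blocks, so breadcnt full blocks fit per pass.
	 */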
	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);