/*
 * Copyright (c) 2014-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include "hfs_key_roll.h"

#include <sys/cprotect.h>
#include <sys/fcntl.h>
#include <sys/xattr.h>
#include <kern/sched_prim.h>
#include <vm/vm_pageout.h>
#include <pexpert/pexpert.h>

#include "hfs_extents.h"
#include "hfs_kdebug.h"

#if CONFIG_PROTECT
#define PTR_ADD(type, base, offset)		(type)((uintptr_t)(base) + (offset))

#define HFS_KEY_ROLL_MAX_CHUNK_BYTES	(2 * 1024 * 1024)
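// i.e. roll at most 2 MiB of a fork's data in one hfs_key_roll_step pass.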
static inline void log_error(const char *func, unsigned line, errno_t err)
{
	printf("%s:%u error: %d\n", func, line, err);
}

#define LOG_ERROR(err) log_error(__func__, __LINE__, err)
// Evaluate @x into @var; on error, log it and jump to @goto_label.
#define CHECK(x, var, goto_label)						\
	do {												\
		var = (x);										\
		if (var) {										\
			if (var != ENOSPC)							\
				LOG_ERROR(var);							\
			goto goto_label;							\
		}												\
	} while (0)

#define min(a, b) \
	({ typeof (a) _a = (a); typeof (b) _b = (b); _a < _b ? _a : _b; })
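// Typical use: CHECK(hfs_start_transaction(hfsmp), ret, exit);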
// -- Some locking helpers --

/*
 * These helpers exist to help clean up at the end of a function.  A
 * lock context structure is stored on the stack and is cleaned up
 * when it goes out of scope, automatically unlocking the lock if it
 * happens to be locked.  It is also safe to call the unlock
 * functions when no lock has been taken, which is cleaner than
 * having to declare a separate variable like have_lock and having to
 * test it before unlocking.
 */
typedef struct {
	void	   *object;
	int			flags;
} hfs_lock_ctx_t;

#define DECL_HFS_LOCK_CTX(var, unlocker) \
	hfs_lock_ctx_t var __attribute__((cleanup(unlocker))) = {}
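/*
 * Typical use (see hfs_key_roll_op below):
 *
 *	DECL_HFS_LOCK_CTX(cp_lock, hfs_unlock_cp);
 *	...
 *	hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
 *
 * The cleanup attribute runs hfs_unlock_cp automatically when cp_lock
 * goes out of scope, and unlocking is a no-op if nothing was locked.
 */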
static inline bool hfs_is_locked(hfs_lock_ctx_t *lock_ctx)
{
	return lock_ctx->object != NULL;
}

static inline int hfs_lock_flags(hfs_lock_ctx_t *lock_ctx)
{
	return lock_ctx->flags;
}

static inline void hfs_lock_cp(cnode_t *cp,
							   enum hfs_locktype lock_type,
							   hfs_lock_ctx_t *lock_ctx)
{
	hfs_lock_always(cp, lock_type);
	lock_ctx->object = cp;
}

static inline void hfs_unlock_cp(hfs_lock_ctx_t *lock_ctx)
{
	if (lock_ctx->object) {
		hfs_unlock(lock_ctx->object);
		lock_ctx->object = NULL;
	}
}

static inline void hfs_lock_trunc(cnode_t *cp,
								  enum hfs_locktype lock_type,
								  hfs_lock_ctx_t *lock_ctx)
{
	hfs_lock_truncate(cp, lock_type, 0);
	lock_ctx->object = cp;
}

static inline void hfs_unlock_trunc(hfs_lock_ctx_t *lock_ctx)
{
	if (lock_ctx->object) {
		hfs_unlock_truncate(lock_ctx->object, 0);
		lock_ctx->object = NULL;
	}
}

static inline void hfs_lock_sys(struct hfsmount *hfsmp, int flags,
								enum hfs_locktype lock_type,
								hfs_lock_ctx_t *lock_ctx)
{
	lock_ctx->flags |= hfs_systemfile_lock(hfsmp, flags, lock_type);
	lock_ctx->object = hfsmp;
}

static inline void hfs_unlock_sys(hfs_lock_ctx_t *lock_ctx)
{
	if (lock_ctx->object) {
		hfs_systemfile_unlock(lock_ctx->object, lock_ctx->flags);
		lock_ctx->object = NULL;
	}
}
static const uint32_t ckr_magic1 = 0x7b726b63;
static const uint32_t ckr_magic2 = 0x726b637d;
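// The magic values are ASCII "ckr{" and "}ckr"; they bracket each allocated
// roll context and are sanity checked when the context is released.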
hfs_cp_key_roll_ctx_t *hfs_key_roll_ctx_alloc(const hfs_cp_key_roll_ctx_t *old,
											  uint16_t pers_key_len,
											  uint16_t cached_key_len,
											  cp_key_pair_t **pcpkp)
{
	hfs_cp_key_roll_ctx_t *ckr;

	size_t size = (sizeof(*ckr) - sizeof(cp_key_pair_t)
				   + cpkp_size(pers_key_len, cached_key_len));

	// Extra room for the trailing magic
	size += 4;

	ckr = hfs_mallocz(size);

	ckr->ckr_magic1 = ckr_magic1;
	*PTR_ADD(uint32_t *, ckr, size - 4) = ckr_magic2;

	cpkp_init(&ckr->ckr_keys, pers_key_len, cached_key_len);

	if (old) {
		if (old->ckr_busy)
			panic("hfs_key_roll_ctx_alloc: old context busy!");
		ckr->ckr_off_rsrc = old->ckr_off_rsrc;
		ckr->ckr_preferred_next_block = old->ckr_preferred_next_block;

		cpx_copy(cpkp_cpx(&old->ckr_keys), cpkp_cpx(&ckr->ckr_keys));
	}

	*pcpkp = &ckr->ckr_keys;

	return ckr;
}
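/*
 * The context embeds a variable length cp_key_pair_t, which is why the
 * allocation size is computed as sizeof(*ckr) minus the trailing
 * cp_key_pair_t plus cpkp_size() for the requested key lengths; the same
 * computation, using cpkp_sizex(), is used when freeing below.
 */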
static void hfs_key_roll_ctx_free(hfs_cp_key_roll_ctx_t *ckr)
{
	size_t size = (sizeof(*ckr) - sizeof(cp_key_pair_t)
				   + cpkp_sizex(&ckr->ckr_keys));

	// Account for the trailing magic
	size += 4;

	hfs_free(ckr, size);
}
void hfs_release_key_roll_ctx(hfsmount_t *hfsmp, cprotect_t cpr)
{
	hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx;

	if (!ckr)
		return;

	cpkp_flush(&ckr->ckr_keys);

	if (ckr->ckr_tentative_reservation) {
		int lockf = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		hfs_free_tentative(hfsmp, &ckr->ckr_tentative_reservation);
		hfs_systemfile_unlock(hfsmp, lockf);
	}

	cpr->cp_key_roll_ctx = NULL;

	cnode_t *cp = cpr->cp_backing_cnode;
	if (cp)
		wakeup(&cp->c_cpentry);

	hfs_assert(ckr->ckr_magic1 == ckr_magic1);
	hfs_assert(*PTR_ADD(uint32_t *, ckr, sizeof(*ckr) - sizeof(cp_key_pair_t)
						+ cpkp_sizex(&ckr->ckr_keys)) == ckr_magic2);

	hfs_key_roll_ctx_free(ckr);
}
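// The wakeup above pairs with the assert_wait on &cp->c_cpentry in
// hfs_key_roll_step, which is how waiters learn that a busy roll context
// has been released.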
// Records current status into @args
static void hfs_key_roll_status(cnode_t *cp, hfs_key_roll_args_t *args)
{
	hfsmount_t	   *hfsmp = VTOHFS(cp->c_vp);
	cprotect_t		const cpr = cp->c_cpentry;
	filefork_t	   *dfork = cp->c_datafork;

	if (!cpr || !dfork) {
		args->key_class_generation = 0;
		args->key_revision = 0;
		args->key_os_version = 0;
		args->done = -1;
		args->total = 0;
		return;
	}

	uint32_t total_blocks = cp->c_blocks - dfork->ff_unallocblocks;
	if (cp->c_rsrcfork)
		total_blocks -= cp->c_rsrcfork->ff_unallocblocks;
	args->total = hfs_blk_to_bytes(total_blocks, hfsmp->blockSize);

	args->key_class_generation = cp_get_crypto_generation(cpr->cp_pclass);
	args->key_revision = cpr->cp_key_revision;
	args->key_os_version = cpr->cp_key_os_version;

	hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx;

	if (!ckr) {
		// Not rolling
		args->done = -1;
		return;
	}

	args->done = off_rsrc_get_off(ckr->ckr_off_rsrc);

	if (off_rsrc_is_rsrc(ckr->ckr_off_rsrc)) {
		args->done += hfs_blk_to_bytes(ff_allocblocks(dfork),
									   hfsmp->blockSize);
	}
}
// The fsctl calls this
errno_t hfs_key_roll_op(vfs_context_t vfs_ctx, vnode_t vp,
						hfs_key_roll_args_t *args)
{
	errno_t ret;
	cnode_t * const cp = VTOC(vp);
	cprotect_t const cpr = cp->c_cpentry;
	hfs_cp_key_roll_ctx_t *ckr = NULL;

	DECL_HFS_LOCK_CTX(cp_lock, hfs_unlock_cp);
	DECL_HFS_LOCK_CTX(trunc_lock, hfs_unlock_trunc);

	KDBG(HFSDBG_KEY_ROLL | DBG_FUNC_START, kdebug_vnode(vp), args->operation);

	if (args->api_version != HFS_KR_API_VERSION_1) {
		ret = ENOTSUP;
		goto exit;
	}

	if (args->operation != HFS_KR_OP_STATUS) {
		ret = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0);
		if (ret)
			goto exit;
	}

	switch (args->operation) {
	case HFS_KR_OP_START:
		if (!cpr) {
			ret = ENOTSUP;
			goto exit;
		}

		/*
		 * We must hold the truncate lock exclusively in case we have to
		 * rewrap.
		 */
		hfs_lock_trunc(cp, HFS_EXCLUSIVE_LOCK, &trunc_lock);
		hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);

		ckr = cpr->cp_key_roll_ctx;
		if (ckr)
			break;

		// Only start rolling if criteria match
		if (ISSET(args->flags, HFS_KR_MATCH_KEY_CLASS_GENERATION)
			&& (args->key_class_generation
				!= cp_get_crypto_generation(cpr->cp_pclass))) {
			break;
		}

		if (ISSET(args->flags, HFS_KR_MATCH_KEY_REVISION)
			&& args->key_revision != cpr->cp_key_revision) {
			break;
		}

		if (ISSET(args->flags, HFS_KR_MATCH_KEY_OS_VERSION)
			&& args->key_os_version != cpr->cp_key_os_version) {
			break;
		}

		if (cp->c_cpentry->cp_raw_open_count > 0) {
			// Cannot start key rolling if file is opened for raw access
			ret = EBUSY;
			goto exit;
		}

		ret = hfs_key_roll_start(cp);
		if (ret)
			goto exit;
		break;

	case HFS_KR_OP_STATUS:
		break;

	case HFS_KR_OP_STEP:
		CHECK(hfs_key_roll_step(vfs_ctx, vp, INT64_MAX), ret, exit);
		break;

	case HFS_KR_OP_SET_INFO:
		if (!ISSET(PE_i_can_has_kernel_configuration(), kPEICanHasDiagnosticAPI)) {
			ret = EPERM;
			goto exit;
		}

		if (!cpr) {
			ret = ENOTSUP;
			goto exit;
		}

		hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
		cpr->cp_key_revision = args->key_revision;
		cpr->cp_key_os_version = args->key_os_version;
		ret = cp_setxattr(cp, cpr, VTOHFS(vp), 0, 0);
		if (ret)
			goto exit;
		break;

	default:
		ret = EINVAL;
		goto exit;
	}

	if (!hfs_is_locked(&cp_lock))
		hfs_lock_cp(cp, HFS_SHARED_LOCK, &cp_lock);

	hfs_key_roll_status(cp, args);

	ret = 0;

exit:

	hfs_unlock_cp(&cp_lock);
	hfs_unlock_trunc(&trunc_lock);

	KDBG(HFSDBG_KEY_ROLL | DBG_FUNC_END, ret, ret ? 0 : args->done);

	return ret;
}
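/*
 * Illustrative sketch only (not part of the original file): userspace is
 * expected to drive the operations above through an HFS fsctl carrying an
 * hfs_key_roll_args_t, along the lines of:
 *
 *	hfs_key_roll_args_t kr = {
 *		.api_version = HFS_KR_API_VERSION_1,
 *		.operation	 = HFS_KR_OP_STATUS,
 *	};
 *	// HFSIOC_KEY_ROLL is assumed here for illustration; see hfs_fsctl.h
 *	// for the real selector name and the structure layout.
 *	fsctl(path, HFSIOC_KEY_ROLL, &kr, 0);
 */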
/*
 * Initiates key rolling.  cnode and truncate locks *must* be held
 * exclusively.
 */
errno_t hfs_key_roll_start(cnode_t *cp)
{
	errno_t ret;

	hfs_assert(!cp->c_cpentry->cp_key_roll_ctx);

	if (CP_CLASS(cp->c_cpentry->cp_pclass) == PROTECTION_CLASS_F)
		return ENOTSUP;

	hfsmount_t *hfsmp = VTOHFS(cp->c_vp);

	if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY))
		return EROFS;

	if (!hfsmp->jnl || !S_ISREG(cp->c_mode))
		return ENOTSUP;

	cprotect_t cpr = cp->c_cpentry;

	cp_key_class_t key_class = cpr->cp_pclass;

	int attempts = 0;
	hfs_cp_key_roll_ctx_t *ckr;

	cp_key_revision_t rev = cp_next_key_revision(cp->c_cpentry->cp_key_revision);

	for (;;) {
		// cp_new can call out to the keystore, so drop the cnode lock around it
		hfs_unlock(cp);

		ret = cp_new(&key_class, hfsmp, cp, cp->c_mode, CP_KEYWRAP_DIFFCLASS, rev,
					 (cp_new_alloc_fn)hfs_key_roll_ctx_alloc, (void **)&ckr);
		if (ret) {
			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
			return ret;
		}

		if (key_class == cpr->cp_pclass) {
			// The usual and easy case: the classes match
			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
			break;
		}

		// AKS has given us a different class, so we need to rewrap

		// The truncate lock is not sufficient
		vnode_waitforwrites(cp->c_vp, 0, 0, 0, "hfs_key_roll_start");

		// And the resource fork
		if (cp->c_rsrc_vp)
			vnode_waitforwrites(cp->c_rsrc_vp, 0, 0, 0, "hfs_key_roll_start");

		hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

		cp_key_class_t key_class2 = key_class;
		cprotect_t new_entry;

		ret = cp_rewrap(cp, hfsmp, &key_class2, &cpr->cp_keys, cpr,
						(cp_new_alloc_fn)cp_entry_alloc, (void **)&new_entry);

		if (ret) {
			hfs_key_roll_ctx_free(ckr);
			return ret;
		}

		if (key_class2 == key_class) {
			// Great, fix things up and we're done
			cp_replace_entry(hfsmp, cp, new_entry);
			cpr = cp->c_cpentry;
			cpr->cp_pclass = key_class;
			break;
		}

		/*
		 * Oh dear, key classes don't match.  Unlikely, but perhaps class
		 * generation was rolled again.
		 */
		hfs_key_roll_ctx_free(ckr);
		cp_entry_destroy(hfsmp, new_entry);

		if (++attempts > 3)
			return EPERM;
	}

	cpr->cp_key_roll_ctx = ckr;
	cpr->cp_key_revision = rev;
	cpr->cp_key_os_version = cp_os_version();

	wakeup(&cp->c_cpentry);

	return 0;
}
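/*
 * Callers that need a particular range of a file to be on the new keys can
 * use hfs_key_roll_up_to below to push the roll boundary past that range
 * first: it simply repeats hfs_key_roll_step until ckr_off_rsrc reaches
 * @up_to or rolling finishes.
 */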
// Rolls up to @up_to
errno_t hfs_key_roll_up_to(vfs_context_t vfs_ctx, vnode_t vp, off_rsrc_t up_to)
{
	cnode_t * const cp = VTOC(vp);

	for (;;) {
		hfs_lock_always(cp, HFS_SHARED_LOCK);
		cprotect_t cpr = cp->c_cpentry;
		if (!cpr)
			break;

		hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx;
		if (!ckr || ckr->ckr_off_rsrc >= up_to)
			break;

		hfs_unlock(cp);
		errno_t ret = hfs_key_roll_step(vfs_ctx, vp, up_to);
		if (ret)
			return ret;
	}

	hfs_unlock(cp);

	return 0;
}
/*
 * This function checks the size of the file and the key roll offset
 * and updates the xattr accordingly if necessary.  This is called
 * when a file is truncated and also just before and after key
 * rolling.  cnode must be locked exclusively and might be dropped.
 * truncate lock must be held shared or exclusive (arg indicates
 * which).  If the truncate lock is held shared, then the caller must
 * have marked the roll context busy (ckr_busy) so that it cannot be
 * released underneath us.
 */
errno_t hfs_key_roll_check(cnode_t *cp, bool have_excl_trunc_lock)
{
	hfs_assert(cp->c_lockowner == current_thread());

	errno_t ret;
	hfs_cp_key_roll_ctx_t *ckr = NULL;
	hfsmount_t *hfsmp = VTOHFS(cp->c_vp);
	bool downgrade_trunc_lock = false;
	off_rsrc_t orig_off_rsrc;

	for (;;) {
		cprotect_t cpr = cp->c_cpentry;
		if (!cpr || !cpr->cp_key_roll_ctx) {
			ret = 0;
			goto exit;
		}

		ckr = cpr->cp_key_roll_ctx;

		hfs_assert(have_excl_trunc_lock || ckr->ckr_busy);

		if (!cp->c_datafork) {
			ret = 0;
			goto exit;
		}

		orig_off_rsrc = ckr->ckr_off_rsrc;
		off_rsrc_t new_off_rsrc = orig_off_rsrc;

		if (orig_off_rsrc == INT64_MAX) {
			/*
			 * If orig_off_rsrc == INT64_MAX it means we rolled to the end and we
			 * updated the xattr, but we haven't fixed up the in memory part of it
			 * because we didn't have the truncate lock exclusively.
			 */
		} else if (off_rsrc_is_rsrc(orig_off_rsrc)) {
			off_t size;
			if (!cp->c_rsrcfork) {
				size = hfs_blk_to_bytes(cp->c_blocks - cp->c_datafork->ff_blocks,
										hfsmp->blockSize);
			} else {
				size = min(cp->c_rsrcfork->ff_size,
						   hfs_blk_to_bytes(ff_allocblocks(cp->c_rsrcfork),
											hfsmp->blockSize));
			}
			if (off_rsrc_get_off(orig_off_rsrc) >= size)
				new_off_rsrc = INT64_MAX;
		} else {
			off_t size = min(cp->c_datafork->ff_size,
							 hfs_blk_to_bytes(ff_allocblocks(cp->c_datafork),
											  hfsmp->blockSize));

			if (off_rsrc_get_off(orig_off_rsrc) >= size) {
				new_off_rsrc = hfs_has_rsrc(cp) ? off_rsrc_make(0, true) : INT64_MAX;
			}
		}

		// Should we delete roll information?
		if (new_off_rsrc == INT64_MAX) {
			/*
			 * If we're deleting the roll information, we need the truncate lock
			 * exclusively to flush out readers and sync writers and
			 * vnode_waitforwrites to flush out async writers.
			 */
			if (!have_excl_trunc_lock) {
				ckr->ckr_busy = false;
				hfs_unlock(cp);

				if (!hfs_truncate_lock_upgrade(cp))
					hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, 0);
				have_excl_trunc_lock = true;
				downgrade_trunc_lock = true;
				hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

				// Things may have changed, go around again
				continue;
			}

			// We hold the truncate lock exclusively so vnodes cannot be recycled here
			vnode_waitforwrites(cp->c_vp, 0, 0, 0, "hfs_key_roll_check_size");
			if (cp->c_rsrc_vp) {
				vnode_waitforwrites(cp->c_rsrc_vp, 0, 0,
									0, "hfs_key_roll_check_size");
			}

			// It's now safe to copy the keys and free the context
			if (!cpkp_can_copy(&ckr->ckr_keys, &cpr->cp_keys)) {
				cprotect_t new_cpr = cp_entry_alloc(cpr,
													ckr->ckr_keys.cpkp_max_pers_key_len,
													CP_MAX_CACHEBUFLEN, NULL);
				cpkp_copy(&ckr->ckr_keys, &new_cpr->cp_keys);
				cp_replace_entry(hfsmp, cp, new_cpr);
				cpr = new_cpr;
			} else {
				cpkp_copy(&ckr->ckr_keys, &cpr->cp_keys);
			}
			hfs_release_key_roll_ctx(hfsmp, cpr);
			ckr = NULL;

			ret = 0;
			goto exit;
		}

		if (new_off_rsrc != orig_off_rsrc) {
			ckr->ckr_off_rsrc = new_off_rsrc;
			ret = cp_setxattr(cp, cpr, hfsmp, 0, XATTR_REPLACE);
			if (ret) {
				ckr->ckr_off_rsrc = orig_off_rsrc;
				goto exit;
			}
		}

		ret = 0;
		goto exit;
	}

exit:

	if (downgrade_trunc_lock) {
		if (ckr)
			ckr->ckr_busy = true;
		hfs_truncate_lock_downgrade(cp);
	}

	return ret;
}
/*
 * We need to wrap the UPL routines because we will be dealing with
 * allocation blocks and the UPL routines need to be page aligned.
 */
static errno_t kr_upl_create(vnode_t vp, off_t offset, int len,
							 upl_offset_t *start_upl_offset,
							 upl_t *upl, upl_page_info_t **pl)
{
	// Round parameters to page size
	const int rounding = offset & PAGE_MASK;

	*start_upl_offset = rounding;

	offset -= rounding;
	len	   += rounding;

	// Don't go beyond end of file
	off_t max = VTOF(vp)->ff_size - offset;
	if (len > max)
		len = max;

	len = round_page_32(len);

	return mach_to_bsd_errno(ubc_create_upl(vp, offset, len, upl, pl,
											UPL_CLEAN_IN_PLACE | UPL_SET_LITE));
}

static errno_t kr_page_out(vnode_t vp, upl_t upl, upl_offset_t upl_offset,
						   off_t fork_offset, int len)
{
	const int rounding = upl_offset & PAGE_MASK;

	upl_offset	-= rounding;
	fork_offset -= rounding;
	len			+= rounding;

	const off_t fork_size = VTOF(vp)->ff_size;
	if (fork_offset + len > fork_size)
		len = fork_size - fork_offset;

	len = round_page_32(len);

	return cluster_pageout(vp, upl, upl_offset, fork_offset, len,
						   fork_size, UPL_IOSYNC | UPL_NOCOMMIT);
}
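/*
 * Note that kr_page_out writes with UPL_NOCOMMIT: the pages stay locked in
 * the UPL until the metadata has been switched over to the new blocks, at
 * which point kr_upl_commit below releases them.
 */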
static void kr_upl_commit(upl_t upl, upl_offset_t upl_offset, int len, bool last)
{
	const int rounding = upl_offset & PAGE_MASK;

	upl_offset -= rounding;
	len		   += rounding;

	/*
	 * If not last we cannot commit the partial page yet so we round
	 * down.
	 */
	if (last)
		len = upl_get_size(upl) - upl_offset;
	else
		len = trunc_page_32(len);

	/*
	 * This should send pages that were absent onto the speculative
	 * queue and because we passed in UPL_CLEAN_IN_PLACE when we
	 * created the UPL, dirty pages will end up clean.
	 */
	errno_t err = ubc_upl_commit_range(upl, upl_offset, len,
									   UPL_COMMIT_FREE_ON_EMPTY
									   | UPL_COMMIT_SPECULATE);

	hfs_assert(!err);
}
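/*
 * Overview of one step: mark the roll context busy, pick the next chunk of
 * the current fork (at most HFS_KEY_ROLL_MAX_CHUNK_BYTES), page it in under
 * a UPL, then repeatedly allocate new extents and page the data back out to
 * them (cp_io_params uses ckr_off_rsrc to pick the right keys for the region
 * being rolled), issue a barrier flush, switch the on-disk extents and the
 * xattr over to the new blocks, and finally commit the UPL.
 */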
// Rolls 1 chunk if before @limit
errno_t hfs_key_roll_step(__unused vfs_context_t vfs_ctx, vnode_t vp, off_rsrc_t limit)
{
	errno_t ret;
	cnode_t * const cp = VTOC(vp);
	cprotect_t cpr = NULL;
	hfsmount_t * const hfsmp = VTOHFS(vp);
	upl_t upl = NULL;
	upl_page_info_t *pl = NULL;
	bool transaction_open = false;
	bool need_put = false;
	bool marked_busy = false;
	hfs_cp_key_roll_ctx_t *ckr = NULL;
	filefork_t *pfork = NULL;
	off_rsrc_t roll_back_off_rsrc = 0;
	int ext_count = 0;
	const int max_extents = 8; // Change mask below if this changes
	enum {
		ROLL_BACK_EXTENTS_MASK	= 0x00ff,
		ROLL_BACK_XATTR			= 0x0100,
		ROLL_BACK_OFFSET		= 0x0200,
	};
	int roll_back = 0;

	printf ("{ hfs_key_roll_step\n");

	if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY))
		return EROFS;

	HFSPlusExtentDescriptor extents[max_extents];
	struct rl_entry *reservations[max_extents];

	DECL_HFS_LOCK_CTX(trunc_lock, hfs_unlock_trunc);
	DECL_HFS_LOCK_CTX(cp_lock, hfs_unlock_cp);
	DECL_HFS_LOCK_CTX(sys_lock, hfs_unlock_sys);

	for (;;) {
		hfs_lock_trunc(cp, HFS_SHARED_LOCK, &trunc_lock);
		hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);

		cpr = cp->c_cpentry;
		if (!cpr) {
			// File is not protected
			ret = 0;
			goto exit;
		}

		ckr = cpr->cp_key_roll_ctx;
		if (!ckr) {
			// Rolling was finished by something else
			ret = 0;
			goto exit;
		}

		if (!ckr->ckr_busy)
			break;

		// Something else is rolling, wait until they've finished
		assert_wait(&cp->c_cpentry, THREAD_ABORTSAFE);
		hfs_unlock_cp(&cp_lock);
		hfs_unlock_trunc(&trunc_lock);

		if (msleep(NULL, NULL, PINOD | PCATCH,
				   "hfs_key_roll", NULL) == EINTR) {
			ret = EINTR;
			goto exit;
		}
	}

	ckr->ckr_busy = true;
	marked_busy = true;

	CHECK(hfs_key_roll_check(cp, false), ret, exit);

	// hfs_key_roll_check can change things
	cpr = cp->c_cpentry;
	ckr = cpr->cp_key_roll_ctx;

	if (!ckr) {
		ret = 0;
		goto exit;
	}

	if (ckr->ckr_off_rsrc >= limit) {
		ret = 0;
		goto exit;
	}

	// Early check for no space.  We don't dip into the reserve pool.
	if (!hfs_freeblks(hfsmp, true)) {
		ret = ENOSPC;
		goto exit;
	}

	if (off_rsrc_is_rsrc(ckr->ckr_off_rsrc)) {
		if (!VNODE_IS_RSRC(vp)) {
			/*
			 * We've called hfs_key_roll_check so there's no way we should get
			 * an error here.
			 */
			vnode_t rvp;
			CHECK(hfs_vgetrsrc(hfsmp, vp, &rvp), ret, exit);
			vp = rvp;
			need_put = true;
		}
		pfork = cp->c_rsrcfork;
	} else {
		if (VNODE_IS_RSRC(vp)) {
			CHECK(vnode_get(cp->c_vp), ret, exit);
			vp = cp->c_vp;
			need_put = true;
		}
		pfork = cp->c_datafork;
	}

	hfs_unlock_cp(&cp_lock);

	// Get total blocks in fork
	const uint32_t fork_blocks = min(howmany(pfork->ff_size,
											 hfsmp->blockSize),
									 ff_allocblocks(pfork));

	off_t off = off_rsrc_get_off(ckr->ckr_off_rsrc);
	hfs_assert(!(off % hfsmp->blockSize));

	uint32_t block = off / hfsmp->blockSize;

	// Figure out remaining fork blocks
	uint32_t rem_fork_blocks;
	if (fork_blocks < block)
		rem_fork_blocks = 0;
	else
		rem_fork_blocks = fork_blocks - block;

	uint32_t chunk_blocks = min(rem_fork_blocks,
								HFS_KEY_ROLL_MAX_CHUNK_BYTES / hfsmp->blockSize);

	off_t chunk_bytes = chunk_blocks * hfsmp->blockSize;
	upl_offset_t upl_offset = 0;

	if (!ckr->ckr_preferred_next_block && off) {
		/*
		 * Here we fix up ckr_preferred_next_block.  This can
		 * happen when we rolled part of a file, then rebooted.
		 * We want to try and allocate from where we left off.
		 */
		hfs_ext_iter_t *iter;

		iter = hfs_malloc(sizeof(*iter));

		hfs_lock_sys(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK, &sys_lock);

		// Errors are not fatal here
		if (!hfs_ext_find(vp, off - 1, iter)) {
			ckr->ckr_preferred_next_block = (iter->group[iter->ndx].startBlock
											 + off / hfsmp->blockSize
											 - iter->file_block);
		}

		hfs_unlock_sys(&sys_lock);

		hfs_free(iter, sizeof(*iter));
	}

	// We need to wait for outstanding direct reads to be issued
	cl_direct_read_lock_t *lck = cluster_lock_direct_read(vp, LCK_RW_TYPE_EXCLUSIVE);

	ret = kr_upl_create(vp, off, chunk_bytes, &upl_offset, &upl, &pl);

	// We have the pages locked now so it's safe to...
	cluster_unlock_direct_read(lck);

	if (ret) {
		LOG_ERROR(ret);
		goto exit;
	}

	int page_count = upl_get_size(upl) >> PAGE_SHIFT;
	int page_ndx = 0;

	// Page everything in
	for (;;) {
		while (page_ndx < page_count && upl_valid_page(pl, page_ndx))
			++page_ndx;

		if (page_ndx >= page_count)
			break;

		const int page_group_start = page_ndx;

		do {
			++page_ndx;
		} while (page_ndx < page_count && !upl_valid_page(pl, page_ndx));

		const upl_offset_t start = page_group_start << PAGE_SHIFT;

		CHECK(cluster_pagein(vp, upl, start,
							 off - upl_offset + start,
							 (page_ndx - page_group_start) << PAGE_SHIFT,
							 pfork->ff_size,
							 UPL_IOSYNC | UPL_NOCOMMIT), ret, exit);
	}

	bool tried_hard = false;

	/*
	 * For each iteration of this loop, we roll up to @max_extents
	 * extents and update the metadata for those extents (one
	 * transaction per iteration.)
	 */
	for (;;) {
		/*
		 * This is the number of bytes rolled for the current
		 * iteration of the containing loop.
		 */
		off_t bytes_rolled = 0;

		roll_back_off_rsrc = ckr->ckr_off_rsrc;
		ext_count = 0;

		// Allocate and write out up to @max_extents extents
		while (chunk_bytes && ext_count < max_extents) {
			/*
			 * We're not making any on disk changes here but
			 * hfs_block_alloc needs to ask the journal about pending
			 * trims and for that it needs the journal lock and the
			 * journal lock must be taken before any system file lock.
			 * We could fix the journal code so that it can deal with
			 * this when there is no active transaction but the
			 * overhead from opening a transaction and then closing it
			 * without making any changes is actually quite small so
			 * we take that much simpler approach here.
			 */
			CHECK(hfs_start_transaction(hfsmp), ret, exit);
			transaction_open = true;

			hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK, &sys_lock);

			HFSPlusExtentDescriptor *ext = &extents[ext_count];

			if (!tried_hard
				&& (!ckr->ckr_tentative_reservation
					|| !rl_len(ckr->ckr_tentative_reservation))) {
				hfs_free_tentative(hfsmp, &ckr->ckr_tentative_reservation);

				tried_hard = true;

				HFSPlusExtentDescriptor extent = {
					.startBlock = ckr->ckr_preferred_next_block,
					.blockCount = 1, // This is the minimum
				};

				hfs_alloc_extra_args_t args = {
					.max_blocks = rem_fork_blocks,
					.reservation_out = &ckr->ckr_tentative_reservation,
					.alignment = PAGE_SIZE / hfsmp->blockSize,
					.alignment_offset = (off + bytes_rolled) / hfsmp->blockSize,
				};

				ret = hfs_block_alloc(hfsmp, &extent,
									  HFS_ALLOC_TENTATIVE | HFS_ALLOC_TRY_HARD,
									  &args);

				if (ret == ENOSPC && ext_count) {
					ext->blockCount = 0;
					goto roll_what_we_have;
				} else if (ret) {
					if (ret != ENOSPC)
						LOG_ERROR(ret);
					goto exit;
				}
			}

			ext->startBlock = ckr->ckr_preferred_next_block;
			ext->blockCount = 1;

			hfs_alloc_extra_args_t args = {
				.max_blocks = chunk_blocks,
				.reservation_in = &ckr->ckr_tentative_reservation,
				.reservation_out = &reservations[ext_count],
				.alignment = PAGE_SIZE / hfsmp->blockSize,
				.alignment_offset = (off + bytes_rolled) / hfsmp->blockSize,
			};

			// Lock the reservation
			ret = hfs_block_alloc(hfsmp, ext,
								  (HFS_ALLOC_USE_TENTATIVE
								   | HFS_ALLOC_LOCKED), &args);

			if (ret == ENOSPC && ext_count) {
				// We've got something we can do
				ext->blockCount = 0;
			} else if (ret) {
				if (ret != ENOSPC)
					LOG_ERROR(ret);
				goto exit;
			}

roll_what_we_have:

			hfs_unlock_sys(&sys_lock);

			transaction_open = false;
			CHECK(hfs_end_transaction(hfsmp), ret, exit);

			if (!ext->blockCount)
				break;

			const off_t ext_bytes = hfs_blk_to_bytes(ext->blockCount,
													 hfsmp->blockSize);

			/*
			 * We set things up here so that cp_io_params can do the
			 * right thing for this extent.  Note that we have a UPL with the
			 * pages locked so we are the only thing that can do reads and
			 * writes in the region that we're rolling.  We set ckr_off_rsrc
			 * to point to the *end* of the extent.
			 */
			hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
			ckr->ckr_off_rsrc += ext_bytes;
			roll_back |= ROLL_BACK_OFFSET;
			ckr->ckr_roll_extent = *ext;
			hfs_unlock_cp(&cp_lock);

			// Write the data out
			CHECK(kr_page_out(vp, upl, upl_offset + bytes_rolled,
							  off + bytes_rolled,
							  ext_bytes), ret, exit);

			chunk_bytes -= ext_bytes;
			chunk_blocks -= ext->blockCount;
			rem_fork_blocks -= ext->blockCount;
			ckr->ckr_preferred_next_block += ext->blockCount;
			bytes_rolled += ext_bytes;
			++ext_count;
		} // while (chunk_bytes && ext_count < max_extents)

		/*
		 * We must make sure the above data hits the device before we update
		 * metadata to point to it.
		 */
		CHECK(hfs_flush(hfsmp, HFS_FLUSH_BARRIER), ret, exit);

		// Update the metadata to point at the data we just wrote

		// We'll be changing in-memory structures so we need this lock
		hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);

		CHECK(hfs_start_transaction(hfsmp), ret, exit);
		transaction_open = true;

		hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK, &sys_lock);

		// Commit the allocations
		hfs_alloc_extra_args_t args = {};

		for (int i = 0; i < ext_count; ++i) {
			args.reservation_in = &reservations[i];

			CHECK(hfs_block_alloc(hfsmp, &extents[i],
								  HFS_ALLOC_COMMIT, &args), ret, exit);

			roll_back |= 1 << i;
		}

		hfs_unlock_sys(&sys_lock);

		// Keep the changes to the catalog extents here
		HFSPlusExtentRecord cat_extents;

		// If at the end of this chunk, fix up ckr_off_rsrc
		if (!chunk_bytes) {
			/*
			 * Are we at the end of the fork?  It's possible that
			 * blocks that were unallocated when we started rolling
			 * this chunk have now been allocated.
			 */
			off_t fork_end = min(pfork->ff_size,
								 hfs_blk_to_bytes(ff_allocblocks(pfork), hfsmp->blockSize));

			if (off + bytes_rolled >= fork_end) {
				if (!off_rsrc_is_rsrc(ckr->ckr_off_rsrc)
					&& hfs_has_rsrc(cp)) {
					ckr->ckr_off_rsrc = off_rsrc_make(0, true);
				} else {
					/*
					 * In this case, we will deal with the xattr here,
					 * but we save the freeing up of the context until
					 * hfs_key_roll_check where it can take the
					 * truncate lock exclusively.
					 */
					ckr->ckr_off_rsrc = INT64_MAX;
				}
			}
		}

		roll_back |= ROLL_BACK_XATTR;

		CHECK(cp_setxattr(cp, cpr, hfsmp, 0, XATTR_REPLACE), ret, exit);

		/*
		 * Replace the extents.  This must be last because we cannot easily
		 * roll back if anything fails after this.
		 */
		hfs_lock_sys(hfsmp, SFL_EXTENTS | SFL_CATALOG, HFS_EXCLUSIVE_LOCK, &sys_lock);
		CHECK(hfs_ext_replace(hfsmp, vp, off / hfsmp->blockSize,
							  extents, ext_count, cat_extents), ret, exit);
		hfs_unlock_sys(&sys_lock);

		transaction_open = false;

		CHECK(hfs_end_transaction(hfsmp), ret, exit);

		// ** N.B. We *must* not fail after here **

		// Copy the catalog extents if we changed them
		if (cat_extents[0].blockCount)
			memcpy(pfork->ff_data.cf_extents, cat_extents, sizeof(cat_extents));

		ckr->ckr_roll_extent = (HFSPlusExtentDescriptor){ 0, 0 };

		hfs_unlock_cp(&cp_lock);

		kr_upl_commit(upl, upl_offset, bytes_rolled, /* last: */ !chunk_bytes);

		roll_back = 0;

		if (!chunk_bytes)
			break;

		upl_offset += bytes_rolled;
		off += bytes_rolled;
	}

	// UPL will have been freed
	upl = NULL;

	hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);

	// Ignore errors here; they shouldn't be fatal
	hfs_key_roll_check(cp, false);

	ret = 0;

exit:

	// hfs_key_roll_check can change things so update here
	cpr = cp->c_cpentry;
	ckr = cpr ? cpr->cp_key_roll_ctx : NULL;

	if (roll_back & ROLL_BACK_EXTENTS_MASK) {
		if (!ISSET(hfs_lock_flags(&sys_lock), SFL_BITMAP)) {
			hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK,
						 &sys_lock);
		}

		for (int i = 0; i < ext_count; ++i) {
			if (!ISSET(roll_back, 1 << i))
				continue;

			if (BlockDeallocate(hfsmp, extents[i].startBlock,
								extents[i].blockCount, 0)) {
				hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
			}
		}
	}

	hfs_unlock_sys(&sys_lock);

	if (roll_back & ROLL_BACK_XATTR) {
		hfs_assert(hfs_is_locked(&cp_lock));

		if (cp_setxattr(cp, cpr, hfsmp, 0, XATTR_REPLACE))
			hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
	}

	if (transaction_open)
		hfs_end_transaction(hfsmp);

	if (roll_back & ROLL_BACK_OFFSET) {
		if (!hfs_is_locked(&cp_lock))
			hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
		ckr->ckr_off_rsrc = roll_back_off_rsrc;
		ckr->ckr_roll_extent = (HFSPlusExtentDescriptor){ 0, 0 };
	}

	if (marked_busy && ckr) {
		if (!hfs_is_locked(&cp_lock))
			hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
		ckr->ckr_busy = false;
		wakeup(&cp->c_cpentry);
	}

	hfs_unlock_cp(&cp_lock);
	hfs_unlock_trunc(&trunc_lock);

	if (upl)
		ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY);

	if (ext_count && reservations[ext_count - 1]) {
		hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK, &sys_lock);
		for (int i = 0; i < ext_count; ++i)
			hfs_free_locked(hfsmp, &reservations[i]);
		hfs_unlock_sys(&sys_lock);
	}

	if (need_put)
		vnode_put(vp);

	if (ret == ESTALE) {
		hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
		ret = HFS_EINCONSISTENT;
	}

	printf ("hfs_key_roll_step }\n");

	return ret;
}

// cnode must be locked (shared at least)
bool hfs_is_key_rolling(cnode_t *cp)
{
	return (cp->c_cpentry && cp->c_cpentry->cp_key_roll_ctx
			&& cp->c_cpentry->cp_key_roll_ctx->ckr_off_rsrc != INT64_MAX);
}

#endif // CONFIG_PROTECT