1 /*
2 * Copyright (c) 2014-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #if CONFIG_PROTECT
30
31 #include "hfs_key_roll.h"
32
33 #include <sys/cprotect.h>
34 #include <sys/ubc.h>
35 #include <sys/fcntl.h>
36 #include <sys/xattr.h>
37 #include <kern/sched_prim.h>
38 #include <vm/vm_pageout.h>
39 #include <pexpert/pexpert.h>
40
41 #include "hfs.h"
42 #include "hfs_extents.h"
43 #include "hfs_kdebug.h"
44
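// Advances a base pointer by a byte offset and casts the result to the requested pointer type.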
45 #define PTR_ADD(type, base, offset) (type)((uintptr_t)(base) + (offset))
46
47 #define HFS_KEY_ROLL_MAX_CHUNK_BYTES (2 * 1024 * 1024)
48
49 static inline void log_error(const char *func, unsigned line, errno_t err)
50 {
51 printf("%s:%u error: %d\n", func, line, err);
52 }
53
54 #define LOG_ERROR(err) log_error(__func__, __LINE__, err)
55
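// Evaluates @x into @var; on error, logs it (unless it is ENOSPC) and jumps to @goto_label.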
56 #define CHECK(x, var, goto_label) \
57 do { \
58 var = (x); \
59 if (var) { \
60 if (var != ENOSPC) \
61 LOG_ERROR(var); \
62 goto goto_label; \
63 } \
64 } while (0)
65
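// Type-generic min() via a GNU statement expression; each argument is evaluated exactly once.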
66 #define min(a, b) \
67 ({ typeof (a) _a = (a); typeof (b) _b = (b); _a < _b ? _a : _b; })
68
69 // -- Some locking helpers --
70
71 /*
72 * These helpers exist to help clean up at the end of a function. A
73 * lock context structure is stored on the stack and is cleaned up
74 * when it goes out of scope, automatically unlocking the lock if it
75 * happens to be locked. It is also safe to call the unlock functions
76 * when no lock has been taken, which is cleaner than having to
77 * declare a separate variable such as have_lock and test it before
78 * unlocking. A brief usage sketch follows the helpers below.
79 */
80
81 typedef struct {
82 void *object;
83 int flags;
84 } hfs_lock_ctx_t;
85
86 #define DECL_HFS_LOCK_CTX(var, unlocker) \
87 hfs_lock_ctx_t var __attribute__((cleanup(unlocker))) = {}
88
89 static inline bool hfs_is_locked(hfs_lock_ctx_t *lock_ctx)
90 {
91 return lock_ctx->object != NULL;
92 }
93
94 static inline int hfs_lock_flags(hfs_lock_ctx_t *lock_ctx)
95 {
96 return lock_ctx->flags;
97 }
98
99 static inline void hfs_lock_cp(cnode_t *cp,
100 enum hfs_locktype lock_type,
101 hfs_lock_ctx_t *lock_ctx)
102 {
103 hfs_lock_always(cp, lock_type);
104 lock_ctx->object = cp;
105 }
106
107 static inline void hfs_unlock_cp(hfs_lock_ctx_t *lock_ctx)
108 {
109 if (lock_ctx->object) {
110 hfs_unlock(lock_ctx->object);
111 lock_ctx->object = NULL;
112 }
113 }
114
115 static inline void hfs_lock_trunc(cnode_t *cp,
116 enum hfs_locktype lock_type,
117 hfs_lock_ctx_t *lock_ctx)
118 {
119 hfs_lock_truncate(cp, lock_type, 0);
120 lock_ctx->object = cp;
121 }
122
123 static inline void hfs_unlock_trunc(hfs_lock_ctx_t *lock_ctx)
124 {
125 if (lock_ctx->object) {
126 hfs_unlock_truncate(lock_ctx->object, 0);
127 lock_ctx->object = NULL;
128 }
129 }
130
131 static inline void hfs_lock_sys(struct hfsmount *hfsmp, int flags,
132 enum hfs_locktype lock_type,
133 hfs_lock_ctx_t *lock_ctx)
134 {
135 lock_ctx->flags |= hfs_systemfile_lock(hfsmp, flags, lock_type);
136 lock_ctx->object = hfsmp;
137 }
138
139 static inline void hfs_unlock_sys(hfs_lock_ctx_t *lock_ctx)
140 {
141 if (lock_ctx->object) {
142 hfs_systemfile_unlock(lock_ctx->object, lock_ctx->flags);
143 lock_ctx->object = NULL;
144 lock_ctx->flags = 0;
145 }
146 }
147
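/*
 * A minimal usage sketch of the helpers above (hfs_key_roll_op below is a
 * real example): the cleanup attribute attached by DECL_HFS_LOCK_CTX makes
 * the unlocker run on every exit path, and the unlockers are no-ops when
 * nothing is locked.
 */
#if 0
static errno_t example_locked_op(cnode_t *cp)
{
	DECL_HFS_LOCK_CTX(cp_lock, hfs_unlock_cp);

	hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);

	// ... work that requires the cnode lock; early returns are safe
	// because hfs_unlock_cp runs automatically when cp_lock goes out
	// of scope ...

	return 0;
}
#endif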
148 // --
149
150 #if DEBUG
151 static const uint32_t ckr_magic1 = 0x7b726b63;
152 static const uint32_t ckr_magic2 = 0x726b637d;
153 #endif
154
155 hfs_cp_key_roll_ctx_t *hfs_key_roll_ctx_alloc(const hfs_cp_key_roll_ctx_t *old,
156 uint16_t pers_key_len,
157 uint16_t cached_key_len,
158 cp_key_pair_t **pcpkp)
159 {
160 hfs_cp_key_roll_ctx_t *ckr;
161
162 size_t size = (sizeof(*ckr) - sizeof(cp_key_pair_t)
163 + cpkp_size(pers_key_len, cached_key_len));
164
165 #if DEBUG
166 size += 4;
167 #endif
168
169 ckr = hfs_mallocz(size);
170
171 #if DEBUG
172 ckr->ckr_magic1 = ckr_magic1;
173 *PTR_ADD(uint32_t *, ckr, size - 4) = ckr_magic2;
174 #endif
175
176 cpkp_init(&ckr->ckr_keys, pers_key_len, cached_key_len);
177
178 if (old) {
179 if (old->ckr_busy)
180 panic("hfs_key_roll_ctx_alloc: old context busy!");
181 ckr->ckr_off_rsrc = old->ckr_off_rsrc;
182 ckr->ckr_preferred_next_block = old->ckr_preferred_next_block;
183
184 cpx_copy(cpkp_cpx(&old->ckr_keys), cpkp_cpx(&ckr->ckr_keys));
185 }
186
187 *pcpkp = &ckr->ckr_keys;
188
189 return ckr;
190 }
191
192 static void hfs_key_roll_ctx_free(hfs_cp_key_roll_ctx_t *ckr)
193 {
194 size_t size = (sizeof(*ckr) - sizeof(cp_key_pair_t)
195 + cpkp_sizex(&ckr->ckr_keys));
196
197 #if DEBUG
198 size += 4;
199 #endif
200
201 hfs_free(ckr, size);
202 }
203
204 void hfs_release_key_roll_ctx(hfsmount_t *hfsmp, cprotect_t cpr)
205 {
206 hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx;
207
208 if (!ckr)
209 return;
210
211 cpkp_flush(&ckr->ckr_keys);
212
213 if (ckr->ckr_tentative_reservation) {
214 int lockf = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
215 hfs_free_tentative(hfsmp, &ckr->ckr_tentative_reservation);
216 hfs_systemfile_unlock(hfsmp, lockf);
217 }
218
219 cpr->cp_key_roll_ctx = NULL;
220
221 cnode_t *cp = cpr->cp_backing_cnode;
222 if (cp)
223 wakeup(&cp->c_cpentry);
224
225 #if DEBUG
226 hfs_assert(ckr->ckr_magic1 == ckr_magic1);
227 hfs_assert(*PTR_ADD(uint32_t *, ckr, sizeof(*ckr) - sizeof(cp_key_pair_t)
228 + cpkp_sizex(&ckr->ckr_keys)) == ckr_magic2);
229 #endif
230
231 hfs_key_roll_ctx_free(ckr);
232 }
233
234 // Records current status into @args
235 static void hfs_key_roll_status(cnode_t *cp, hfs_key_roll_args_t *args)
236 {
237 hfsmount_t *hfsmp = VTOHFS(cp->c_vp);
238 cprotect_t const cpr = cp->c_cpentry;
239 filefork_t *dfork = cp->c_datafork;
240
241 if (!cpr || !dfork) {
242 args->key_class_generation = 0;
243 args->key_revision = 0;
244 args->key_os_version = 0;
245 args->done = -1;
246 args->total = 0;
247 return;
248 }
249
250 uint32_t total_blocks = cp->c_blocks - dfork->ff_unallocblocks;
251 if (cp->c_rsrcfork)
252 total_blocks -= cp->c_rsrcfork->ff_unallocblocks;
253 args->total = hfs_blk_to_bytes(total_blocks, hfsmp->blockSize);
254
255 args->key_class_generation = cp_get_crypto_generation(cpr->cp_pclass);
256 args->key_revision = cpr->cp_key_revision;
257 args->key_os_version = cpr->cp_key_os_version;
258
259 hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx;
260
261 if (!ckr)
262 args->done = -1;
263 else {
264 args->done = off_rsrc_get_off(ckr->ckr_off_rsrc);
265
266 if (off_rsrc_is_rsrc(ckr->ckr_off_rsrc)) {
267 args->done += hfs_blk_to_bytes(ff_allocblocks(dfork),
268 hfsmp->blockSize);
269 }
270 }
271 }
272
273 // The fsctl calls this
274 errno_t hfs_key_roll_op(vfs_context_t vfs_ctx, vnode_t vp,
275 hfs_key_roll_args_t *args)
276 {
277 errno_t ret;
278 cnode_t * const cp = VTOC(vp);
279 cprotect_t const cpr = cp->c_cpentry;
280 hfs_cp_key_roll_ctx_t *ckr = NULL;
281
282 DECL_HFS_LOCK_CTX(cp_lock, hfs_unlock_cp);
283 DECL_HFS_LOCK_CTX(trunc_lock, hfs_unlock_trunc);
284
285 KDBG(HFSDBG_KEY_ROLL | DBG_FUNC_START, kdebug_vnode(vp), args->operation);
286
287 if (args->api_version != HFS_KR_API_VERSION_1) {
288 ret = ENOTSUP;
289 goto exit;
290 }
291
292 if (args->operation != HFS_KR_OP_STATUS) {
293 ret = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0);
294 if (ret)
295 goto exit;
296 }
297
298 switch (args->operation) {
299 case HFS_KR_OP_START:
300 if (!cpr) {
301 ret = ENOTSUP;
302 goto exit;
303 }
304
305 /*
306 * We must hold the truncate lock exclusively in case we have to
307 * rewrap.
308 */
309 hfs_lock_trunc(cp, HFS_EXCLUSIVE_LOCK, &trunc_lock);
310 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
311
312 ckr = cpr->cp_key_roll_ctx;
313 if (ckr)
314 break;
315
316 // Only start rolling if criteria match
317 if (ISSET(args->flags, HFS_KR_MATCH_KEY_CLASS_GENERATION)
318 && (args->key_class_generation
319 != cp_get_crypto_generation(cpr->cp_pclass))) {
320 break;
321 }
322
323 if (ISSET(args->flags, HFS_KR_MATCH_KEY_REVISION)
324 && args->key_revision != cpr->cp_key_revision) {
325 break;
326 }
327
328 if (ISSET(args->flags, HFS_KR_MATCH_KEY_OS_VERSION)
329 && args->key_os_version != cpr->cp_key_os_version) {
330 break;
331 }
332
333 if (cp->c_cpentry->cp_raw_open_count > 0) {
334 // Cannot start key rolling if file is opened for raw access
335 ret = EBUSY;
336 goto exit;
337 }
338
339 ret = hfs_key_roll_start(cp);
340 if (ret)
341 goto exit;
342 break;
343
344 case HFS_KR_OP_STATUS:
345 break;
346
347 case HFS_KR_OP_STEP:
348 CHECK(hfs_key_roll_step(vfs_ctx, vp, INT64_MAX), ret, exit);
349 break;
350
351 case HFS_KR_OP_SET_INFO:
352 if (!ISSET(PE_i_can_has_kernel_configuration(), kPEICanHasDiagnosticAPI)) {
353 ret = EPERM;
354 goto exit;
355 }
356
357 if (!cpr) {
358 ret = ENOTSUP;
359 goto exit;
360 }
361
362 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
363 cpr->cp_key_revision = args->key_revision;
364 cpr->cp_key_os_version = args->key_os_version;
365 ret = cp_setxattr(cp, cpr, VTOHFS(vp), 0, 0);
366 if (ret)
367 goto exit;
368 break;
369
370 default:
371 ret = EINVAL;
372 goto exit;
373 }
374
375 if (!hfs_is_locked(&cp_lock))
376 hfs_lock_cp(cp, HFS_SHARED_LOCK, &cp_lock);
377
378 hfs_key_roll_status(cp, args);
379
380 ret = 0;
381
382 exit:
383
384 hfs_unlock_cp(&cp_lock);
385 hfs_unlock_trunc(&trunc_lock);
386
387 KDBG(HFSDBG_KEY_ROLL | DBG_FUNC_END, ret, ret ? 0 : args->done);
388
389 return ret;
390 }
391
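/*
 * For illustration only: a hedged sketch of how userspace might drive the
 * operation above via fsctl(2). The HFSIOC_KEY_ROLL selector name and the
 * exact hfs_key_roll_args_t layout are assumptions taken from hfs_fsctl.h;
 * request_key_roll() is a hypothetical helper, not part of this file.
 */
#if 0
#include <string.h>
#include <sys/fsctl.h>

static int request_key_roll(const char *path)
{
	hfs_key_roll_args_t args;

	memset(&args, 0, sizeof(args));
	args.api_version = HFS_KR_API_VERSION_1;
	args.operation   = HFS_KR_OP_START;

	// Start rolling; callers then issue HFS_KR_OP_STEP (or HFS_KR_OP_STATUS)
	// until args.done reports -1 (no roll context) or reaches args.total.
	return fsctl(path, HFSIOC_KEY_ROLL, &args, 0);
}
#endif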
392 /*
393 * Initiates key rolling. cnode and truncate locks *must* be held
394 * exclusively.
395 */
396 errno_t hfs_key_roll_start(cnode_t *cp)
397 {
398 errno_t ret;
399
400 hfs_assert(!cp->c_cpentry->cp_key_roll_ctx);
401
402 if (CP_CLASS(cp->c_cpentry->cp_pclass) == PROTECTION_CLASS_F)
403 return ENOTSUP;
404
405 hfsmount_t *hfsmp = VTOHFS(cp->c_vp);
406
407 if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY))
408 return EROFS;
409
410 if (!hfsmp->jnl || !S_ISREG(cp->c_mode))
411 return ENOTSUP;
412
413 cprotect_t cpr = cp->c_cpentry;
414
415 cp_key_class_t key_class = cpr->cp_pclass;
416
417 hfs_unlock(cp);
418
419 hfs_cp_key_roll_ctx_t *ckr;
420
421 int attempts = 0;
422 cp_key_revision_t rev = cp_next_key_revision(cp->c_cpentry->cp_key_revision);
423
424 for (;;) {
425 ret = cp_new(&key_class, hfsmp, cp, cp->c_mode, CP_KEYWRAP_DIFFCLASS, rev,
426 (cp_new_alloc_fn)hfs_key_roll_ctx_alloc, (void **)&ckr);
427 if (ret) {
428 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
429 goto exit;
430 }
431
432 if (key_class == cpr->cp_pclass) {
433 // The usual and easy case: the classes match
434 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
435 break;
436 }
437
438 // AKS has given us a different class, so we need to rewrap
439
440 // The truncate lock is not sufficient
441 vnode_waitforwrites(cp->c_vp, 0, 0, 0, "hfs_key_roll_start");
442
443 // And the resource fork
444 if (cp->c_rsrc_vp)
445 vnode_waitforwrites(cp->c_rsrc_vp, 0, 0, 0, "hfs_key_roll_start");
446
447 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
448
449 cp_key_class_t key_class2 = key_class;
450 cprotect_t new_entry;
451
452 ret = cp_rewrap(cp, hfsmp, &key_class2, &cpr->cp_keys, cpr,
453 (cp_new_alloc_fn)cp_entry_alloc, (void **)&new_entry);
454
455 if (ret) {
456 hfs_key_roll_ctx_free(ckr);
457 goto exit;
458 }
459
460 if (key_class2 == key_class) {
461 // Great, fix things up and we're done
462 cp_replace_entry(hfsmp, cp, new_entry);
463 cpr = new_entry;
464 cpr->cp_pclass = key_class;
465 break;
466 }
467
468 /*
469 * Oh dear, key classes don't match. Unlikely, but perhaps class
470 * generation was rolled again.
471 */
472
473 hfs_key_roll_ctx_free(ckr);
474 cp_entry_destroy(hfsmp, new_entry);
475
476 if (++attempts > 3) {
477 ret = EPERM;
478 goto exit;
479 }
480 } // for (;;)
481
482 cpr->cp_key_roll_ctx = ckr;
483 cpr->cp_key_revision = rev;
484 cpr->cp_key_os_version = cp_os_version();
485
486 return 0;
487
488 exit:
489
490 wakeup(&cp->c_cpentry);
491 return ret;
492 }
493
494 // Rolls up to @up_to
495 errno_t hfs_key_roll_up_to(vfs_context_t vfs_ctx, vnode_t vp, off_rsrc_t up_to)
496 {
497 cnode_t * const cp = VTOC(vp);
498
499 for (;;) {
500 hfs_lock_always(cp, HFS_SHARED_LOCK);
501 cprotect_t cpr = cp->c_cpentry;
502 if (!cpr)
503 break;
504
505 hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx;
506 if (!ckr || ckr->ckr_off_rsrc >= up_to)
507 break;
508
509 hfs_unlock(cp);
510 errno_t ret = hfs_key_roll_step(vfs_ctx, vp, up_to);
511 if (ret)
512 return ret;
513 }
514
515 hfs_unlock(cp);
516
517 return 0;
518 }
519
520 /*
521 * This function checks the size of the file and the key roll offset
522 * and updates the xattr accordingly if necessary. This is called
523 * when a file is truncated and also just before and after key
524 * rolling. cnode must be locked exclusively and might be dropped.
525 * truncate lock must be held shared or exclusive (arg indicates
526 * which). If the truncate lock is held shared, then caller must have
527 * set ckr->ckr_busy.
528 */
529 errno_t hfs_key_roll_check(cnode_t *cp, bool have_excl_trunc_lock)
530 {
531 hfs_assert(cp->c_lockowner == current_thread());
532
533 errno_t ret;
534 cprotect_t cpr;
535 hfs_cp_key_roll_ctx_t *ckr = NULL;
536 hfsmount_t *hfsmp = VTOHFS(cp->c_vp);
537 bool downgrade_trunc_lock = false;
538 off_rsrc_t orig_off_rsrc;
539
540 again:
541
542 cpr = cp->c_cpentry;
543 if (!cpr) {
544 ret = 0;
545 goto exit;
546 }
547
548 ckr = cpr->cp_key_roll_ctx;
549 if (!ckr) {
550 ret = 0;
551 goto exit;
552 }
553
554 hfs_assert(have_excl_trunc_lock || ckr->ckr_busy);
555
556 if (!cp->c_datafork) {
557 ret = 0;
558 goto exit;
559 }
560
561 orig_off_rsrc = ckr->ckr_off_rsrc;
562 off_rsrc_t new_off_rsrc = orig_off_rsrc;
563
564 if (orig_off_rsrc == INT64_MAX) {
565 /*
566 * If orig_off_rsrc == INT64_MAX it means we rolled to the end and we
567 * updated the xattr, but we haven't fixed up the in memory part of it
568 * because we didn't have the truncate lock exclusively.
569 */
570 } else if (off_rsrc_is_rsrc(orig_off_rsrc)) {
571 off_t size;
572
573 if (!cp->c_rsrcfork) {
574 size = hfs_blk_to_bytes(cp->c_blocks - cp->c_datafork->ff_blocks,
575 hfsmp->blockSize);
576 } else {
577 size = min(cp->c_rsrcfork->ff_size,
578 hfs_blk_to_bytes(ff_allocblocks(cp->c_rsrcfork),
579 hfsmp->blockSize));
580 }
581
582 if (off_rsrc_get_off(orig_off_rsrc) >= size)
583 new_off_rsrc = INT64_MAX;
584 } else {
585 off_t size = min(cp->c_datafork->ff_size,
586 hfs_blk_to_bytes(ff_allocblocks(cp->c_datafork),
587 hfsmp->blockSize));
588
589 if (off_rsrc_get_off(orig_off_rsrc) >= size) {
590 new_off_rsrc = hfs_has_rsrc(cp) ? off_rsrc_make(0, true) : INT64_MAX;
591 }
592 }
593
594 // Should we delete roll information?
595 if (new_off_rsrc == INT64_MAX) {
596 /*
597 * If we're deleting the roll information, we need the truncate lock
598 * exclusively to flush out readers and sync writers and
599 * vnode_waitforwrites to flush out async writers.
600 */
601 if (!have_excl_trunc_lock) {
602 ckr->ckr_busy = false;
603 hfs_unlock(cp);
604
605 if (!hfs_truncate_lock_upgrade(cp))
606 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, 0);
607 have_excl_trunc_lock = true;
608 downgrade_trunc_lock = true;
609 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
610
611 // Things may have changed, go around again
612 goto again;
613 }
614
615 // We hold the truncate lock exclusively so vnodes cannot be recycled here
616 vnode_waitforwrites(cp->c_vp, 0, 0, 0, "hfs_key_roll_check_size");
617 if (cp->c_rsrc_vp) {
618 vnode_waitforwrites(cp->c_rsrc_vp, 0, 0,
619 0, "hfs_key_roll_check_size");
620 }
621
622 // It's now safe to copy the keys and free the context
623 if (!cpkp_can_copy(&ckr->ckr_keys, &cpr->cp_keys)) {
624 cprotect_t new_cpr = cp_entry_alloc(cpr,
625 ckr->ckr_keys.cpkp_max_pers_key_len,
626 CP_MAX_CACHEBUFLEN, NULL);
627 cpkp_copy(&ckr->ckr_keys, &new_cpr->cp_keys);
628 cp_replace_entry(hfsmp, cp, new_cpr);
629 cpr = new_cpr;
630 } else {
631 cpkp_copy(&ckr->ckr_keys, &cpr->cp_keys);
632 hfs_release_key_roll_ctx(hfsmp, cpr);
633 }
634 ckr = NULL;
635 }
636
637 if (new_off_rsrc != orig_off_rsrc) {
638 // Update the xattr
639
640 if (ckr)
641 ckr->ckr_off_rsrc = new_off_rsrc;
642 ret = cp_setxattr(cp, cpr, hfsmp, 0, XATTR_REPLACE);
643 if (ret) {
644 if (ckr)
645 ckr->ckr_off_rsrc = orig_off_rsrc;
646 goto exit;
647 }
648 }
649
650 ret = 0;
651
652 exit:
653
654 if (downgrade_trunc_lock) {
655 if (ckr)
656 ckr->ckr_busy = true;
657 hfs_truncate_lock_downgrade(cp);
658 }
659
660 return ret;
661 }
662
663 /*
664 * We need to wrap the UPL routines here because we deal in allocation
665 * blocks, whereas the UPL routines require page-aligned offsets and lengths.
666 */
667
668 static errno_t kr_upl_create(vnode_t vp, off_t offset, int len,
669 upl_offset_t *start_upl_offset,
670 upl_t *upl, upl_page_info_t **pl)
671 {
672 // Round parameters to page size
673 const int rounding = offset & PAGE_MASK;
674
675 *start_upl_offset = rounding;
676
677 offset -= rounding;
678 len += rounding;
679
680 // Don't go beyond end of file
681 off_t max = VTOF(vp)->ff_size - offset;
682 if (len > max)
683 len = max;
684
685 len = round_page_32(len);
686
687 return mach_to_bsd_errno(ubc_create_upl(vp, offset, len, upl, pl,
688 UPL_CLEAN_IN_PLACE | UPL_SET_LITE));
689 }
690
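/*
 * Worked example for kr_upl_create() (a sketch assuming 4 KiB pages): with
 * offset = 0x5200 and len = 0x1000, rounding = 0x200, so *start_upl_offset
 * is 0x200 and the UPL is created over [0x5000, 0x5000 + round_page_32(0x1200))
 * = [0x5000, 0x7000), clipped beforehand so it does not extend past ff_size.
 */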
691 static errno_t kr_page_out(vnode_t vp, upl_t upl, upl_offset_t upl_offset,
692 off_t fork_offset, int len)
693 {
694 const int rounding = upl_offset & PAGE_MASK;
695
696 upl_offset -= rounding;
697 fork_offset -= rounding;
698 len += rounding;
699
700 const off_t fork_size = VTOF(vp)->ff_size;
701 if (fork_offset + len > fork_size)
702 len = fork_size - fork_offset;
703
704 len = round_page_32(len);
705
706 return cluster_pageout(vp, upl, upl_offset, fork_offset, len,
707 fork_size, UPL_IOSYNC | UPL_NOCOMMIT);
708 }
709
710 static void kr_upl_commit(upl_t upl, upl_offset_t upl_offset, int len, bool last)
711 {
712 if (!upl)
713 return;
714
715 const int rounding = upl_offset & PAGE_MASK;
716
717 upl_offset -= rounding;
718 len += rounding;
719
720 /*
721 * If not last we cannot commit the partial page yet so we round
722 * down.
723 */
724 if (last)
725 len = upl_get_size(upl) - upl_offset;
726 else
727 len = trunc_page_32(len);
728
729 /*
730 * This should send pages that were absent onto the speculative
731 * queue and because we passed in UPL_CLEAN_IN_PLACE when we
732 * created the UPL, dirty pages will end up clean.
733 */
734 __unused errno_t err
735 = ubc_upl_commit_range(upl, upl_offset, len,
736 UPL_COMMIT_FREE_ON_EMPTY
737 | UPL_COMMIT_SPECULATE);
738
739 hfs_assert(!err);
740
741 }
742
743 // Rolls 1 chunk if before @limit
744 errno_t hfs_key_roll_step(__unused vfs_context_t vfs_ctx, vnode_t vp, off_rsrc_t limit)
745 {
746 int ret = EIO;
747 cnode_t * const cp = VTOC(vp);
748 cprotect_t cpr = NULL;
749 hfsmount_t * const hfsmp = VTOHFS(vp);
750 upl_t upl = NULL;
751 bool transaction_open = false;
752 bool need_put = false;
753 bool marked_busy = false;
754 hfs_cp_key_roll_ctx_t *ckr = NULL;
755 filefork_t *pfork = NULL;
756 off_rsrc_t roll_back_off_rsrc = 0;
757 int ext_count = 0;
758 const int max_extents = 8; // Change mask below if this changes
759 enum {
760 ROLL_BACK_EXTENTS_MASK = 0x00ff,
761 ROLL_BACK_XATTR = 0x0100,
762 ROLL_BACK_OFFSET = 0x0200,
763 };
764 int roll_back = 0;
765
766 #if 0
767 printf ("{ hfs_key_roll_step\n");
768 #endif
769
770 if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY))
771 return EROFS;
772
773 HFSPlusExtentDescriptor extents[max_extents];
774 struct rl_entry *reservations[max_extents];
775
776 DECL_HFS_LOCK_CTX(trunc_lock, hfs_unlock_trunc);
777 DECL_HFS_LOCK_CTX(cp_lock, hfs_unlock_cp);
778 DECL_HFS_LOCK_CTX(sys_lock, hfs_unlock_sys);
779
780 for (;;) {
781 hfs_lock_trunc(cp, HFS_SHARED_LOCK, &trunc_lock);
782 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
783
784 cpr = cp->c_cpentry;
785 if (!cpr) {
786 // File is not protected
787 ret = 0;
788 goto exit;
789 }
790
791 ckr = cpr->cp_key_roll_ctx;
792 if (!ckr) {
793 // Rolling was finished by something else
794 ret = 0;
795 goto exit;
796 }
797
798 if (!ckr->ckr_busy)
799 break;
800
801 // Something else is rolling, wait until they've finished
802 assert_wait(&cp->c_cpentry, THREAD_ABORTSAFE);
803 hfs_unlock_cp(&cp_lock);
804 hfs_unlock_trunc(&trunc_lock);
805
806 if (msleep(NULL, NULL, PINOD | PCATCH,
807 "hfs_key_roll", NULL) == EINTR) {
808 ret = EINTR;
809 goto exit;
810 }
811 }
812
813 ckr->ckr_busy = true;
814 marked_busy = true;
815
816 CHECK(hfs_key_roll_check(cp, false), ret, exit);
817
818 // hfs_key_roll_check can change things
819 cpr = cp->c_cpentry;
820 ckr = cpr->cp_key_roll_ctx;
821
822 if (!ckr) {
823 ret = 0;
824 goto exit;
825 }
826
827 if (ckr->ckr_off_rsrc >= limit) {
828 ret = 0;
829 goto exit;
830 }
831
832 // Early check for no space. We don't dip into the reserve pool.
833 if (!hfs_freeblks(hfsmp, true)) {
834 ret = ENOSPC;
835 goto exit;
836 }
837
838 if (off_rsrc_is_rsrc(ckr->ckr_off_rsrc)) {
839 if (!VNODE_IS_RSRC(vp)) {
840 /*
841 * We've called hfs_key_roll_check so there's no way we should get
842 * ENOENT here.
843 */
844 vnode_t rvp;
845 CHECK(hfs_vgetrsrc(hfsmp, vp, &rvp), ret, exit);
846 need_put = true;
847 vp = rvp;
848 }
849 pfork = cp->c_rsrcfork;
850 } else {
851 if (VNODE_IS_RSRC(vp)) {
852 CHECK(vnode_get(cp->c_vp), ret, exit);
853 vp = cp->c_vp;
854 need_put = true;
855 }
856 pfork = cp->c_datafork;
857 }
858
859 hfs_unlock_cp(&cp_lock);
860
861 // Get total blocks in fork
862 const uint32_t fork_blocks = min(howmany(pfork->ff_size,
863 hfsmp->blockSize),
864 ff_allocblocks(pfork));
865
866 off_t off = off_rsrc_get_off(ckr->ckr_off_rsrc);
867 hfs_assert(!(off % hfsmp->blockSize));
868
869 uint32_t block = off / hfsmp->blockSize;
870
871 // Figure out remaining fork blocks
872 uint32_t rem_fork_blocks;
873 if (fork_blocks < block)
874 rem_fork_blocks = 0;
875 else
876 rem_fork_blocks = fork_blocks - block;
877
878 uint32_t chunk_blocks = min(rem_fork_blocks,
879 HFS_KEY_ROLL_MAX_CHUNK_BYTES / hfsmp->blockSize);
880
881 off_t chunk_bytes = chunk_blocks * hfsmp->blockSize;
882 upl_offset_t upl_offset = 0;
883
884 if (chunk_bytes) {
885 if (!ckr->ckr_preferred_next_block && off) {
886 /*
887 * Here we fix up ckr_preferred_next_block. This can
888 * happen when we rolled part of a file, then rebooted.
889 * We want to try and allocate from where we left off.
890 */
891 hfs_ext_iter_t *iter;
892
893 iter = hfs_malloc(sizeof(*iter));
894
895 hfs_lock_sys(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK, &sys_lock);
896
897 // Errors are not fatal here
898 if (!hfs_ext_find(vp, off - 1, iter)) {
899 ckr->ckr_preferred_next_block = (iter->group[iter->ndx].startBlock
900 + off / hfsmp->blockSize
901 - iter->file_block);
902 }
903
904 hfs_unlock_sys(&sys_lock);
905
906 hfs_free(iter, sizeof(*iter));
907 }
908
909 // We need to wait for outstanding direct reads to be issued
910 cl_direct_read_lock_t *lck = cluster_lock_direct_read(vp, LCK_RW_TYPE_EXCLUSIVE);
911
912 upl_page_info_t *pl;
913 ret = kr_upl_create(vp, off, chunk_bytes, &upl_offset, &upl, &pl);
914
915 // We have the pages locked now so it's safe to...
916 cluster_unlock_direct_read(lck);
917
918 if (ret) {
919 LOG_ERROR(ret);
920 goto exit;
921 }
922
923 int page_count = upl_get_size(upl) >> PAGE_SHIFT;
924 int page_ndx = 0;
925
926 // Page everything in
927 for (;;) {
928 while (page_ndx < page_count && upl_valid_page(pl, page_ndx))
929 ++page_ndx;
930
931 if (page_ndx >= page_count)
932 break;
933
934 const int page_group_start = page_ndx;
935
936 do {
937 ++page_ndx;
938 } while (page_ndx < page_count && !upl_valid_page(pl, page_ndx));
939
940 const upl_offset_t start = page_group_start << PAGE_SHIFT;
941
942 CHECK(cluster_pagein(vp, upl, start,
943 off - upl_offset + start,
944 (page_ndx - page_group_start) << PAGE_SHIFT,
945 pfork->ff_size,
946 UPL_IOSYNC | UPL_NOCOMMIT), ret, exit);
947 }
948 }
949
950 bool tried_hard = false;
951
952 /*
953 * For each iteration of this loop, we roll up to @max_extents
954 * extents and update the metadata for those extents (one
955 * transaction per iteration.)
956 */
957 for (;;) {
958 /*
959 * This is the number of bytes rolled for the current
960 * iteration of the containing loop.
961 */
962 off_t bytes_rolled = 0;
963
964 roll_back_off_rsrc = ckr->ckr_off_rsrc;
965 ext_count = 0;
966
967 // Allocate and write out up to @max_extents extents
968 while (chunk_bytes && ext_count < max_extents) {
969 /*
970 * We're not making any on disk changes here but
971 * hfs_block_alloc needs to ask the journal about pending
972 * trims and for that it needs the journal lock and the
973 * journal lock must be taken before any system file lock.
974 * We could fix the journal code so that it can deal with
975 * this when there is no active transaction but the
976 * overhead from opening a transaction and then closing it
977 * without making any changes is actually quite small so
978 * we take that much simpler approach here.
979 */
980 CHECK(hfs_start_transaction(hfsmp), ret, exit);
981 transaction_open = true;
982
983 hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK, &sys_lock);
984
985 HFSPlusExtentDescriptor *ext = &extents[ext_count];
986
987 if (!tried_hard
988 && (!ckr->ckr_tentative_reservation
989 || !rl_len(ckr->ckr_tentative_reservation))) {
990 hfs_free_tentative(hfsmp, &ckr->ckr_tentative_reservation);
991
992 tried_hard = true;
993
994 HFSPlusExtentDescriptor extent = {
995 .startBlock = ckr->ckr_preferred_next_block,
996 .blockCount = 1, // This is the minimum
997 };
998
999 hfs_alloc_extra_args_t args = {
1000 .max_blocks = rem_fork_blocks,
1001 .reservation_out = &ckr->ckr_tentative_reservation,
1002 .alignment = PAGE_SIZE / hfsmp->blockSize,
1003 .alignment_offset = (off + bytes_rolled) / hfsmp->blockSize,
1004 };
1005
1006 ret = hfs_block_alloc(hfsmp, &extent,
1007 HFS_ALLOC_TENTATIVE | HFS_ALLOC_TRY_HARD,
1008 &args);
1009
1010 if (ret == ENOSPC && ext_count) {
1011 ext->blockCount = 0;
1012 goto roll_what_we_have;
1013 } else if (ret) {
1014 if (ret != ENOSPC)
1015 LOG_ERROR(ret);
1016 goto exit;
1017 }
1018 }
1019
1020 ext->startBlock = ckr->ckr_preferred_next_block;
1021 ext->blockCount = 1;
1022
1023 hfs_alloc_extra_args_t args = {
1024 .max_blocks = chunk_blocks,
1025 .reservation_in = &ckr->ckr_tentative_reservation,
1026 .reservation_out = &reservations[ext_count],
1027 .alignment = PAGE_SIZE / hfsmp->blockSize,
1028 .alignment_offset = (off + bytes_rolled) / hfsmp->blockSize,
1029 };
1030
1031 // Lock the reservation
1032 ret = hfs_block_alloc(hfsmp, ext,
1033 (HFS_ALLOC_USE_TENTATIVE
1034 | HFS_ALLOC_LOCKED), &args);
1035
1036 if (ret == ENOSPC && ext_count) {
1037 // We've got something we can do
1038 ext->blockCount = 0;
1039 } else if (ret) {
1040 if (ret != ENOSPC)
1041 LOG_ERROR(ret);
1042 goto exit;
1043 }
1044
1045 roll_what_we_have:
1046
1047 hfs_unlock_sys(&sys_lock);
1048
1049 transaction_open = false;
1050 CHECK(hfs_end_transaction(hfsmp), ret, exit);
1051
1052 if (!ext->blockCount)
1053 break;
1054
1055 const off_t ext_bytes = hfs_blk_to_bytes(ext->blockCount,
1056 hfsmp->blockSize);
1057
1058 /*
1059 * We set things up here so that cp_io_params can do the
1060 * right thing for this extent. Note that we have a UPL with the
1061 * pages locked so we are the only thing that can do reads and
1062 * writes in the region that we're rolling. We set ckr_off_rsrc
1063 * to point to the *end* of the extent.
1064 */
1065 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
1066 ckr->ckr_off_rsrc += ext_bytes;
1067 roll_back |= ROLL_BACK_OFFSET;
1068 ckr->ckr_roll_extent = *ext;
1069 hfs_unlock_cp(&cp_lock);
1070
1071 // Write the data out
1072 CHECK(kr_page_out(vp, upl, upl_offset + bytes_rolled,
1073 off + bytes_rolled,
1074 ext_bytes), ret, exit);
1075
1076 chunk_bytes -= ext_bytes;
1077 chunk_blocks -= ext->blockCount;
1078 rem_fork_blocks -= ext->blockCount;
1079 ckr->ckr_preferred_next_block += ext->blockCount;
1080 bytes_rolled += ext_bytes;
1081 ++ext_count;
1082 } // while (chunk_bytes && ext_count < max_extents)
1083
1084 /*
1085 * We must make sure the above data hits the device before we update
1086 * metadata to point to it.
1087 */
1088 if (bytes_rolled)
1089 CHECK(hfs_flush(hfsmp, HFS_FLUSH_BARRIER), ret, exit);
1090
1091 // Update the metadata to point at the data we just wrote
1092
1093 // We'll be changing in-memory structures so we need this lock
1094 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
1095
1096 CHECK(hfs_start_transaction(hfsmp), ret, exit);
1097 transaction_open = true;
1098
1099 hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK, &sys_lock);
1100
1101 // Commit the allocations
1102 hfs_alloc_extra_args_t args = {};
1103
1104 for (int i = 0; i < ext_count; ++i) {
1105 args.reservation_in = &reservations[i];
1106
1107 CHECK(hfs_block_alloc(hfsmp, &extents[i],
1108 HFS_ALLOC_COMMIT, &args), ret, exit);
1109
1110 roll_back |= 1 << i;
1111 }
1112
1113 hfs_unlock_sys(&sys_lock);
1114
1115 // Keep the changes to the catalog extents here
1116 HFSPlusExtentRecord cat_extents;
1117
1118 // If at the end of this chunk, fix up ckr_off_rsrc
1119 if (!chunk_bytes) {
1120 /*
1121 * Are we at the end of the fork? It's possible that
1122 * blocks that were unallocated when we started rolling
1123 * this chunk have now been allocated.
1124 */
1125 off_t fork_end = min(pfork->ff_size,
1126 hfs_blk_to_bytes(ff_allocblocks(pfork), hfsmp->blockSize));
1127
1128 if (off + bytes_rolled >= fork_end) {
1129 if (!off_rsrc_is_rsrc(ckr->ckr_off_rsrc)
1130 && hfs_has_rsrc(cp)) {
1131 ckr->ckr_off_rsrc = off_rsrc_make(0, true);
1132 } else {
1133 /*
1134 * In this case, we will deal with the xattr here,
1135 * but we save the freeing up of the context until
1136 * hfs_key_roll_check where it can take the
1137 * truncate lock exclusively.
1138 */
1139 ckr->ckr_off_rsrc = INT64_MAX;
1140 }
1141 }
1142 }
1143
1144 roll_back |= ROLL_BACK_XATTR;
1145
1146 CHECK(cp_setxattr(cp, cpr, hfsmp, 0, XATTR_REPLACE), ret, exit);
1147
1148 /*
1149 * Replace the extents. This must be last because we cannot easily
1150 * roll back if anything fails after this.
1151 */
1152 hfs_lock_sys(hfsmp, SFL_EXTENTS | SFL_CATALOG, HFS_EXCLUSIVE_LOCK, &sys_lock);
1153 CHECK(hfs_ext_replace(hfsmp, vp, off / hfsmp->blockSize,
1154 extents, ext_count, cat_extents), ret, exit);
1155 hfs_unlock_sys(&sys_lock);
1156
1157 transaction_open = false;
1158 roll_back = 0;
1159
1160 CHECK(hfs_end_transaction(hfsmp), ret, exit);
1161
1162 // ** N.B. We *must* not fail after here **
1163
1164 // Copy the catalog extents if we changed them
1165 if (cat_extents[0].blockCount)
1166 memcpy(pfork->ff_data.cf_extents, cat_extents, sizeof(cat_extents));
1167
1168 ckr->ckr_roll_extent = (HFSPlusExtentDescriptor){ 0, 0 };
1169
1170 hfs_unlock_cp(&cp_lock);
1171
1172 kr_upl_commit(upl, upl_offset, bytes_rolled, /* last: */ !chunk_bytes);
1173
1174 if (!chunk_bytes) {
1175 // We're done
1176 break;
1177 }
1178
1179 upl_offset += bytes_rolled;
1180 off += bytes_rolled;
1181 } // for (;;)
1182
1183 // UPL will have been freed
1184 upl = NULL;
1185
1186 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
1187
1188 // Ignore errors here; they shouldn't be fatal
1189 hfs_key_roll_check(cp, false);
1190
1191 ret = 0;
1192
1193 exit:
1194
1195 // hfs_key_roll_check can change things so update here
1196 cpr = cp->c_cpentry;
1197 ckr = cpr ? cpr->cp_key_roll_ctx : NULL;
1198
1199 if (roll_back & ROLL_BACK_EXTENTS_MASK) {
1200 if (!ISSET(hfs_lock_flags(&sys_lock), SFL_BITMAP)) {
1201 hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK,
1202 &sys_lock);
1203 }
1204
1205 for (int i = 0; i < ext_count; ++i) {
1206 if (!ISSET(roll_back, 1 << i))
1207 continue;
1208
1209 if (BlockDeallocate(hfsmp, extents[i].startBlock,
1210 extents[i].blockCount, 0)) {
1211 hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
1212 }
1213 }
1214 }
1215
1216 hfs_unlock_sys(&sys_lock);
1217
1218 if (roll_back & ROLL_BACK_XATTR) {
1219 hfs_assert(hfs_is_locked(&cp_lock));
1220
1221 if (cp_setxattr(cp, cpr, hfsmp, 0, XATTR_REPLACE))
1222 hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
1223 }
1224
1225 if (transaction_open)
1226 hfs_end_transaction(hfsmp);
1227
1228 if (roll_back & ROLL_BACK_OFFSET) {
1229 if (!hfs_is_locked(&cp_lock))
1230 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
1231 ckr->ckr_off_rsrc = roll_back_off_rsrc;
1232 ckr->ckr_roll_extent = (HFSPlusExtentDescriptor){ 0, 0 };
1233 }
1234
1235 if (marked_busy && ckr) {
1236 if (!hfs_is_locked(&cp_lock))
1237 hfs_lock_cp(cp, HFS_EXCLUSIVE_LOCK, &cp_lock);
1238 ckr->ckr_busy = false;
1239 wakeup(&cp->c_cpentry);
1240 }
1241
1242 hfs_unlock_cp(&cp_lock);
1243 hfs_unlock_trunc(&trunc_lock);
1244
1245 if (upl)
1246 ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY);
1247
1248 if (ext_count && reservations[ext_count - 1]) {
1249 hfs_lock_sys(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK, &sys_lock);
1250 for (int i = 0; i < ext_count; ++i)
1251 hfs_free_locked(hfsmp, &reservations[i]);
1252 hfs_unlock_sys(&sys_lock);
1253 }
1254
1255 if (need_put)
1256 vnode_put(vp);
1257
1258 if (ret == ESTALE) {
1259 hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
1260 ret = HFS_EINCONSISTENT;
1261 }
1262
1263 #if 0
1264 printf ("hfs_key_roll_step }\n");
1265 #endif
1266
1267 return ret;
1268 }
1269
1270 // cnode must be locked (shared at least)
1271 bool hfs_is_key_rolling(cnode_t *cp)
1272 {
1273 return (cp->c_cpentry && cp->c_cpentry->cp_key_roll_ctx
1274 && cp->c_cpentry->cp_key_roll_ctx->ckr_off_rsrc != INT64_MAX);
1275 }
1276
1277 #endif // CONFIG_PROTECT