1 /*
2 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30 /*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment which is defined by:
36 * - cpu-type
37 * - 64-bitness
38 * - root directory
39 * - Team ID, when pointer authentication is in use.
40 *
41 * The point of a shared region is to reduce the setup overhead when exec'ing
42 * a new process. A shared region uses a shared VM submap that gets mapped
43 * automatically at exec() time, see vm_map_exec(). The first process of a given
44 * environment sets up the shared region and all further processes in that
45 * environment can re-use that shared region without having to re-create
46 * the same mappings in their VM map. All they need is contained in the shared
47 * region.
48 *
49 * The region can also share a pmap (mostly for read-only parts but also for the
50 * initial version of some writable parts), which gets "nested" into the
51 * process's pmap. This reduces the number of soft faults: once one process
52 * brings in a page in the shared region, all the other processes can access
53 * it without having to enter it in their own pmap.
54 *
55 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56 * to map the appropriate shared region in the process's address space.
57 * We look up the appropriate shared region for the process's environment.
58 * If we can't find one, we create a new (empty) one and add it to the list.
59 * Otherwise, we just take an extra reference on the shared region we found.
60 *
61 * The "dyld" runtime, mapped into the process's address space at exec() time,
62 * will then use the shared_region_check_np() and shared_region_map_and_slide_np()
63 * system calls to validate and/or populate the shared region with the
64 * appropriate dyld_shared_cache file.
65 *
66 * The shared region is inherited on fork() and the child simply takes an
67 * extra reference on its parent's shared region.
68 *
69 * When the task terminates, we release the reference on its shared region.
70 * When the last reference is released, we destroy the shared region.
71 *
72 * After a chroot(), the calling process keeps using its original shared region,
73 * since that's what was mapped when it was started. But its children
74 * will use a different shared region, because they need to use the shared
75 * cache that's relative to the new root directory.
76 */
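
/*
 * Illustrative sketch (not part of this file): how a userspace runtime such
 * as dyld can ask the kernel whether its shared region is already populated,
 * via the shared_region_check_np() system call mentioned above. The
 * prototype and SYS_ constant are assumed from syscalls.master; treat this
 * as a hedged example rather than the actual dyld code.
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t sr_start = 0;
 *
 *		// returns 0 and fills in the address of the first mapping
 *		// (sr_base_address + sr_first_mapping) if this process's
 *		// shared region has already been populated
 *		if (syscall(SYS_shared_region_check_np, &sr_start) == 0) {
 *			printf("shared cache starts at 0x%llx\n",
 *			    (unsigned long long)sr_start);
 *		} else {
 *			printf("shared region not populated yet\n");
 *		}
 *		return 0;
 *	}
 */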
77
78 /*
79 * COMM PAGE
80 *
81 * A "comm page" is an area of memory that is populated by the kernel with
82 * the appropriate platform-specific version of some commonly used code.
83 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84 * for the native cpu-type. No need to overly optimize translated code
85 * for hardware that is not really there!
86 *
87 * The comm pages are created and populated at boot time.
88 *
89 * The appropriate comm page is mapped into a process's address space
90 * at exec() time, in vm_map_exec(). It is then inherited on fork().
91 *
92 * The comm page is shared between the kernel and all applications of
93 * a given platform. Only the kernel can modify it.
94 *
95 * Applications just branch to fixed addresses in the comm page and find
96 * the right version of the code for the platform. There is also some
97 * data provided and updated by the kernel for processes to retrieve easily
98 * without having to do a system call.
99 */
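
/*
 * Illustrative sketch (not part of this file): reading a kernel-maintained
 * value straight out of the comm page, without a system call. The
 * _COMM_PAGE_NCPUS address and its uint8_t type are assumed from
 * machine/cpu_capabilities.h (a private header in this source tree);
 * treat the exact symbol as an assumption.
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <machine/cpu_capabilities.h>
 *
 *	int
 *	main(void)
 *	{
 *		// the kernel keeps this byte up to date in the comm page,
 *		// so any process can read it with a plain load
 *		uint8_t ncpus = *(volatile uint8_t *)_COMM_PAGE_NCPUS;
 *
 *		printf("configured CPUs (from comm page): %u\n", ncpus);
 *		return 0;
 *	}
 */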
100
101 #include <debug.h>
102
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106
107 #include <mach/mach_vm.h>
108
109 #include <vm/vm_map.h>
110 #include <vm/vm_shared_region.h>
111
112 #include <vm/vm_protos.h>
113
114 #include <machine/commpage.h>
115 #include <machine/cpu_capabilities.h>
116 #include <sys/random.h>
117
118 #if defined (__arm__) || defined(__arm64__)
119 #include <arm/cpu_data_internal.h>
120 #include <arm/misc_protos.h>
121 #endif
122
123 /*
124 * the following codes are used in the subclass
125 * of the DBG_MACH_SHAREDREGION class
126 */
127 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
128
129 #if __has_feature(ptrauth_calls)
130 #include <ptrauth.h>
131 #endif /* __has_feature(ptrauth_calls) */
132
133 /* "dyld" uses this to figure out what the kernel supports */
134 int shared_region_version = 3;
135
136 /* trace level, output is sent to the system log file */
137 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
138
139 /* should local (non-chroot) shared regions persist when no task uses them? */
140 int shared_region_persistence = 0; /* no by default */
141
142
143 /* delay in seconds before reclaiming an unused shared region */
144 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
145
146 struct vm_shared_region *init_task_shared_region = NULL;
147
148 #ifndef CONFIG_EMBEDDED
149 /*
150 * Only one cache gets to slide on Desktop, since we can't
151 * tear down slide info properly today and the desktop actually
152 * produces lots of shared caches.
153 */
154 boolean_t shared_region_completed_slide = FALSE;
155 #endif
156
157 /* this lock protects all the shared region data structures */
158 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
159 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
160
161 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
162 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
163 #define vm_shared_region_sleep(event, interruptible) \
164 lck_mtx_sleep(&vm_shared_region_lock, \
165 LCK_SLEEP_DEFAULT, \
166 (event_t) (event), \
167 (interruptible))
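
/*
 * The macros above implement the serialization pattern used throughout this
 * file: a worker marks the shared region busy under the lock, other threads
 * sleep on the in-progress field, and the worker wakes them when done.
 * Condensed sketch of that pattern (see vm_shared_region_map_file() and
 * vm_shared_region_auth_remap() for the real uses):
 *
 *	vm_shared_region_lock();
 *	while (sr->sr_mapping_in_progress) {
 *		// wait for our turn...
 *		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
 *	}
 *	sr->sr_mapping_in_progress = TRUE;	// let others know we're working
 *	vm_shared_region_unlock();
 *
 *	// ... work on the shared region without holding the lock ...
 *
 *	vm_shared_region_lock();
 *	sr->sr_mapping_in_progress = FALSE;
 *	thread_wakeup((event_t) &sr->sr_mapping_in_progress);
 *	vm_shared_region_unlock();
 */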
168
169 /* the list of currently available shared regions (one per environment) */
170 queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
171 int vm_shared_region_count = 0;
172 int vm_shared_region_peak = 0;
173
174 /*
175 * the number of times an event has forced the recalculation of the reslide
176 * shared region slide.
177 */
178 #if __has_feature(ptrauth_calls)
179 int vm_shared_region_reslide_count = 0;
180 #endif /* __has_feature(ptrauth_calls) */
181
182 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
183 static vm_shared_region_t vm_shared_region_create(
184 void *root_dir,
185 cpu_type_t cputype,
186 cpu_subtype_t cpu_subtype,
187 boolean_t is_64bit,
188 boolean_t reslide);
189 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
190
191 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
192 static void vm_shared_region_timeout(thread_call_param_t param0,
193 thread_call_param_t param1);
194 static kern_return_t vm_shared_region_slide_mapping(
195 vm_shared_region_t sr,
196 user_addr_t slide_info_addr,
197 mach_vm_size_t slide_info_size,
198 mach_vm_offset_t start,
199 mach_vm_size_t size,
200 mach_vm_offset_t slid_mapping,
201 uint32_t slide,
202 memory_object_control_t,
203 vm_prot_t prot); /* forward */
204
205 static int __commpage_setup = 0;
206 #if !CONFIG_EMBEDDED
207 static int __system_power_source = 1; /* init to external power source */
208 static void post_sys_powersource_internal(int i, int internal);
209 #endif
210
211 extern u_int32_t random(void);
212
213 /*
214 * Retrieve a task's shared region and grab an extra reference to
215 * make sure it doesn't disappear while the caller is using it.
216 * The caller is responsible for consuming that extra reference if
217 * necessary.
218 */
219 vm_shared_region_t
220 vm_shared_region_get(
221 task_t task)
222 {
223 vm_shared_region_t shared_region;
224
225 SHARED_REGION_TRACE_DEBUG(
226 ("shared_region: -> get(%p)\n",
227 (void *)VM_KERNEL_ADDRPERM(task)));
228
229 task_lock(task);
230 vm_shared_region_lock();
231 shared_region = task->shared_region;
232 if (shared_region) {
233 assert(shared_region->sr_ref_count > 0);
234 vm_shared_region_reference_locked(shared_region);
235 }
236 vm_shared_region_unlock();
237 task_unlock(task);
238
239 SHARED_REGION_TRACE_DEBUG(
240 ("shared_region: get(%p) <- %p\n",
241 (void *)VM_KERNEL_ADDRPERM(task),
242 (void *)VM_KERNEL_ADDRPERM(shared_region)));
243
244 return shared_region;
245 }
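
/*
 * Typical usage of the getter above, from kernel code that needs to inspect
 * a task's shared region: the extra reference taken by vm_shared_region_get()
 * must be dropped with vm_shared_region_deallocate() when the caller is done.
 * Minimal sketch:
 *
 *	vm_shared_region_t sr;
 *
 *	sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		mach_vm_offset_t base = vm_shared_region_base_address(sr);
 *		// ... use "base" and the other sr_* accessors ...
 *		vm_shared_region_deallocate(sr);  // consume the extra reference
 *	}
 */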
246
247 /*
248 * Get the base address of the shared region.
249 * That's the address at which it needs to be mapped in the process's address
250 * space.
251 * No need to lock since this data is set when the shared region is
252 * created and is never modified after that. The caller must hold an extra
253 * reference on the shared region to prevent it from being destroyed.
254 */
255 mach_vm_offset_t
256 vm_shared_region_base_address(
257 vm_shared_region_t shared_region)
258 {
259 SHARED_REGION_TRACE_DEBUG(
260 ("shared_region: -> base_address(%p)\n",
261 (void *)VM_KERNEL_ADDRPERM(shared_region)));
262 assert(shared_region->sr_ref_count > 1);
263 SHARED_REGION_TRACE_DEBUG(
264 ("shared_region: base_address(%p) <- 0x%llx\n",
265 (void *)VM_KERNEL_ADDRPERM(shared_region),
266 (long long)shared_region->sr_base_address));
267 return shared_region->sr_base_address;
268 }
269
270 /*
271 * Get the size of the shared region.
272 * That's the size that needs to be mapped in the process's address
273 * space.
274 * No need to lock since this data is set when the shared region is
275 * created and is never modified after that. The caller must hold an extra
276 * reference on the shared region to prevent it from being destroyed.
277 */
278 mach_vm_size_t
279 vm_shared_region_size(
280 vm_shared_region_t shared_region)
281 {
282 SHARED_REGION_TRACE_DEBUG(
283 ("shared_region: -> size(%p)\n",
284 (void *)VM_KERNEL_ADDRPERM(shared_region)));
285 assert(shared_region->sr_ref_count > 1);
286 SHARED_REGION_TRACE_DEBUG(
287 ("shared_region: size(%p) <- 0x%llx\n",
288 (void *)VM_KERNEL_ADDRPERM(shared_region),
289 (long long)shared_region->sr_size));
290 return shared_region->sr_size;
291 }
292
293 /*
294 * Get the memory entry of the shared region.
295 * That's the "memory object" that needs to be mapped in the process's address
296 * space.
297 * No need to lock since this data is set when the shared region is
298 * created and is never modified after that. The caller must hold an extra
299 * reference on the shared region to prevent it from being destroyed.
300 */
301 ipc_port_t
302 vm_shared_region_mem_entry(
303 vm_shared_region_t shared_region)
304 {
305 SHARED_REGION_TRACE_DEBUG(
306 ("shared_region: -> mem_entry(%p)\n",
307 (void *)VM_KERNEL_ADDRPERM(shared_region)));
308 assert(shared_region->sr_ref_count > 1);
309 SHARED_REGION_TRACE_DEBUG(
310 ("shared_region: mem_entry(%p) <- %p\n",
311 (void *)VM_KERNEL_ADDRPERM(shared_region),
312 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_mem_entry)));
313 return shared_region->sr_mem_entry;
314 }
315
316 vm_map_t
317 vm_shared_region_vm_map(
318 vm_shared_region_t shared_region)
319 {
320 ipc_port_t sr_handle;
321 vm_named_entry_t sr_mem_entry;
322 vm_map_t sr_map;
323
324 SHARED_REGION_TRACE_DEBUG(
325 ("shared_region: -> vm_map(%p)\n",
326 (void *)VM_KERNEL_ADDRPERM(shared_region)));
327 assert(shared_region->sr_ref_count > 1);
328
329 sr_handle = shared_region->sr_mem_entry;
330 sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
331 sr_map = sr_mem_entry->backing.map;
332 assert(sr_mem_entry->is_sub_map);
333
334 SHARED_REGION_TRACE_DEBUG(
335 ("shared_region: vm_map(%p) <- %p\n",
336 (void *)VM_KERNEL_ADDRPERM(shared_region),
337 (void *)VM_KERNEL_ADDRPERM(sr_map)));
338 return sr_map;
339 }
340
341 /*
342 * Set the shared region the process should use.
343 * A NULL new shared region means that we just want to release the old
344 * shared region.
345 * The caller should already have an extra reference on the new shared region
346 * (if any). We release a reference on the old shared region (if any).
347 */
348 void
349 vm_shared_region_set(
350 task_t task,
351 vm_shared_region_t new_shared_region)
352 {
353 vm_shared_region_t old_shared_region;
354
355 SHARED_REGION_TRACE_DEBUG(
356 ("shared_region: -> set(%p, %p)\n",
357 (void *)VM_KERNEL_ADDRPERM(task),
358 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
359
360 task_lock(task);
361 vm_shared_region_lock();
362
363 old_shared_region = task->shared_region;
364 if (new_shared_region) {
365 assert(new_shared_region->sr_ref_count > 0);
366 }
367
368 task->shared_region = new_shared_region;
369
370 vm_shared_region_unlock();
371 task_unlock(task);
372
373 if (old_shared_region) {
374 assert(old_shared_region->sr_ref_count > 0);
375 vm_shared_region_deallocate(old_shared_region);
376 }
377
378 SHARED_REGION_TRACE_DEBUG(
379 ("shared_region: set(%p) <- old=%p new=%p\n",
380 (void *)VM_KERNEL_ADDRPERM(task),
381 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
382 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
383 }
384
385 /*
386 * Look up the shared region for the desired environment.
387 * If none is found, create a new (empty) one.
388 * Grab an extra reference on the returned shared region, to make sure
389 * it doesn't get destroyed before the caller is done with it. The caller
390 * is responsible for consuming that extra reference if necessary.
391 */
392 vm_shared_region_t
393 vm_shared_region_lookup(
394 void *root_dir,
395 cpu_type_t cputype,
396 cpu_subtype_t cpu_subtype,
397 boolean_t is_64bit,
398 boolean_t reslide)
399 {
400 vm_shared_region_t shared_region;
401 vm_shared_region_t new_shared_region;
402
403 SHARED_REGION_TRACE_DEBUG(
404 ("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d)\n",
405 (void *)VM_KERNEL_ADDRPERM(root_dir),
406 cputype, cpu_subtype, is_64bit, reslide));
407
408 shared_region = NULL;
409 new_shared_region = NULL;
410
411 vm_shared_region_lock();
412 for (;;) {
413 queue_iterate(&vm_shared_region_queue,
414 shared_region,
415 vm_shared_region_t,
416 sr_q) {
417 assert(shared_region->sr_ref_count > 0);
418 if (shared_region->sr_cpu_type == cputype &&
419 #if !__has_feature(ptrauth_calls) /* arm64e/arm64 use same region */
420 shared_region->sr_cpu_subtype == cpu_subtype &&
421 #endif /* !__has_feature(ptrauth_calls) */
422 shared_region->sr_root_dir == root_dir &&
423 shared_region->sr_64bit == is_64bit &&
424 #if __has_feature(ptrauth_calls)
425 shared_region->sr_reslide == reslide &&
426 #endif /* __has_feature(ptrauth_calls) */
427 !shared_region->sr_stale) {
428 /* found a match ! */
429 vm_shared_region_reference_locked(shared_region);
430 goto done;
431 }
432 }
433 if (new_shared_region == NULL) {
434 /* no match: create a new one */
435 vm_shared_region_unlock();
436 new_shared_region = vm_shared_region_create(root_dir,
437 cputype,
438 cpu_subtype,
439 is_64bit,
440 reslide);
441 /* do the lookup again, in case we lost a race */
442 vm_shared_region_lock();
443 continue;
444 }
445 /* still no match: use our new one */
446 shared_region = new_shared_region;
447 new_shared_region = NULL;
448 queue_enter(&vm_shared_region_queue,
449 shared_region,
450 vm_shared_region_t,
451 sr_q);
452 vm_shared_region_count++;
453 if (vm_shared_region_count > vm_shared_region_peak) {
454 vm_shared_region_peak = vm_shared_region_count;
455 }
456 break;
457 }
458
459 done:
460 vm_shared_region_unlock();
461
462 if (new_shared_region) {
463 /*
464 * We lost a race with someone else to create a new shared
465 * region for that environment. Get rid of our unused one.
466 */
467 assert(new_shared_region->sr_ref_count == 1);
468 new_shared_region->sr_ref_count--;
469 vm_shared_region_destroy(new_shared_region);
470 new_shared_region = NULL;
471 }
472
473 SHARED_REGION_TRACE_DEBUG(
474 ("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d) <- %p\n",
475 (void *)VM_KERNEL_ADDRPERM(root_dir),
476 cputype, cpu_subtype, is_64bit, reslide,
477 (void *)VM_KERNEL_ADDRPERM(shared_region)));
478
479 assert(shared_region->sr_ref_count > 0);
480 return shared_region;
481 }
482
483 /*
484 * Take an extra reference on a shared region.
485 * The vm_shared_region_lock should already be held by the caller.
486 */
487 static void
488 vm_shared_region_reference_locked(
489 vm_shared_region_t shared_region)
490 {
491 LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
492
493 SHARED_REGION_TRACE_DEBUG(
494 ("shared_region: -> reference_locked(%p)\n",
495 (void *)VM_KERNEL_ADDRPERM(shared_region)));
496 assert(shared_region->sr_ref_count > 0);
497 shared_region->sr_ref_count++;
498 assert(shared_region->sr_ref_count != 0);
499
500 if (shared_region->sr_timer_call != NULL) {
501 boolean_t cancelled;
502
503 /* cancel and free any pending timeout */
504 cancelled = thread_call_cancel(shared_region->sr_timer_call);
505 if (cancelled) {
506 thread_call_free(shared_region->sr_timer_call);
507 shared_region->sr_timer_call = NULL;
508 /* release the reference held by the cancelled timer */
509 shared_region->sr_ref_count--;
510 } else {
511 /* the timer will drop the reference and free itself */
512 }
513 }
514
515 SHARED_REGION_TRACE_DEBUG(
516 ("shared_region: reference_locked(%p) <- %d\n",
517 (void *)VM_KERNEL_ADDRPERM(shared_region),
518 shared_region->sr_ref_count));
519 }
520
521 /*
522 * Take a reference on a shared region.
523 */
524 void
525 vm_shared_region_reference(vm_shared_region_t shared_region)
526 {
527 SHARED_REGION_TRACE_DEBUG(
528 ("shared_region: -> reference(%p)\n",
529 (void *)VM_KERNEL_ADDRPERM(shared_region)));
530
531 vm_shared_region_lock();
532 vm_shared_region_reference_locked(shared_region);
533 vm_shared_region_unlock();
534
535 SHARED_REGION_TRACE_DEBUG(
536 ("shared_region: reference(%p) <- %d\n",
537 (void *)VM_KERNEL_ADDRPERM(shared_region),
538 shared_region->sr_ref_count));
539 }
540
541 /*
542 * Release a reference on the shared region.
543 * Destroy it if there are no references left.
544 */
545 void
546 vm_shared_region_deallocate(
547 vm_shared_region_t shared_region)
548 {
549 SHARED_REGION_TRACE_DEBUG(
550 ("shared_region: -> deallocate(%p)\n",
551 (void *)VM_KERNEL_ADDRPERM(shared_region)));
552
553 vm_shared_region_lock();
554
555 assert(shared_region->sr_ref_count > 0);
556
557 if (shared_region->sr_root_dir == NULL) {
558 /*
559 * Local (i.e. based on the boot volume) shared regions
560 * can persist or not based on the "shared_region_persistence"
561 * sysctl.
562 * Make sure that this one complies.
563 *
564 * See comments in vm_shared_region_slide() for notes about
565 * shared regions we have slid (which are not torn down currently).
566 */
567 if (shared_region_persistence &&
568 !shared_region->sr_persists) {
569 /* make this one persistent */
570 shared_region->sr_ref_count++;
571 shared_region->sr_persists = TRUE;
572 } else if (!shared_region_persistence &&
573 shared_region->sr_persists) {
574 /* make this one no longer persistent */
575 assert(shared_region->sr_ref_count > 1);
576 shared_region->sr_ref_count--;
577 shared_region->sr_persists = FALSE;
578 }
579 }
580
581 assert(shared_region->sr_ref_count > 0);
582 shared_region->sr_ref_count--;
583 SHARED_REGION_TRACE_DEBUG(
584 ("shared_region: deallocate(%p): ref now %d\n",
585 (void *)VM_KERNEL_ADDRPERM(shared_region),
586 shared_region->sr_ref_count));
587
588 if (shared_region->sr_ref_count == 0) {
589 uint64_t deadline;
590
591 /*
592 * Even though a shared region is unused, delay a while before
593 * tearing it down, in case a new app launch can use it.
594 */
595 if (shared_region->sr_timer_call == NULL &&
596 shared_region_destroy_delay != 0 &&
597 !shared_region->sr_stale) {
598 /* hold one reference for the timer */
599 assert(!shared_region->sr_mapping_in_progress);
600 shared_region->sr_ref_count++;
601
602 /* set up the timer */
603 shared_region->sr_timer_call = thread_call_allocate(
604 (thread_call_func_t) vm_shared_region_timeout,
605 (thread_call_param_t) shared_region);
606
607 /* schedule the timer */
608 clock_interval_to_deadline(shared_region_destroy_delay,
609 NSEC_PER_SEC,
610 &deadline);
611 thread_call_enter_delayed(shared_region->sr_timer_call,
612 deadline);
613
614 SHARED_REGION_TRACE_DEBUG(
615 ("shared_region: deallocate(%p): armed timer\n",
616 (void *)VM_KERNEL_ADDRPERM(shared_region)));
617
618 vm_shared_region_unlock();
619 } else {
620 /* timer expired: let go of this shared region */
621
622 /*
623 * Remove it from the queue first, so no one can find
624 * it...
625 */
626 queue_remove(&vm_shared_region_queue,
627 shared_region,
628 vm_shared_region_t,
629 sr_q);
630 vm_shared_region_count--;
631 vm_shared_region_unlock();
632
633 /* ... and destroy it */
634 vm_shared_region_destroy(shared_region);
635 shared_region = NULL;
636 }
637 } else {
638 vm_shared_region_unlock();
639 }
640
641 SHARED_REGION_TRACE_DEBUG(
642 ("shared_region: deallocate(%p) <-\n",
643 (void *)VM_KERNEL_ADDRPERM(shared_region)));
644 }
645
646 void
647 vm_shared_region_timeout(
648 thread_call_param_t param0,
649 __unused thread_call_param_t param1)
650 {
651 vm_shared_region_t shared_region;
652
653 shared_region = (vm_shared_region_t) param0;
654
655 vm_shared_region_deallocate(shared_region);
656 }
657
658
659 /*
660 * Create a new (empty) shared region for a new environment.
661 */
662 static vm_shared_region_t
663 vm_shared_region_create(
664 void *root_dir,
665 cpu_type_t cputype,
666 cpu_subtype_t cpu_subtype,
667 boolean_t is_64bit,
668 #if !__has_feature(ptrauth_calls)
669 __unused
670 #endif /* !__has_feature(ptrauth_calls) */
671 boolean_t reslide)
672 {
673 kern_return_t kr;
674 vm_named_entry_t mem_entry;
675 ipc_port_t mem_entry_port;
676 vm_shared_region_t shared_region;
677 vm_map_t sub_map;
678 mach_vm_offset_t base_address, pmap_nesting_start;
679 mach_vm_size_t size, pmap_nesting_size;
680
681 SHARED_REGION_TRACE_INFO(
682 ("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d)\n",
683 (void *)VM_KERNEL_ADDRPERM(root_dir),
684 cputype, cpu_subtype, is_64bit, reslide));
685
686 base_address = 0;
687 size = 0;
688 mem_entry = NULL;
689 mem_entry_port = IPC_PORT_NULL;
690 sub_map = VM_MAP_NULL;
691
692 /* create a new shared region structure... */
693 shared_region = kalloc(sizeof(*shared_region));
694 if (shared_region == NULL) {
695 SHARED_REGION_TRACE_ERROR(
696 ("shared_region: create: couldn't allocate\n"));
697 goto done;
698 }
699
700 /* figure out the correct settings for the desired environment */
701 if (is_64bit) {
702 switch (cputype) {
703 #if defined(__arm64__)
704 case CPU_TYPE_ARM64:
705 base_address = SHARED_REGION_BASE_ARM64;
706 size = SHARED_REGION_SIZE_ARM64;
707 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
708 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
709 break;
710 #elif !defined(__arm__)
711 case CPU_TYPE_I386:
712 base_address = SHARED_REGION_BASE_X86_64;
713 size = SHARED_REGION_SIZE_X86_64;
714 pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
715 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
716 break;
717 case CPU_TYPE_POWERPC:
718 base_address = SHARED_REGION_BASE_PPC64;
719 size = SHARED_REGION_SIZE_PPC64;
720 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
721 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
722 break;
723 #endif
724 default:
725 SHARED_REGION_TRACE_ERROR(
726 ("shared_region: create: unknown cpu type %d\n",
727 cputype));
728 kfree(shared_region, sizeof(*shared_region));
729 shared_region = NULL;
730 goto done;
731 }
732 } else {
733 switch (cputype) {
734 #if defined(__arm__) || defined(__arm64__)
735 case CPU_TYPE_ARM:
736 base_address = SHARED_REGION_BASE_ARM;
737 size = SHARED_REGION_SIZE_ARM;
738 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
739 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
740 break;
741 #else
742 case CPU_TYPE_I386:
743 base_address = SHARED_REGION_BASE_I386;
744 size = SHARED_REGION_SIZE_I386;
745 pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
746 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
747 break;
748 case CPU_TYPE_POWERPC:
749 base_address = SHARED_REGION_BASE_PPC;
750 size = SHARED_REGION_SIZE_PPC;
751 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
752 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
753 break;
754 #endif
755 default:
756 SHARED_REGION_TRACE_ERROR(
757 ("shared_region: create: unknown cpu type %d\n",
758 cputype));
759 kfree(shared_region, sizeof(*shared_region));
760 shared_region = NULL;
761 goto done;
762 }
763 }
764
765 /* create a memory entry structure and a Mach port handle */
766 kr = mach_memory_entry_allocate(&mem_entry, &mem_entry_port);
767 if (kr != KERN_SUCCESS) {
768 kfree(shared_region, sizeof(*shared_region));
769 shared_region = NULL;
770 SHARED_REGION_TRACE_ERROR(
771 ("shared_region: create: "
772 "couldn't allocate mem_entry\n"));
773 goto done;
774 }
775
776 #if defined(__arm__) || defined(__arm64__)
777 {
778 struct pmap *pmap_nested;
779 int pmap_flags = 0;
780 pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
781
782
783 pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
784 if (pmap_nested != PMAP_NULL) {
785 pmap_set_nested(pmap_nested);
786 sub_map = vm_map_create(pmap_nested, 0, (vm_map_offset_t)size, TRUE);
787 #if defined(__arm64__)
788 if (is_64bit ||
789 page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
790 /* enforce 16KB alignment of VM map entries */
791 vm_map_set_page_shift(sub_map,
792 SIXTEENK_PAGE_SHIFT);
793 }
794
795 #elif (__ARM_ARCH_7K__ >= 2)
796 /* enforce 16KB alignment for watch targets with new ABI */
797 vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
798 #endif /* __arm64__ */
799 } else {
800 sub_map = VM_MAP_NULL;
801 }
802 }
803 #else
804 /* create a VM sub map and its pmap */
805 sub_map = vm_map_create(pmap_create_options(NULL, 0, is_64bit), 0, size, TRUE);
806 #endif
807 if (sub_map == VM_MAP_NULL) {
808 ipc_port_release_send(mem_entry_port);
809 kfree(shared_region, sizeof(*shared_region));
810 shared_region = NULL;
811 SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
812 goto done;
813 }
814
815 /* shared regions should always enforce code-signing */
816 vm_map_cs_enforcement_set(sub_map, true);
817 assert(vm_map_cs_enforcement(sub_map));
818 assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
819
820 assert(!sub_map->disable_vmentry_reuse);
821 sub_map->is_nested_map = TRUE;
822
823 /* make the memory entry point to the VM sub map */
824 mem_entry->is_sub_map = TRUE;
825 mem_entry->backing.map = sub_map;
826 mem_entry->size = size;
827 mem_entry->protection = VM_PROT_ALL;
828
829 /* make the shared region point at the memory entry */
830 shared_region->sr_mem_entry = mem_entry_port;
831
832 /* fill in the shared region's environment and settings */
833 shared_region->sr_base_address = base_address;
834 shared_region->sr_size = size;
835 shared_region->sr_pmap_nesting_start = pmap_nesting_start;
836 shared_region->sr_pmap_nesting_size = pmap_nesting_size;
837 shared_region->sr_cpu_type = cputype;
838 shared_region->sr_cpu_subtype = cpu_subtype;
839 shared_region->sr_64bit = (uint8_t)is_64bit;
840 shared_region->sr_root_dir = root_dir;
841
842 queue_init(&shared_region->sr_q);
843 shared_region->sr_mapping_in_progress = FALSE;
844 shared_region->sr_slide_in_progress = FALSE;
845 shared_region->sr_persists = FALSE;
846 shared_region->sr_stale = FALSE;
847 shared_region->sr_timer_call = NULL;
848 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
849
850 /* grab a reference for the caller */
851 shared_region->sr_ref_count = 1;
852
853 shared_region->sr_slide = 0; /* not slid yet */
854
855 /* Initialize UUID and other metadata */
856 memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
857 shared_region->sr_uuid_copied = FALSE;
858 shared_region->sr_images_count = 0;
859 shared_region->sr_images = NULL;
860 #if __has_feature(ptrauth_calls)
861 shared_region->sr_reslide = reslide;
862 shared_region->sr_num_auth_section = 0;
863 for (uint_t i = 0; i < NUM_SR_AUTH_SECTIONS; ++i) {
864 shared_region->sr_auth_section[i] = NULL;
865 }
866 shared_region->sr_num_auth_section = 0;
867 #endif /* __has_feature(ptrauth_calls) */
868
869 done:
870 if (shared_region) {
871 SHARED_REGION_TRACE_INFO(
872 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d"
873 "base=0x%llx,size=0x%llx) <- "
874 "%p mem=(%p,%p) map=%p pmap=%p\n",
875 (void *)VM_KERNEL_ADDRPERM(root_dir),
876 cputype, cpu_subtype, is_64bit, reslide,
877 (long long)base_address,
878 (long long)size,
879 (void *)VM_KERNEL_ADDRPERM(shared_region),
880 (void *)VM_KERNEL_ADDRPERM(mem_entry_port),
881 (void *)VM_KERNEL_ADDRPERM(mem_entry),
882 (void *)VM_KERNEL_ADDRPERM(sub_map),
883 (void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
884 } else {
885 SHARED_REGION_TRACE_INFO(
886 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,"
887 "base=0x%llx,size=0x%llx) <- NULL",
888 (void *)VM_KERNEL_ADDRPERM(root_dir),
889 cputype, cpu_subtype, is_64bit,
890 (long long)base_address,
891 (long long)size));
892 }
893 return shared_region;
894 }
895
896 /*
897 * Destroy a now-unused shared region.
898 * The shared region is no longer in the queue and can not be looked up.
899 */
900 static void
901 vm_shared_region_destroy(
902 vm_shared_region_t shared_region)
903 {
904 vm_named_entry_t mem_entry;
905 vm_map_t map;
906
907 SHARED_REGION_TRACE_INFO(
908 ("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d)\n",
909 (void *)VM_KERNEL_ADDRPERM(shared_region),
910 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
911 shared_region->sr_cpu_type,
912 shared_region->sr_cpu_subtype,
913 shared_region->sr_64bit));
914
915 assert(shared_region->sr_ref_count == 0);
916 assert(!shared_region->sr_persists);
917
918 mem_entry = (vm_named_entry_t) ip_get_kobject(shared_region->sr_mem_entry);
919 assert(mem_entry->is_sub_map);
920 assert(!mem_entry->internal);
921 assert(!mem_entry->is_copy);
922 map = mem_entry->backing.map;
923
924 /*
925 * Clean up the pmap first. The virtual addresses that were
926 * entered in this possibly "nested" pmap may have different values
927 * than the VM map's min and max offsets, if the VM sub map was
928 * mapped at a non-zero offset in the processes' main VM maps, which
929 * is usually the case, so the clean-up we do in vm_map_destroy() would
930 * not be enough.
931 */
932 if (map->pmap) {
933 pmap_remove(map->pmap,
934 (vm_map_offset_t)shared_region->sr_base_address,
935 (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
936 }
937
938 /*
939 * Release our (one and only) handle on the memory entry.
940 * This will generate a no-senders notification, which will be processed
941 * by ipc_kobject_notify(), which will release the one and only
942 * reference on the memory entry and cause it to be destroyed, along
943 * with the VM sub map and its pmap.
944 */
945 mach_memory_entry_port_release(shared_region->sr_mem_entry);
946 mem_entry = NULL;
947 shared_region->sr_mem_entry = IPC_PORT_NULL;
948
949 if (shared_region->sr_timer_call) {
950 thread_call_free(shared_region->sr_timer_call);
951 }
952
953 #if __has_feature(ptrauth_calls)
954 /*
955 * Free the cached copies of slide_info for the AUTH regions.
956 */
957 for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
958 vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
959 if (si != NULL) {
960 vm_object_deallocate(si->si_slide_object);
961 kheap_free(KHEAP_DATA_BUFFERS, si->si_slide_info_entry, si->si_slide_info_size);
962 kfree(si, sizeof *si);
963 shared_region->sr_auth_section[i] = NULL;
964 }
965 }
966 shared_region->sr_num_auth_section = 0;
967 #endif /* __has_feature(ptrauth_calls) */
968
969 /* release the shared region structure... */
970 kfree(shared_region, sizeof(*shared_region));
971
972 SHARED_REGION_TRACE_DEBUG(
973 ("shared_region: destroy(%p) <-\n",
974 (void *)VM_KERNEL_ADDRPERM(shared_region)));
975 shared_region = NULL;
976 }
977
978 /*
979 * Gets the address of the first (in time) mapping in the shared region.
980 */
981 kern_return_t
982 vm_shared_region_start_address(
983 vm_shared_region_t shared_region,
984 mach_vm_offset_t *start_address)
985 {
986 kern_return_t kr;
987 mach_vm_offset_t sr_base_address;
988 mach_vm_offset_t sr_first_mapping;
989
990 SHARED_REGION_TRACE_DEBUG(
991 ("shared_region: -> start_address(%p)\n",
992 (void *)VM_KERNEL_ADDRPERM(shared_region)));
993 assert(shared_region->sr_ref_count > 1);
994
995 vm_shared_region_lock();
996
997 /*
998 * Wait if there's another thread establishing a mapping
999 * in this shared region right when we're looking at it.
1000 * We want a consistent view of the map...
1001 */
1002 while (shared_region->sr_mapping_in_progress) {
1003 /* wait for our turn... */
1004 assert(shared_region->sr_ref_count > 1);
1005 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1006 THREAD_UNINT);
1007 }
1008 assert(!shared_region->sr_mapping_in_progress);
1009 assert(shared_region->sr_ref_count > 1);
1010
1011 sr_base_address = shared_region->sr_base_address;
1012 sr_first_mapping = shared_region->sr_first_mapping;
1013
1014 if (sr_first_mapping == (mach_vm_offset_t) -1) {
1015 /* shared region is empty */
1016 kr = KERN_INVALID_ADDRESS;
1017 } else {
1018 kr = KERN_SUCCESS;
1019 *start_address = sr_base_address + sr_first_mapping;
1020 }
1021
1022
1023 vm_shared_region_unlock();
1024
1025 SHARED_REGION_TRACE_DEBUG(
1026 ("shared_region: start_address(%p) <- 0x%llx\n",
1027 (void *)VM_KERNEL_ADDRPERM(shared_region),
1028 (long long)shared_region->sr_base_address));
1029
1030 return kr;
1031 }
1032
1033 /*
1034 * Look up a pre-existing mapping in the shared region, for replacement.
1035 * Takes an extra object reference if found.
1036 */
1037 static kern_return_t
1038 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1039 {
1040 vm_map_entry_t found;
1041
1042 /* find the shared region's map entry to slide */
1043 vm_map_lock_read(map);
1044 if (!vm_map_lookup_entry(map, addr, &found)) {
1045 /* no mapping there */
1046 vm_map_unlock(map);
1047 return KERN_INVALID_ARGUMENT;
1048 }
1049
1050 *entry = *found;
1051 /* extra ref to keep object alive while map is unlocked */
1052 vm_object_reference(VME_OBJECT(found));
1053 vm_map_unlock_read(map);
1054 return KERN_SUCCESS;
1055 }
1056
1057 #if __has_feature(ptrauth_calls)
1058
1059 /*
1060 * Determine if this task is actually using pointer signing.
1061 */
1062 static boolean_t
1063 task_sign_pointers(task_t task)
1064 {
1065 if (task->map &&
1066 task->map->pmap &&
1067 !task->map->pmap->disable_jop) {
1068 return TRUE;
1069 }
1070 return FALSE;
1071 }
1072
1073 /*
1074 * If the shared region contains mappings that are authenticated, then
1075 * remap them into the task private map.
1076 *
1077 * Failures are possible in this routine when jetsam kills a process
1078 * just as dyld is trying to set it up. The vm_map and task shared region
1079 * info get torn down w/o waiting for this thread to finish up.
1080 */
1081 __attribute__((noinline))
1082 kern_return_t
1083 vm_shared_region_auth_remap(vm_shared_region_t sr)
1084 {
1085 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
1086 task_t task = current_task();
1087 vm_shared_region_slide_info_t si;
1088 uint_t i;
1089 vm_object_t object;
1090 vm_map_t sr_map;
1091 struct vm_map_entry tmp_entry_store = {0};
1092 vm_map_entry_t tmp_entry = NULL;
1093 int vm_flags;
1094 vm_map_kernel_flags_t vmk_flags;
1095 vm_map_offset_t map_addr;
1096 kern_return_t kr = KERN_SUCCESS;
1097 boolean_t use_ptr_auth = task_sign_pointers(task);
1098
1099 /*
1100 * Don't do this more than once and avoid any race conditions in finishing it.
1101 */
1102 vm_shared_region_lock();
1103 while (sr->sr_mapping_in_progress) {
1104 /* wait for our turn... */
1105 vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1106 }
1107 assert(!sr->sr_mapping_in_progress);
1108 assert(sr->sr_ref_count > 1);
1109
1110 /* Just return if already done. */
1111 if (task->shared_region_auth_remapped) {
1112 vm_shared_region_unlock();
1113 return KERN_SUCCESS;
1114 }
1115
1116 /* let others know to wait while we're working in this shared region */
1117 sr->sr_mapping_in_progress = TRUE;
1118 vm_shared_region_unlock();
1119
1120 /*
1121 * Remap any sections with pointer authentications into the private map.
1122 */
1123 for (i = 0; i < sr->sr_num_auth_section; ++i) {
1124 si = sr->sr_auth_section[i];
1125 assert(si != NULL);
1126 assert(si->si_ptrauth);
1127
1128 /*
1129 * We have a mapping that needs to be private.
1130 * Look for an existing slid mapping's pager with matching
1131 * object, offset, slide info and shared_region_id to reuse.
1132 */
1133 object = si->si_slide_object;
1134 sr_pager = shared_region_pager_match(object, si->si_start, si,
1135 use_ptr_auth ? task->jop_pid : 0);
1136 if (sr_pager == MEMORY_OBJECT_NULL) {
1137 kr = KERN_FAILURE;
1138 goto done;
1139 }
1140
1141 /*
1142 * verify matching jop_pid for this task and this pager
1143 */
1144 if (use_ptr_auth) {
1145 shared_region_pager_match_task_key(sr_pager, task);
1146 }
1147
1148 sr_map = vm_shared_region_vm_map(sr);
1149 tmp_entry = NULL;
1150
1151 kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1152 if (kr != KERN_SUCCESS) {
1153 goto done;
1154 }
1155 tmp_entry = &tmp_entry_store;
1156
1157 /*
1158 * Check that the object exactly covers the region to slide.
1159 */
1160 if (VME_OFFSET(tmp_entry) != si->si_start ||
1161 tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1162 kr = KERN_FAILURE;
1163 goto done;
1164 }
1165
1166 /*
1167 * map the pager over the portion of the mapping that needs sliding
1168 */
1169 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
1170 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1171 vmk_flags.vmkf_overwrite_immutable = TRUE;
1172 map_addr = si->si_slid_address;
1173 kr = vm_map_enter_mem_object(task->map,
1174 &map_addr,
1175 si->si_end - si->si_start,
1176 (mach_vm_offset_t) 0,
1177 vm_flags,
1178 vmk_flags,
1179 VM_KERN_MEMORY_NONE,
1180 (ipc_port_t)(uintptr_t) sr_pager,
1181 0,
1182 TRUE,
1183 tmp_entry->protection,
1184 tmp_entry->max_protection,
1185 tmp_entry->inheritance);
1186 memory_object_deallocate(sr_pager);
1187 sr_pager = MEMORY_OBJECT_NULL;
1188 if (kr != KERN_SUCCESS) {
1189 goto done;
1190 }
1191 assertf(map_addr == si->si_slid_address,
1192 "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1193 (uint64_t)map_addr,
1194 (uint64_t)si->si_slid_address,
1195 tmp_entry);
1196
1197 /* Drop the ref count grabbed by find_mapping_to_slide */
1198 vm_object_deallocate(VME_OBJECT(tmp_entry));
1199 tmp_entry = NULL;
1200 }
1201
1202 done:
1203 if (tmp_entry) {
1204 /* Drop the ref count grabbed by find_mapping_to_slide */
1205 vm_object_deallocate(VME_OBJECT(tmp_entry));
1206 tmp_entry = NULL;
1207 }
1208
1209 /*
1210 * Drop any extra reference to the pager in case we're quitting due to an error above.
1211 */
1212 if (sr_pager != MEMORY_OBJECT_NULL) {
1213 memory_object_deallocate(sr_pager);
1214 }
1215
1216 /*
1217 * Mark the task as having its auth sections remapped.
1218 */
1219 vm_shared_region_lock();
1220 task->shared_region_auth_remapped = TRUE;
1221 sr->sr_mapping_in_progress = FALSE;
1222 thread_wakeup((event_t)&sr->sr_mapping_in_progress);
1223 vm_shared_region_unlock();
1224 return kr;
1225 }
1226 #endif /* __has_feature(ptrauth_calls) */
1227
1228 void
1229 vm_shared_region_undo_mappings(
1230 vm_map_t sr_map,
1231 mach_vm_offset_t sr_base_address,
1232 struct _sr_file_mappings *srf_mappings,
1233 struct _sr_file_mappings *srf_mappings_current,
1234 unsigned int srf_current_mappings_count)
1235 {
1236 unsigned int j = 0;
1237 vm_shared_region_t shared_region = NULL;
1238 boolean_t reset_shared_region_state = FALSE;
1239 struct _sr_file_mappings *srfmp;
1240 unsigned int mappings_count;
1241 struct shared_file_mapping_slide_np *mappings;
1242
1243 shared_region = vm_shared_region_get(current_task());
1244 if (shared_region == NULL) {
1245 printf("Failed to undo mappings because of NULL shared region.\n");
1246 return;
1247 }
1248
1249 if (sr_map == NULL) {
1250 ipc_port_t sr_handle;
1251 vm_named_entry_t sr_mem_entry;
1252
1253 vm_shared_region_lock();
1254 assert(shared_region->sr_ref_count > 1);
1255
1256 while (shared_region->sr_mapping_in_progress) {
1257 /* wait for our turn... */
1258 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1259 THREAD_UNINT);
1260 }
1261 assert(!shared_region->sr_mapping_in_progress);
1262 assert(shared_region->sr_ref_count > 1);
1263 /* let others know we're working in this shared region */
1264 shared_region->sr_mapping_in_progress = TRUE;
1265
1266 vm_shared_region_unlock();
1267
1268 reset_shared_region_state = TRUE;
1269
1270 /* no need to lock because this data is never modified... */
1271 sr_handle = shared_region->sr_mem_entry;
1272 sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
1273 sr_map = sr_mem_entry->backing.map;
1274 sr_base_address = shared_region->sr_base_address;
1275 }
1276 /*
1277 * Undo the mappings we've established so far.
1278 */
1279 for (srfmp = &srf_mappings[0];
1280 srfmp <= srf_mappings_current;
1281 srfmp++) {
1282 mappings = srfmp->mappings;
1283 mappings_count = srfmp->mappings_count;
1284 if (srfmp == srf_mappings_current) {
1285 mappings_count = srf_current_mappings_count;
1286 }
1287
1288 for (j = 0; j < mappings_count; j++) {
1289 kern_return_t kr2;
1290
1291 if (mappings[j].sms_size == 0) {
1292 /*
1293 * We didn't establish this
1294 * mapping, so nothing to undo.
1295 */
1296 continue;
1297 }
1298 SHARED_REGION_TRACE_INFO(
1299 ("shared_region: mapping[%d]: "
1300 "address:0x%016llx "
1301 "size:0x%016llx "
1302 "offset:0x%016llx "
1303 "maxprot:0x%x prot:0x%x: "
1304 "undoing...\n",
1305 j,
1306 (long long)mappings[j].sms_address,
1307 (long long)mappings[j].sms_size,
1308 (long long)mappings[j].sms_file_offset,
1309 mappings[j].sms_max_prot,
1310 mappings[j].sms_init_prot));
1311 kr2 = mach_vm_deallocate(
1312 sr_map,
1313 (mappings[j].sms_address -
1314 sr_base_address),
1315 mappings[j].sms_size);
1316 assert(kr2 == KERN_SUCCESS);
1317 }
1318 }
1319
1320 if (reset_shared_region_state) {
1321 vm_shared_region_lock();
1322 assert(shared_region->sr_ref_count > 1);
1323 assert(shared_region->sr_mapping_in_progress);
1324 /* we're done working on that shared region */
1325 shared_region->sr_mapping_in_progress = FALSE;
1326 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1327 vm_shared_region_unlock();
1328 reset_shared_region_state = FALSE;
1329 }
1330
1331 vm_shared_region_deallocate(shared_region);
1332 }
1333
1334 /*
1335 * For now we only expect to see at most 2 regions to relocate/authenticate
1336 * per file. One that's VM_PROT_SLIDE and one VM_PROT_SLIDE | VM_PROT_NOAUTH.
1337 */
1338 #define VMSR_NUM_SLIDES 2
1339
1340 /*
1341 * First part of vm_shared_region_map_file(). Split out to
1342 * avoid kernel stack overflow.
1343 */
1344 __attribute__((noinline))
1345 static kern_return_t
1346 vm_shared_region_map_file_setup(
1347 vm_shared_region_t shared_region,
1348 void *root_dir,
1349 int sr_file_mappings_count,
1350 struct _sr_file_mappings *sr_file_mappings,
1351 unsigned int *mappings_to_slide_cnt,
1352 struct shared_file_mapping_slide_np **mappings_to_slide,
1353 mach_vm_offset_t *slid_mappings,
1354 memory_object_control_t *slid_file_controls,
1355 mach_vm_offset_t *first_mapping,
1356 mach_vm_offset_t *file_first_mappings,
1357 mach_vm_offset_t *sfm_min_address,
1358 mach_vm_offset_t *sfm_max_address,
1359 vm_map_t *sr_map_ptr,
1360 vm_map_offset_t *lowest_unnestable_addr_ptr)
1361 {
1362 kern_return_t kr = KERN_SUCCESS;
1363 memory_object_control_t file_control;
1364 vm_object_t file_object;
1365 ipc_port_t sr_handle;
1366 vm_named_entry_t sr_mem_entry;
1367 vm_map_t sr_map;
1368 mach_vm_offset_t sr_base_address;
1369 unsigned int i = 0;
1370 mach_port_t map_port;
1371 vm_map_offset_t target_address;
1372 vm_object_t object;
1373 vm_object_size_t obj_size;
1374 vm_map_offset_t lowest_unnestable_addr = 0;
1375 vm_map_kernel_flags_t vmk_flags;
1376 mach_vm_offset_t sfm_end;
1377 uint32_t mappings_count;
1378 struct shared_file_mapping_slide_np *mappings;
1379 struct _sr_file_mappings *srfmp;
1380 unsigned int current_file_index = 0;
1381
1382 vm_shared_region_lock();
1383 assert(shared_region->sr_ref_count > 1);
1384
1385 if (shared_region->sr_root_dir != root_dir) {
1386 /*
1387 * This shared region doesn't match the current root
1388 * directory of this process. Deny the mapping to
1389 * avoid tainting the shared region with something that
1390 * doesn't quite belong into it.
1391 */
1392 vm_shared_region_unlock();
1393
1394 SHARED_REGION_TRACE_DEBUG(
1395 ("shared_region: map(%p) <- 0x%x \n",
1396 (void *)VM_KERNEL_ADDRPERM(shared_region), kr));
1397 return KERN_PROTECTION_FAILURE;
1398 }
1399
1400 /*
1401 * Make sure we handle only one mapping at a time in a given
1402 * shared region, to avoid race conditions. This should not
1403 * happen frequently...
1404 */
1405 while (shared_region->sr_mapping_in_progress) {
1406 /* wait for our turn... */
1407 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1408 THREAD_UNINT);
1409 }
1410 assert(!shared_region->sr_mapping_in_progress);
1411 assert(shared_region->sr_ref_count > 1);
1412 /* let others know we're working in this shared region */
1413 shared_region->sr_mapping_in_progress = TRUE;
1414
1415 vm_shared_region_unlock();
1416
1417 /* no need to lock because this data is never modified... */
1418 sr_handle = shared_region->sr_mem_entry;
1419 sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
1420 sr_map = sr_mem_entry->backing.map;
1421 sr_base_address = shared_region->sr_base_address;
1422
1423 SHARED_REGION_TRACE_DEBUG(
1424 ("shared_region: -> map(%p)\n",
1425 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1426
1427 mappings_count = 0;
1428 mappings = NULL;
1429 srfmp = NULL;
1430
1431 /* process all the files to be mapped */
1432 for (srfmp = &sr_file_mappings[0];
1433 srfmp < &sr_file_mappings[sr_file_mappings_count];
1434 srfmp++) {
1435 mappings_count = srfmp->mappings_count;
1436 mappings = srfmp->mappings;
1437 file_control = srfmp->file_control;
1438
1439 if (mappings_count == 0) {
1440 /* no mappings here... */
1441 continue;
1442 }
1443
1444 /*
1445 * The code below can only correctly "slide" (perform relocations) for one
1446 * value of the slide amount. So if a file has a non-zero slide, it has to
1447 * match any previous value. A zero slide value is ok for things that are
1448 * just directly mapped.
1449 */
1450 if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1451 shared_region->sr_slide = srfmp->slide;
1452 } else if (shared_region->sr_slide != 0 &&
1453 srfmp->slide != 0 &&
1454 shared_region->sr_slide != srfmp->slide) {
1455 SHARED_REGION_TRACE_ERROR(
1456 ("shared_region: more than 1 non-zero slide value amount "
1457 "slide 1:0x%x slide 2:0x%x\n ",
1458 shared_region->sr_slide, srfmp->slide));
1459 kr = KERN_INVALID_ARGUMENT;
1460 break;
1461 }
1462
1463 #if __arm64__
1464 if ((shared_region->sr_64bit ||
1465 page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
1466 ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
1467 printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
1468 __FUNCTION__, srfmp->slide);
1469 kr = KERN_INVALID_ARGUMENT;
1470 break;
1471 }
1472 #endif /* __arm64__ */
1473
1474 /* get the VM object associated with the file to be mapped */
1475 file_object = memory_object_control_to_vm_object(file_control);
1476 assert(file_object);
1477
1478 /* establish the mappings for that file */
1479 for (i = 0; i < mappings_count; i++) {
1480 SHARED_REGION_TRACE_INFO(
1481 ("shared_region: mapping[%d]: "
1482 "address:0x%016llx size:0x%016llx offset:0x%016llx "
1483 "maxprot:0x%x prot:0x%x\n",
1484 i,
1485 (long long)mappings[i].sms_address,
1486 (long long)mappings[i].sms_size,
1487 (long long)mappings[i].sms_file_offset,
1488 mappings[i].sms_max_prot,
1489 mappings[i].sms_init_prot));
1490
1491 if (mappings[i].sms_address < *sfm_min_address) {
1492 *sfm_min_address = mappings[i].sms_address;
1493 }
1494
1495 if (os_add_overflow(mappings[i].sms_address,
1496 mappings[i].sms_size,
1497 &sfm_end) ||
1498 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1499 mappings[i].sms_address)) {
1500 /* overflow */
1501 kr = KERN_INVALID_ARGUMENT;
1502 break;
1503 }
1504 if (sfm_end > *sfm_max_address) {
1505 *sfm_max_address = sfm_end;
1506 }
1507
1508 if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1509 /* zero-filled memory */
1510 map_port = MACH_PORT_NULL;
1511 } else {
1512 /* file-backed memory */
1513 __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1514 }
1515
1516 /*
1517 * Remember which mappings need sliding.
1518 */
1519 if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1520 if (*mappings_to_slide_cnt == VMSR_NUM_SLIDES) {
1521 SHARED_REGION_TRACE_INFO(
1522 ("shared_region: mapping[%d]: "
1523 "address:0x%016llx size:0x%016llx "
1524 "offset:0x%016llx "
1525 "maxprot:0x%x prot:0x%x "
1526 "too many mappings to slide...\n",
1527 i,
1528 (long long)mappings[i].sms_address,
1529 (long long)mappings[i].sms_size,
1530 (long long)mappings[i].sms_file_offset,
1531 mappings[i].sms_max_prot,
1532 mappings[i].sms_init_prot));
1533 } else {
1534 mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1535 *mappings_to_slide_cnt += 1;
1536 }
1537 }
1538
1539 /* mapping's address is relative to the shared region base */
1540 target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);
1541
1542 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1543 vmk_flags.vmkf_already = TRUE;
1544 /* no copy-on-read for mapped binaries */
1545 vmk_flags.vmkf_no_copy_on_read = 1;
1546
1547
1548 /* establish that mapping, OK if it's "already" there */
1549 if (map_port == MACH_PORT_NULL) {
1550 /*
1551 * We want to map some anonymous memory in a shared region.
1552 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1553 */
1554 obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1555 object = vm_object_allocate(obj_size);
1556 if (object == VM_OBJECT_NULL) {
1557 kr = KERN_RESOURCE_SHORTAGE;
1558 } else {
1559 kr = vm_map_enter(
1560 sr_map,
1561 &target_address,
1562 vm_map_round_page(mappings[i].sms_size,
1563 VM_MAP_PAGE_MASK(sr_map)),
1564 0,
1565 VM_FLAGS_FIXED,
1566 vmk_flags,
1567 VM_KERN_MEMORY_NONE,
1568 object,
1569 0,
1570 TRUE,
1571 mappings[i].sms_init_prot & VM_PROT_ALL,
1572 mappings[i].sms_max_prot & VM_PROT_ALL,
1573 VM_INHERIT_DEFAULT);
1574 }
1575 } else {
1576 object = VM_OBJECT_NULL; /* no anonymous memory here */
1577 kr = vm_map_enter_mem_object(
1578 sr_map,
1579 &target_address,
1580 vm_map_round_page(mappings[i].sms_size,
1581 VM_MAP_PAGE_MASK(sr_map)),
1582 0,
1583 VM_FLAGS_FIXED,
1584 vmk_flags,
1585 VM_KERN_MEMORY_NONE,
1586 map_port,
1587 mappings[i].sms_file_offset,
1588 TRUE,
1589 mappings[i].sms_init_prot & VM_PROT_ALL,
1590 mappings[i].sms_max_prot & VM_PROT_ALL,
1591 VM_INHERIT_DEFAULT);
1592 }
1593
1594 if (kr == KERN_SUCCESS) {
1595 /*
1596 * Record the first (chronologically) successful
1597 * mapping in this shared region.
1598 * We're protected by "sr_mapping_in_progress" here,
1599 * so no need to lock "shared_region".
1600 */
1601 assert(current_file_index < VMSR_NUM_SLIDES);
1602 if (file_first_mappings[current_file_index] == (mach_vm_offset_t) -1) {
1603 file_first_mappings[current_file_index] = target_address;
1604 }
1605
1606 if (*mappings_to_slide_cnt > 0 &&
1607 mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1608 slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1609 slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1610 }
1611
1612 /*
1613 * Record the lowest writable address in this
1614 * sub map, to log any unexpected unnesting below
1615 * that address (see log_unnest_badness()).
1616 */
1617 if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1618 sr_map->is_nested_map &&
1619 (lowest_unnestable_addr == 0 ||
1620 (target_address < lowest_unnestable_addr))) {
1621 lowest_unnestable_addr = target_address;
1622 }
1623 } else {
1624 if (map_port == MACH_PORT_NULL) {
1625 /*
1626 * Get rid of the VM object we just created
1627 * but failed to map.
1628 */
1629 vm_object_deallocate(object);
1630 object = VM_OBJECT_NULL;
1631 }
1632 if (kr == KERN_MEMORY_PRESENT) {
1633 /*
1634 * This exact mapping was already there:
1635 * that's fine.
1636 */
1637 SHARED_REGION_TRACE_INFO(
1638 ("shared_region: mapping[%d]: "
1639 "address:0x%016llx size:0x%016llx "
1640 "offset:0x%016llx "
1641 "maxprot:0x%x prot:0x%x "
1642 "already mapped...\n",
1643 i,
1644 (long long)mappings[i].sms_address,
1645 (long long)mappings[i].sms_size,
1646 (long long)mappings[i].sms_file_offset,
1647 mappings[i].sms_max_prot,
1648 mappings[i].sms_init_prot));
1649 /*
1650 * We didn't establish this mapping ourselves;
1651 * let's reset its size, so that we do not
1652 * attempt to undo it if an error occurs later.
1653 */
1654 mappings[i].sms_size = 0;
1655 kr = KERN_SUCCESS;
1656 } else {
1657 break;
1658 }
1659 }
1660 }
1661
1662 if (kr != KERN_SUCCESS) {
1663 break;
1664 }
1665
1666 ++current_file_index;
1667 }
1668
1669 if (file_first_mappings[0] != (mach_vm_offset_t)-1) {
1670 *first_mapping = file_first_mappings[0];
1671 }
1672
1673
1674 if (kr != KERN_SUCCESS) {
1675 /* the last mapping we tried (mappings[i]) failed ! */
1676 assert(i < mappings_count);
1677 SHARED_REGION_TRACE_ERROR(
1678 ("shared_region: mapping[%d]: "
1679 "address:0x%016llx size:0x%016llx "
1680 "offset:0x%016llx "
1681 "maxprot:0x%x prot:0x%x failed 0x%x\n",
1682 i,
1683 (long long)mappings[i].sms_address,
1684 (long long)mappings[i].sms_size,
1685 (long long)mappings[i].sms_file_offset,
1686 mappings[i].sms_max_prot,
1687 mappings[i].sms_init_prot,
1688 kr));
1689
1690 /*
1691 * Respect the design of vm_shared_region_undo_mappings():
1692 * we are holding sr_mapping_in_progress == TRUE here, so we must
1693 * not pass sr_map == NULL, otherwise vm_shared_region_undo_mappings()
1694 * would block waiting for sr_mapping_in_progress to become FALSE.
1695 */
1696 assert(sr_map != NULL);
1697 /* undo all the previous mappings */
1698 vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1699 return kr;
1700 }
1701
1702 *lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1703 *sr_map_ptr = sr_map;
1704 return KERN_SUCCESS;
1705 }
1706
1707 /* forward declaration */
1708 __attribute__((noinline))
1709 static void
1710 vm_shared_region_map_file_final(
1711 vm_shared_region_t shared_region,
1712 vm_map_t sr_map,
1713 mach_vm_offset_t sfm_min_address,
1714 mach_vm_offset_t sfm_max_address,
1715 mach_vm_offset_t *file_first_mappings);
1716
1717 /*
1718 * Establish some mappings of a file in the shared region.
1719 * This is used by "dyld" via the shared_region_map_np() system call
1720 * to populate the shared region with the appropriate shared cache.
1721 *
1722 * One could also call it several times to incrementally load several
1723 * libraries, as long as they do not overlap.
1724 * It will return KERN_SUCCESS if the mappings were successfully established
1725 * or if they were already established identically by another process.
1726 */
1727 __attribute__((noinline))
1728 kern_return_t
1729 vm_shared_region_map_file(
1730 vm_shared_region_t shared_region,
1731 void *root_dir,
1732 int sr_file_mappings_count,
1733 struct _sr_file_mappings *sr_file_mappings)
1734 {
1735 kern_return_t kr = KERN_SUCCESS;
1736 unsigned int i;
1737 unsigned int mappings_to_slide_cnt = 0;
1738 struct shared_file_mapping_slide_np *mappings_to_slide[VMSR_NUM_SLIDES] = {};
1739 mach_vm_offset_t slid_mappings[VMSR_NUM_SLIDES];
1740 memory_object_control_t slid_file_controls[VMSR_NUM_SLIDES];
1741 mach_vm_offset_t first_mapping = (mach_vm_offset_t)-1;
1742 mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
1743 mach_vm_offset_t sfm_max_address = 0;
1744 vm_map_t sr_map = NULL;
1745 vm_map_offset_t lowest_unnestable_addr = 0;
1746 mach_vm_offset_t file_first_mappings[VMSR_NUM_SLIDES] = {(mach_vm_offset_t) -1, (mach_vm_offset_t) -1};
1747
1748 kr = vm_shared_region_map_file_setup(shared_region, root_dir, sr_file_mappings_count, sr_file_mappings,
1749 &mappings_to_slide_cnt, &mappings_to_slide[0], slid_mappings, slid_file_controls,
1750 &first_mapping, &file_first_mappings[0],
1751 &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr);
1752 if (kr != KERN_SUCCESS) {
1753 vm_shared_region_lock();
1754 goto done;
1755 }
1756
1757 /*
1758 * The call above installed direct mappings to the shared cache file.
1759 * Now we go back and overwrite the mappings that need relocation
1760 * with a special shared region pager.
1761 */
1762 for (i = 0; i < mappings_to_slide_cnt; ++i) {
1763 kr = vm_shared_region_slide(shared_region->sr_slide,
1764 mappings_to_slide[i]->sms_file_offset,
1765 mappings_to_slide[i]->sms_size,
1766 mappings_to_slide[i]->sms_slide_start,
1767 mappings_to_slide[i]->sms_slide_size,
1768 slid_mappings[i],
1769 slid_file_controls[i],
1770 mappings_to_slide[i]->sms_max_prot);
1771 if (kr != KERN_SUCCESS) {
1772 SHARED_REGION_TRACE_ERROR(
1773 ("shared_region: region_slide("
1774 "slide:0x%x start:0x%016llx "
1775 "size:0x%016llx) failed 0x%x\n",
1776 shared_region->sr_slide,
1777 (long long)mappings_to_slide[i]->sms_slide_start,
1778 (long long)mappings_to_slide[i]->sms_slide_size,
1779 kr));
1780 vm_shared_region_lock();
1781 goto done;
1782 }
1783 }
1784
1785 assert(kr == KERN_SUCCESS);
1786
1787 /* adjust the map's "lowest_unnestable_start" */
1788 lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
1789 if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
1790 vm_map_lock(sr_map);
1791 sr_map->lowest_unnestable_start = lowest_unnestable_addr;
1792 vm_map_unlock(sr_map);
1793 }
1794
1795 vm_shared_region_lock();
1796 assert(shared_region->sr_ref_count > 1);
1797 assert(shared_region->sr_mapping_in_progress);
1798
1799 /* set "sr_first_mapping"; dyld uses it to validate the shared cache */
1800 if (shared_region->sr_first_mapping == (mach_vm_offset_t) -1) {
1801 shared_region->sr_first_mapping = first_mapping;
1802 }
1803
1804 vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address,
1805 &file_first_mappings[0]);
1806
1807 done:
1808 /*
1809 * We're done working on that shared region.
1810 * Wake up any waiting threads.
1811 */
1812 shared_region->sr_mapping_in_progress = FALSE;
1813 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1814 vm_shared_region_unlock();
1815
1816 #if __has_feature(ptrauth_calls)
1817 if (kr == KERN_SUCCESS) {
1818 /*
1819 * Since authenticated mappings were just added to the shared region,
1820 * go back and remap them into private mappings for this task.
1821 */
1822 kr = vm_shared_region_auth_remap(shared_region);
1823 }
1824 #endif /* __has_feature(ptrauth_calls) */
1825
1826 SHARED_REGION_TRACE_DEBUG(
1827 ("shared_region: map(%p) <- 0x%x \n",
1828 (void *)VM_KERNEL_ADDRPERM(shared_region), kr));
1829 return kr;
1830 }
1831
1832 /*
1833 * Final part of vm_shared_region_map_file().
1834 * Kept in a separate function to avoid blowing out the stack.
1835 */
1836 __attribute__((noinline))
1837 static void
1838 vm_shared_region_map_file_final(
1839 vm_shared_region_t shared_region,
1840 vm_map_t sr_map,
1841 mach_vm_offset_t sfm_min_address,
1842 mach_vm_offset_t sfm_max_address,
1843 __unused mach_vm_offset_t *file_first_mappings)
1844 {
1845 struct _dyld_cache_header sr_cache_header;
1846 int error;
1847 size_t image_array_length;
1848 struct _dyld_cache_image_text_info *sr_image_layout;
1849
1850
1851 /*
1852 * Copy the shared region UUID into the shared region structure.
1853 * We do this indirectly by first copying in the shared cache header
1854 * and then taking the UUID from it, since we'll also need to look
1855 * at other content from the shared cache header.
1856 */
1857 if (!shared_region->sr_uuid_copied) {
1858 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
1859 (char *)&sr_cache_header,
1860 sizeof(sr_cache_header));
1861 if (error == 0) {
1862 memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
1863 shared_region->sr_uuid_copied = TRUE;
1864 } else {
1865 #if DEVELOPMENT || DEBUG
1866 panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
1867 "offset:0 size:0x%016llx) failed with %d\n",
1868 (long long)shared_region->sr_base_address,
1869 (long long)shared_region->sr_first_mapping,
1870 (long long)sizeof(sr_cache_header),
1871 error);
1872 #endif /* DEVELOPMENT || DEBUG */
1873 shared_region->sr_uuid_copied = FALSE;
1874 }
1875 }
1876
1877 /*
1878 * If the shared cache is associated with the init task (and is therefore the system shared cache),
1879 * check whether it is a custom built shared cache and copy in the shared cache layout accordingly.
1880 */
1881 boolean_t is_init_task = (task_pid(current_task()) == 1);
1882 if (shared_region->sr_uuid_copied && is_init_task) {
1883 /* Copy in the shared cache layout if we're running with a locally built shared cache */
1884 if (sr_cache_header.locallyBuiltCache) {
1885 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
1886 image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
1887 sr_image_layout = kheap_alloc(KHEAP_DATA_BUFFERS, image_array_length, Z_WAITOK);
1888 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
1889 sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
1890 if (error == 0) {
1891 shared_region->sr_images = kalloc((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)));
1892 for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
1893 memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
1894 sizeof(shared_region->sr_images[index].imageUUID));
1895 shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
1896 }
1897
1898 assert(sr_cache_header.imagesTextCount < UINT32_MAX);
1899 shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
1900 } else {
1901 #if DEVELOPMENT || DEBUG
1902 panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
1903 "offset:0x%016llx size:0x%016llx) failed with %d\n",
1904 (long long)shared_region->sr_base_address,
1905 (long long)shared_region->sr_first_mapping,
1906 (long long)sr_cache_header.imagesTextOffset,
1907 (long long)image_array_length,
1908 error);
1909 #endif /* DEVELOPMENT || DEBUG */
1910 }
1911 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
1912 kheap_free(KHEAP_DATA_BUFFERS, sr_image_layout, image_array_length);
1913 sr_image_layout = NULL;
1914 }
1915 init_task_shared_region = shared_region;
1916 }
1917
1918 /*
1919 * If we succeeded, we know the bounds of the shared region.
1920 * Trim our pmaps to only cover this range (if applicable to
1921 * this platform).
1922 */
1923 if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
1924 pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
1925 }
1926 }
1927
1928 /*
1929 * Retrieve a task's shared region and grab an extra reference to
1930 * make sure it doesn't disappear while the caller is using it.
1931 * The caller is responsible for consuming that extra reference if
1932 * necessary.
1933 *
1934 * This also tries to trim the pmap for the shared region.
1935 */
1936 vm_shared_region_t
1937 vm_shared_region_trim_and_get(task_t task)
1938 {
1939 vm_shared_region_t shared_region;
1940 ipc_port_t sr_handle;
1941 vm_named_entry_t sr_mem_entry;
1942 vm_map_t sr_map;
1943
1944 /* Get the shared region and the map. */
1945 shared_region = vm_shared_region_get(task);
1946 if (shared_region == NULL) {
1947 return NULL;
1948 }
1949
1950 sr_handle = shared_region->sr_mem_entry;
1951 sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
1952 sr_map = sr_mem_entry->backing.map;
1953
1954 /* Trim the pmap if possible. */
1955 if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
1956 pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
1957 }
1958
1959 return shared_region;
1960 }
1961
1962 /*
1963 * Enter the appropriate shared region into "map" for "task".
1964 * This involves looking up the shared region (and possibly creating a new
1965 * one) for the desired environment, then mapping the VM sub map into the
1966 * task's VM "map", with the appropriate level of pmap-nesting.
1967 */
1968 kern_return_t
1969 vm_shared_region_enter(
1970 struct _vm_map *map,
1971 struct task *task,
1972 boolean_t is_64bit,
1973 void *fsroot,
1974 cpu_type_t cpu,
1975 cpu_subtype_t cpu_subtype,
1976 boolean_t reslide)
1977 {
1978 kern_return_t kr;
1979 vm_shared_region_t shared_region;
1980 vm_map_offset_t sr_address, sr_offset, target_address;
1981 vm_map_size_t sr_size, mapping_size;
1982 vm_map_offset_t sr_pmap_nesting_start;
1983 vm_map_size_t sr_pmap_nesting_size;
1984 ipc_port_t sr_handle;
1985 vm_prot_t cur_prot, max_prot;
1986
1987 SHARED_REGION_TRACE_DEBUG(
1988 ("shared_region: -> "
1989 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d)\n",
1990 (void *)VM_KERNEL_ADDRPERM(map),
1991 (void *)VM_KERNEL_ADDRPERM(task),
1992 (void *)VM_KERNEL_ADDRPERM(fsroot),
1993 cpu, cpu_subtype, is_64bit));
1994
1995 /* lookup (create if needed) the shared region for this environment */
1996 shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, reslide);
1997 if (shared_region == NULL) {
1998 /* this should not happen! */
1999 SHARED_REGION_TRACE_ERROR(
2000 ("shared_region: -> "
2001 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d): "
2002 "lookup failed !\n",
2003 (void *)VM_KERNEL_ADDRPERM(map),
2004 (void *)VM_KERNEL_ADDRPERM(task),
2005 (void *)VM_KERNEL_ADDRPERM(fsroot),
2006 cpu, cpu_subtype, is_64bit, reslide));
2007 //panic("shared_region_enter: lookup failed\n");
2008 return KERN_FAILURE;
2009 }
2010
2011 kr = KERN_SUCCESS;
2012 /* no need to lock since this data is never modified */
2013 sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2014 sr_size = (vm_map_size_t)shared_region->sr_size;
2015 sr_handle = shared_region->sr_mem_entry;
2016 sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2017 sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2018
2019 cur_prot = VM_PROT_READ;
2020 if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2021 /*
2022 * XXX BINARY COMPATIBILITY
2023 * java6 apparently needs to modify some code in the
2024 * dyld shared cache and needs to be allowed to add
2025 * write access...
2026 */
2027 max_prot = VM_PROT_ALL;
2028 } else {
2029 max_prot = VM_PROT_READ;
2030 }
2031
2032 /*
2033 * Start mapping the shared region's VM sub map into the task's VM map.
2034 */
2035 sr_offset = 0;
2036
2037 if (sr_pmap_nesting_start > sr_address) {
2038 /* we need to map a range without pmap-nesting first */
2039 target_address = sr_address;
2040 mapping_size = sr_pmap_nesting_start - sr_address;
2041 kr = vm_map_enter_mem_object(
2042 map,
2043 &target_address,
2044 mapping_size,
2045 0,
2046 VM_FLAGS_FIXED,
2047 VM_MAP_KERNEL_FLAGS_NONE,
2048 VM_KERN_MEMORY_NONE,
2049 sr_handle,
2050 sr_offset,
2051 TRUE,
2052 cur_prot,
2053 max_prot,
2054 VM_INHERIT_SHARE);
2055 if (kr != KERN_SUCCESS) {
2056 SHARED_REGION_TRACE_ERROR(
2057 ("shared_region: enter(%p,%p,%p,%d,%d,%d): "
2058 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2059 (void *)VM_KERNEL_ADDRPERM(map),
2060 (void *)VM_KERNEL_ADDRPERM(task),
2061 (void *)VM_KERNEL_ADDRPERM(fsroot),
2062 cpu, cpu_subtype, is_64bit,
2063 (long long)target_address,
2064 (long long)mapping_size,
2065 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2066 goto done;
2067 }
2068 SHARED_REGION_TRACE_DEBUG(
2069 ("shared_region: enter(%p,%p,%p,%d,%d,%d): "
2070 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2071 (void *)VM_KERNEL_ADDRPERM(map),
2072 (void *)VM_KERNEL_ADDRPERM(task),
2073 (void *)VM_KERNEL_ADDRPERM(fsroot),
2074 cpu, cpu_subtype, is_64bit,
2075 (long long)target_address, (long long)mapping_size,
2076 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2077 sr_offset += mapping_size;
2078 sr_size -= mapping_size;
2079 }
2080 /*
2081 * We may need to map several pmap-nested portions, due to platform
2082 * specific restrictions on pmap nesting.
2083 * The pmap-nesting is triggered by the "vmkf_nested_pmap" flag...
2084 */
2085 for (;
2086 sr_pmap_nesting_size > 0;
2087 sr_offset += mapping_size,
2088 sr_size -= mapping_size,
2089 sr_pmap_nesting_size -= mapping_size) {
2090 vm_map_kernel_flags_t vmk_flags;
2091
2092 target_address = sr_address + sr_offset;
2093 mapping_size = sr_pmap_nesting_size;
2094 if (mapping_size > pmap_nesting_size_max(map->pmap)) {
2095 mapping_size = (vm_map_offset_t) pmap_nesting_size_max(map->pmap);
2096 }
2097 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2098 vmk_flags.vmkf_nested_pmap = TRUE;
2099 kr = vm_map_enter_mem_object(
2100 map,
2101 &target_address,
2102 mapping_size,
2103 0,
2104 VM_FLAGS_FIXED,
2105 vmk_flags,
2106 VM_MEMORY_SHARED_PMAP,
2107 sr_handle,
2108 sr_offset,
2109 TRUE,
2110 cur_prot,
2111 max_prot,
2112 VM_INHERIT_SHARE);
2113 if (kr != KERN_SUCCESS) {
2114 SHARED_REGION_TRACE_ERROR(
2115 ("shared_region: enter(%p,%p,%p,%d,%d,%d): "
2116 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2117 (void *)VM_KERNEL_ADDRPERM(map),
2118 (void *)VM_KERNEL_ADDRPERM(task),
2119 (void *)VM_KERNEL_ADDRPERM(fsroot),
2120 cpu, cpu_subtype, is_64bit,
2121 (long long)target_address,
2122 (long long)mapping_size,
2123 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2124 goto done;
2125 }
2126 SHARED_REGION_TRACE_DEBUG(
2127 ("shared_region: enter(%p,%p,%p,%d,%d,%d): "
2128 "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2129 (void *)VM_KERNEL_ADDRPERM(map),
2130 (void *)VM_KERNEL_ADDRPERM(task),
2131 (void *)VM_KERNEL_ADDRPERM(fsroot),
2132 cpu, cpu_subtype, is_64bit,
2133 (long long)target_address, (long long)mapping_size,
2134 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2135 }
2136 if (sr_size > 0) {
2137 /* and there's some left to be mapped without pmap-nesting */
2138 target_address = sr_address + sr_offset;
2139 mapping_size = sr_size;
2140 kr = vm_map_enter_mem_object(
2141 map,
2142 &target_address,
2143 mapping_size,
2144 0,
2145 VM_FLAGS_FIXED,
2146 VM_MAP_KERNEL_FLAGS_NONE,
2147 VM_KERN_MEMORY_NONE,
2148 sr_handle,
2149 sr_offset,
2150 TRUE,
2151 cur_prot,
2152 max_prot,
2153 VM_INHERIT_SHARE);
2154 if (kr != KERN_SUCCESS) {
2155 SHARED_REGION_TRACE_ERROR(
2156 ("shared_region: enter(%p,%p,%p,%d,%d,%d): "
2157 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2158 (void *)VM_KERNEL_ADDRPERM(map),
2159 (void *)VM_KERNEL_ADDRPERM(task),
2160 (void *)VM_KERNEL_ADDRPERM(fsroot),
2161 cpu, cpu_subtype, is_64bit,
2162 (long long)target_address,
2163 (long long)mapping_size,
2164 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2165 goto done;
2166 }
2167 SHARED_REGION_TRACE_DEBUG(
2168 ("shared_region: enter(%p,%p,%p,%d,%d,%d): "
2169 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2170 (void *)VM_KERNEL_ADDRPERM(map),
2171 (void *)VM_KERNEL_ADDRPERM(task),
2172 (void *)VM_KERNEL_ADDRPERM(fsroot),
2173 cpu, cpu_subtype, is_64bit,
2174 (long long)target_address, (long long)mapping_size,
2175 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2176 sr_offset += mapping_size;
2177 sr_size -= mapping_size;
2178 }
2179 assert(sr_size == 0);
2180
2181 done:
2182 if (kr == KERN_SUCCESS) {
2183 /* let the task use that shared region */
2184 vm_shared_region_set(task, shared_region);
2185 } else {
2186 /* drop our reference since we're not using it */
2187 vm_shared_region_deallocate(shared_region);
2188 vm_shared_region_set(task, NULL);
2189 }
2190
2191 SHARED_REGION_TRACE_DEBUG(
2192 ("shared_region: enter(%p,%p,%p,%d,%d,%d) <- 0x%x\n",
2193 (void *)VM_KERNEL_ADDRPERM(map),
2194 (void *)VM_KERNEL_ADDRPERM(task),
2195 (void *)VM_KERNEL_ADDRPERM(fsroot),
2196 cpu, cpu_subtype, is_64bit,
2197 kr));
2198 return kr;
2199 }
2200
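/*
 * Illustrative sketch (not part of the build): the nested-mapping loop in
 * vm_shared_region_enter() above simply splits the pmap-nested range into
 * chunks no larger than pmap_nesting_size_max().  The sizes below are made
 * up for illustration only.
 */
#if 0
static void
vm_shared_region_nesting_chunks_example(void)
{
	vm_map_size_t nesting_max = 512ULL * 1024 * 1024;	/* hypothetical pmap limit */
	vm_map_size_t remaining = 3ULL * 512 * 1024 * 1024 + PAGE_SIZE;
	vm_map_size_t chunk;
	unsigned int nchunks = 0;

	while (remaining > 0) {
		chunk = remaining;
		if (chunk > nesting_max) {
			chunk = nesting_max;
		}
		/* here vm_shared_region_enter() would map "chunk" bytes with vmkf_nested_pmap */
		remaining -= chunk;
		nchunks++;
	}
	/* nchunks == 4: three 512MB nested mappings plus one trailing page */
}
#endif
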
2201 #define SANE_SLIDE_INFO_SIZE (2560*1024) /* can be changed if needed */
2202 struct vm_shared_region_slide_info slide_info;
2203
2204 kern_return_t
2205 vm_shared_region_sliding_valid(uint32_t slide)
2206 {
2207 kern_return_t kr = KERN_SUCCESS;
2208 vm_shared_region_t sr = vm_shared_region_get(current_task());
2209
2210 /* No region yet? we're fine. */
2211 if (sr == NULL) {
2212 return kr;
2213 }
2214
2215 if (sr->sr_slide != 0 && slide != 0) {
2216 if (slide == sr->sr_slide) {
2217 /*
2218 * We're being asked to slide when we've
2219 * already done it with exactly the same
2220 * slide value before. That isn't technically
2221 * wrong, but we don't want to slide again,
2222 * so we return KERN_INVALID_ARGUMENT to let
2223 * the caller skip the redundant slide.
2224 */
2225 kr = KERN_INVALID_ARGUMENT;
2226 } else {
2227 printf("Mismatched shared region slide\n");
2228 kr = KERN_FAILURE;
2229 }
2230 }
2231 vm_shared_region_deallocate(sr);
2232 return kr;
2233 }
2234
2235 /*
2236 * Actually create (really overwrite) the mapping to part of the shared cache which
2237 * undergoes relocation. This routine reads in the relocation info from dyld and
2238 * verifies it. It then creates a (or finds a matching) shared region pager which
2239 * handles the actual modification of the page contents and installs the mapping
2240 * using that pager.
2241 */
2242 kern_return_t
2243 vm_shared_region_slide_mapping(
2244 vm_shared_region_t sr,
2245 user_addr_t slide_info_addr,
2246 mach_vm_size_t slide_info_size,
2247 mach_vm_offset_t start,
2248 mach_vm_size_t size,
2249 mach_vm_offset_t slid_mapping,
2250 uint32_t slide,
2251 memory_object_control_t sr_file_control,
2252 vm_prot_t prot)
2253 {
2254 kern_return_t kr;
2255 vm_object_t object = VM_OBJECT_NULL;
2256 vm_shared_region_slide_info_t si = NULL;
2257 vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
2258 struct vm_map_entry tmp_entry_store;
2259 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
2260 vm_map_t sr_map;
2261 int vm_flags;
2262 vm_map_kernel_flags_t vmk_flags;
2263 vm_map_offset_t map_addr;
2264 void *slide_info_entry = NULL;
2265 int error;
2266
2267 assert(sr->sr_slide_in_progress);
2268
2269 if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2270 return KERN_INVALID_ARGUMENT;
2271 }
2272
2273 /*
2274 * Copy in and verify the relocation information.
2275 */
2276 if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2277 printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2278 return KERN_FAILURE;
2279 }
2280 if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2281 printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2282 return KERN_FAILURE;
2283 }
2284
2285 slide_info_entry = kheap_alloc(KHEAP_DATA_BUFFERS, (vm_size_t)slide_info_size, Z_WAITOK);
2286 if (slide_info_entry == NULL) {
2287 return KERN_RESOURCE_SHORTAGE;
2288 }
2289 error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2290 if (error) {
2291 printf("copyin of slide_info failed\n");
2292 kr = KERN_INVALID_ADDRESS;
2293 goto done;
2294 }
2295
2296 if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2297 printf("Sanity Check failed for slide_info\n");
2298 goto done;
2299 }
2300
2301 /*
2302 * Allocate and fill in a vm_shared_region_slide_info.
2303 * This will either be used by a new pager, or used to find
2304 * a pre-existing matching pager.
2305 */
2306 object = memory_object_control_to_vm_object(sr_file_control);
2307 if (object == VM_OBJECT_NULL || object->internal) {
2308 object = VM_OBJECT_NULL;
2309 kr = KERN_INVALID_ADDRESS;
2310 goto done;
2311 }
2312
2313 si = kalloc(sizeof(*si));
2314 if (si == NULL) {
2315 kr = KERN_RESOURCE_SHORTAGE;
2316 goto done;
2317 }
2318 vm_object_lock(object);
2319
2320 vm_object_reference_locked(object); /* for si->slide_object */
2321 object->object_is_shared_cache = TRUE;
2322 vm_object_unlock(object);
2323
2324 si->si_slide_info_entry = slide_info_entry;
2325 si->si_slide_info_size = slide_info_size;
2326
2327 assert(slid_mapping != (mach_vm_offset_t) -1);
2328 si->si_slid_address = slid_mapping + sr->sr_base_address;
2329 si->si_slide_object = object;
2330 si->si_start = start;
2331 si->si_end = si->si_start + size;
2332 si->si_slide = slide;
2333 #if __has_feature(ptrauth_calls)
2334 /*
2335 * If there is authenticated pointer data in this slid mapping,
2336 * then just add the information needed to create new pagers for
2337 * different shared_region_id's later.
2338 */
2339 if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2340 sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2341 !(prot & VM_PROT_NOAUTH)) {
2342 if (sr->sr_num_auth_section == NUM_SR_AUTH_SECTIONS) {
2343 printf("Too many auth/private sections for shared region!!\n");
2344 kr = KERN_INVALID_ARGUMENT;
2345 goto done;
2346 }
2347 si->si_ptrauth = TRUE;
2348 sr->sr_auth_section[sr->sr_num_auth_section++] = si;
2349 /*
2350 * Remember the shared region, since that's where we'll
2351 * stash this info for all auth pagers to share. Each pager
2352 * will need to take a reference to it.
2353 */
2354 si->si_shared_region = sr;
2355 kr = KERN_SUCCESS;
2356 goto done;
2357 }
2358 si->si_shared_region = NULL;
2359 si->si_ptrauth = FALSE;
2360 #else /* __has_feature(ptrauth_calls) */
2361 (void)prot; /* silence unused warning */
2362 #endif /* __has_feature(ptrauth_calls) */
2363
2364 /*
2365 * find the pre-existing shared region's map entry to slide
2366 */
2367 sr_map = vm_shared_region_vm_map(sr);
2368 kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2369 if (kr != KERN_SUCCESS) {
2370 goto done;
2371 }
2372 tmp_entry = &tmp_entry_store;
2373
2374 /*
2375 * The object must exactly cover the region to slide.
2376 */
2377 assert(VME_OFFSET(tmp_entry) == start);
2378 assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2379
2380 /* create a "shared_region" sliding pager */
2381 sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2382 if (sr_pager == MEMORY_OBJECT_NULL) {
2383 kr = KERN_RESOURCE_SHORTAGE;
2384 goto done;
2385 }
2386
2387 /* map that pager over the portion of the mapping that needs sliding */
2388 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
2389 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2390 vmk_flags.vmkf_overwrite_immutable = TRUE;
2391 map_addr = tmp_entry->vme_start;
2392 kr = vm_map_enter_mem_object(sr_map,
2393 &map_addr,
2394 (tmp_entry->vme_end - tmp_entry->vme_start),
2395 (mach_vm_offset_t) 0,
2396 vm_flags,
2397 vmk_flags,
2398 VM_KERN_MEMORY_NONE,
2399 (ipc_port_t)(uintptr_t) sr_pager,
2400 0,
2401 TRUE,
2402 tmp_entry->protection,
2403 tmp_entry->max_protection,
2404 tmp_entry->inheritance);
2405 assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2406 assertf(map_addr == tmp_entry->vme_start,
2407 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2408 (uint64_t)map_addr,
2409 (uint64_t) tmp_entry->vme_start,
2410 tmp_entry);
2411
2412 /* success! */
2413 kr = KERN_SUCCESS;
2414
2415 done:
2416 if (sr_pager != NULL) {
2417 /*
2418 * Release the sr_pager reference obtained by shared_region_pager_setup().
2419 * The mapping, if it succeeded, is now holding a reference on the memory object.
2420 */
2421 memory_object_deallocate(sr_pager);
2422 sr_pager = MEMORY_OBJECT_NULL;
2423 }
2424 if (tmp_entry != NULL) {
2425 /* release extra ref on tmp_entry's VM object */
2426 vm_object_deallocate(VME_OBJECT(tmp_entry));
2427 tmp_entry = VM_MAP_ENTRY_NULL;
2428 }
2429
2430 if (kr != KERN_SUCCESS) {
2431 /* cleanup */
2432 if (si != NULL) {
2433 if (si->si_slide_object) {
2434 vm_object_deallocate(si->si_slide_object);
2435 si->si_slide_object = VM_OBJECT_NULL;
2436 }
2437 kfree(si, sizeof(*si));
2438 si = NULL;
2439 }
2440 if (slide_info_entry != NULL) {
2441 kheap_free(KHEAP_DATA_BUFFERS, slide_info_entry, (vm_size_t)slide_info_size);
2442 slide_info_entry = NULL;
2443 }
2444 }
2445 return kr;
2446 }
2447
2448 static kern_return_t
2449 vm_shared_region_slide_sanity_check_v2(
2450 vm_shared_region_slide_info_entry_v2_t s_info,
2451 mach_vm_size_t slide_info_size)
2452 {
2453 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2454 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2455 return KERN_FAILURE;
2456 }
2457 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2458 return KERN_FAILURE;
2459 }
2460
2461 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2462
2463 uint32_t page_starts_count = s_info->page_starts_count;
2464 uint32_t page_extras_count = s_info->page_extras_count;
2465 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2466 if (num_trailing_entries < page_starts_count) {
2467 return KERN_FAILURE;
2468 }
2469
2470 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2471 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2472 if (trailing_size >> 1 != num_trailing_entries) {
2473 return KERN_FAILURE;
2474 }
2475
2476 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2477 if (required_size < sizeof(*s_info)) {
2478 return KERN_FAILURE;
2479 }
2480
2481 if (required_size > slide_info_size) {
2482 return KERN_FAILURE;
2483 }
2484
2485 return KERN_SUCCESS;
2486 }
2487
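/*
 * Aside (illustrative only): the "<< 1 then >> 1" trick above is just an
 * overflow check on the size of the trailing uint16_t array.  Below is a
 * sketch of the same bounds check written with the os_add_overflow() /
 * os_mul_overflow() helpers from <os/overflow.h>; using those helpers here
 * is an assumption of this sketch, not what the code above does.
 */
#if 0
static kern_return_t
slide_info_v2_bounds_check_sketch(
	const struct vm_shared_region_slide_info_entry_v2 *s_info,
	mach_vm_size_t slide_info_size)
{
	mach_vm_size_t num_trailing_entries;
	mach_vm_size_t trailing_size;
	mach_vm_size_t required_size;

	if (os_add_overflow(s_info->page_starts_count, s_info->page_extras_count,
	    &num_trailing_entries) ||
	    os_mul_overflow(num_trailing_entries, sizeof(uint16_t), &trailing_size) ||
	    os_add_overflow(sizeof(*s_info), trailing_size, &required_size)) {
		return KERN_FAILURE;
	}
	return (required_size <= slide_info_size) ? KERN_SUCCESS : KERN_FAILURE;
}
#endif
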
2488 static kern_return_t
2489 vm_shared_region_slide_sanity_check_v3(
2490 vm_shared_region_slide_info_entry_v3_t s_info,
2491 mach_vm_size_t slide_info_size)
2492 {
2493 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2494 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2495 return KERN_FAILURE;
2496 }
2497 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2498 printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2499 return KERN_FAILURE;
2500 }
2501
2502 uint32_t page_starts_count = s_info->page_starts_count;
2503 mach_vm_size_t num_trailing_entries = page_starts_count;
2504 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2505 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2506 if (required_size < sizeof(*s_info)) {
2507 printf("vm_shared_region_slide_sanity_check_v3: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2508 return KERN_FAILURE;
2509 }
2510
2511 if (required_size > slide_info_size) {
2512 printf("vm_shared_region_slide_sanity_check_v3: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2513 return KERN_FAILURE;
2514 }
2515
2516 return KERN_SUCCESS;
2517 }
2518
2519 static kern_return_t
2520 vm_shared_region_slide_sanity_check_v4(
2521 vm_shared_region_slide_info_entry_v4_t s_info,
2522 mach_vm_size_t slide_info_size)
2523 {
2524 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2525 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2526 return KERN_FAILURE;
2527 }
2528 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2529 return KERN_FAILURE;
2530 }
2531
2532 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2533
2534 uint32_t page_starts_count = s_info->page_starts_count;
2535 uint32_t page_extras_count = s_info->page_extras_count;
2536 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2537 if (num_trailing_entries < page_starts_count) {
2538 return KERN_FAILURE;
2539 }
2540
2541 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2542 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2543 if (trailing_size >> 1 != num_trailing_entries) {
2544 return KERN_FAILURE;
2545 }
2546
2547 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2548 if (required_size < sizeof(*s_info)) {
2549 return KERN_FAILURE;
2550 }
2551
2552 if (required_size > slide_info_size) {
2553 return KERN_FAILURE;
2554 }
2555
2556 return KERN_SUCCESS;
2557 }
2558
2559
2560 static kern_return_t
2561 vm_shared_region_slide_sanity_check(
2562 vm_shared_region_slide_info_entry_t s_info,
2563 mach_vm_size_t s_info_size)
2564 {
2565 kern_return_t kr;
2566
2567 switch (s_info->version) {
2568 case 2:
2569 kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2570 break;
2571 case 3:
2572 kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2573 break;
2574 case 4:
2575 kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2576 break;
2577 default:
2578 kr = KERN_FAILURE;
2579 }
2580 return kr;
2581 }
2582
2583 static kern_return_t
2584 rebase_chain_32(
2585 uint8_t *page_content,
2586 uint16_t start_offset,
2587 uint32_t slide_amount,
2588 vm_shared_region_slide_info_entry_v2_t s_info)
2589 {
2590 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
2591
2592 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
2593 const uint32_t value_mask = ~delta_mask;
2594 const uint32_t value_add = (uint32_t)(s_info->value_add);
2595 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
2596
2597 uint32_t page_offset = start_offset;
2598 uint32_t delta = 1;
2599
2600 while (delta != 0 && page_offset <= last_page_offset) {
2601 uint8_t *loc;
2602 uint32_t value;
2603
2604 loc = page_content + page_offset;
2605 memcpy(&value, loc, sizeof(value));
2606 delta = (value & delta_mask) >> delta_shift;
2607 value &= value_mask;
2608
2609 if (value != 0) {
2610 value += value_add;
2611 value += slide_amount;
2612 }
2613 memcpy(loc, &value, sizeof(value));
2614 page_offset += delta;
2615 }
2616
2617 /* If the offset went past the end of the page, then the slide data is invalid. */
2618 if (page_offset > last_page_offset) {
2619 return KERN_FAILURE;
2620 }
2621 return KERN_SUCCESS;
2622 }
2623
2624 static kern_return_t
2625 rebase_chain_64(
2626 uint8_t *page_content,
2627 uint16_t start_offset,
2628 uint32_t slide_amount,
2629 vm_shared_region_slide_info_entry_v2_t s_info)
2630 {
2631 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
2632
2633 const uint64_t delta_mask = s_info->delta_mask;
2634 const uint64_t value_mask = ~delta_mask;
2635 const uint64_t value_add = s_info->value_add;
2636 const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
2637
2638 uint32_t page_offset = start_offset;
2639 uint32_t delta = 1;
2640
2641 while (delta != 0 && page_offset <= last_page_offset) {
2642 uint8_t *loc;
2643 uint64_t value;
2644
2645 loc = page_content + page_offset;
2646 memcpy(&value, loc, sizeof(value));
2647 delta = (uint32_t)((value & delta_mask) >> delta_shift);
2648 value &= value_mask;
2649
2650 if (value != 0) {
2651 value += value_add;
2652 value += slide_amount;
2653 }
2654 memcpy(loc, &value, sizeof(value));
2655 page_offset += delta;
2656 }
2657
2658 if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
2659 /* If a pointer straddling the page boundary needs to be adjusted, then
2660 * add the slide to the lower half. The encoding guarantees that the upper
2661 * half on the next page will need no masking.
2662 *
2663 * This assumes a little-endian machine and that the region being slid
2664 * never crosses a 4 GB boundary. */
2665
2666 uint8_t *loc = page_content + page_offset;
2667 uint32_t value;
2668
2669 memcpy(&value, loc, sizeof(value));
2670 value += slide_amount;
2671 memcpy(loc, &value, sizeof(value));
2672 } else if (page_offset > last_page_offset) {
2673 return KERN_FAILURE;
2674 }
2675
2676 return KERN_SUCCESS;
2677 }
2678
2679 static kern_return_t
2680 rebase_chain(
2681 boolean_t is_64,
2682 uint32_t pageIndex,
2683 uint8_t *page_content,
2684 uint16_t start_offset,
2685 uint32_t slide_amount,
2686 vm_shared_region_slide_info_entry_v2_t s_info)
2687 {
2688 kern_return_t kr;
2689 if (is_64) {
2690 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
2691 } else {
2692 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
2693 }
2694
2695 if (kr != KERN_SUCCESS) {
2696 printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
2697 pageIndex, start_offset, slide_amount);
2698 }
2699 return kr;
2700 }
2701
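/*
 * Illustrative sketch (not part of the build): a minimal v2 delta chain and
 * how rebase_chain_32() above walks it.  The delta_mask and page contents
 * below are invented for the example and do not correspond to any real
 * dyld shared cache.
 */
#if 0
static void
rebase_chain_32_example(void)
{
	/* hypothetical encoding: top byte holds (delta in bytes) / 4, low 24 bits hold the value */
	struct vm_shared_region_slide_info_entry_v2 s_info = {
		.delta_mask = 0xFF000000ULL,
		.value_add  = 0,
	};
	uint8_t page[PAGE_SIZE_FOR_SR_SLIDE] = { 0 };
	uint32_t first  = 0x01000010;	/* delta = 1 * 4 bytes, value = 0x10 */
	uint32_t second = 0x00000020;	/* delta = 0 (end of chain), value = 0x20 */

	memcpy(&page[0], &first, sizeof(first));
	memcpy(&page[4], &second, sizeof(second));

	/* walk the chain starting at page offset 0 with a slide of 0x4000 */
	(void)rebase_chain_32(page, 0, 0x4000, &s_info);

	/* page[0..3] now holds 0x4010 and page[4..7] holds 0x4020 */
}
#endif
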
2702 static kern_return_t
2703 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2704 {
2705 vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
2706 const uint32_t slide_amount = si->si_slide;
2707
2708 /* The high bits of the delta_mask field are nonzero precisely when the shared
2709 * cache is 64-bit. */
2710 const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
2711
2712 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
2713 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
2714
2715 uint8_t *page_content = (uint8_t *)vaddr;
2716 uint16_t page_entry;
2717
2718 if (pageIndex >= s_info->page_starts_count) {
2719 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
2720 pageIndex, s_info->page_starts_count);
2721 return KERN_FAILURE;
2722 }
2723 page_entry = page_starts[pageIndex];
2724
2725 if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
2726 return KERN_SUCCESS;
2727 }
2728
2729 if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
2730 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
2731 uint16_t info;
2732
2733 do {
2734 uint16_t page_start_offset;
2735 kern_return_t kr;
2736
2737 if (chain_index >= s_info->page_extras_count) {
2738 printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
2739 chain_index, s_info->page_extras_count);
2740 return KERN_FAILURE;
2741 }
2742 info = page_extras[chain_index];
2743 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2744
2745 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
2746 if (kr != KERN_SUCCESS) {
2747 return KERN_FAILURE;
2748 }
2749
2750 chain_index++;
2751 } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
2752 } else {
2753 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2754 kern_return_t kr;
2755
2756 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
2757 if (kr != KERN_SUCCESS) {
2758 return KERN_FAILURE;
2759 }
2760 }
2761
2762 return KERN_SUCCESS;
2763 }
2764
2765
2766 static kern_return_t
2767 vm_shared_region_slide_page_v3(
2768 vm_shared_region_slide_info_t si,
2769 vm_offset_t vaddr,
2770 __unused mach_vm_offset_t uservaddr,
2771 uint32_t pageIndex,
2772 #if !__has_feature(ptrauth_calls)
2773 __unused
2774 #endif /* !__has_feature(ptrauth_calls) */
2775 uint64_t jop_key)
2776 {
2777 vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
2778 const uint32_t slide_amount = si->si_slide;
2779
2780 uint8_t *page_content = (uint8_t *)vaddr;
2781 uint16_t page_entry;
2782
2783 if (pageIndex >= s_info->page_starts_count) {
2784 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
2785 pageIndex, s_info->page_starts_count);
2786 return KERN_FAILURE;
2787 }
2788 page_entry = s_info->page_starts[pageIndex];
2789
2790 if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
2791 return KERN_SUCCESS;
2792 }
2793
2794 uint8_t* rebaseLocation = page_content;
2795 uint64_t delta = page_entry;
2796 do {
2797 rebaseLocation += delta;
2798 uint64_t value;
2799 memcpy(&value, rebaseLocation, sizeof(value));
2800 delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
2801
2802 // A pointer is one of :
2803 // {
2804 // uint64_t pointerValue : 51;
2805 // uint64_t offsetToNextPointer : 11;
2806 // uint64_t isBind : 1 = 0;
2807 // uint64_t authenticated : 1 = 0;
2808 // }
2809 // {
2810 // uint32_t offsetFromSharedCacheBase;
2811 // uint16_t diversityData;
2812 // uint16_t hasAddressDiversity : 1;
2813 // uint16_t hasDKey : 1;
2814 // uint16_t hasBKey : 1;
2815 // uint16_t offsetToNextPointer : 11;
2816 // uint16_t isBind : 1;
2817 // uint16_t authenticated : 1 = 1;
2818 // }
2819
2820 bool isBind = (value & (1ULL << 62)) != 0;
2821 if (isBind) {
2822 return KERN_FAILURE;
2823 }
2824
2825 #if __has_feature(ptrauth_calls)
2826 uint16_t diversity_data = (uint16_t)(value >> 32);
2827 bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
2828 ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
2829 #endif /* __has_feature(ptrauth_calls) */
2830 bool isAuthenticated = (value & (1ULL << 63)) != 0;
2831
2832 if (isAuthenticated) {
2833 // The new value for a rebase is the low 32-bits of the threaded value plus the slide.
2834 value = (value & 0xFFFFFFFF) + slide_amount;
2835 // Add in the offset from the mach_header
2836 const uint64_t value_add = s_info->value_add;
2837 value += value_add;
2838
2839 #if __has_feature(ptrauth_calls)
2840 uint64_t discriminator = diversity_data;
2841 if (hasAddressDiversity) {
2842 // First calculate a new discriminator using the address of where we are trying to store the value
2843 uintptr_t pageOffset = rebaseLocation - page_content;
2844 discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
2845 }
2846
2847 if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
2848 /*
2849 * These pointers are used in user mode. Disable the kernel key
2850 * diversification so we can sign them for use in user mode.
2851 */
2852 value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
2853 }
2854 #endif /* __has_feature(ptrauth_calls) */
2855 } else {
2856 // The new value for a rebase is the low 51-bits of the threaded value plus the slide.
2857 // Regular pointer which needs to fit in 51-bits of value.
2858 // C++ RTTI uses the top bit, so we'll allow the whole top-byte
2859 // and the bottom 43-bits to fit into 51-bits.
2860 uint64_t top8Bits = value & 0x0007F80000000000ULL;
2861 uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
2862 uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
2863 value = targetValue + slide_amount;
2864 }
2865
2866 memcpy(rebaseLocation, &value, sizeof(value));
2867 } while (delta != 0);
2868
2869 return KERN_SUCCESS;
2870 }
2871
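/*
 * Illustrative sketch (not part of the build): the two 64-bit pointer
 * encodings described in the comment inside vm_shared_region_slide_page_v3()
 * above, written out as bitfields so the bit positions used by the code
 * (bits 51..61 = offsetToNextPointer, bit 62 = isBind, bit 63 = authenticated)
 * are explicit.  The union itself is hypothetical and assumes clang's
 * little-endian bitfield allocation.
 */
#if 0
union shared_cache_chained_ptr_v3_sketch {
	struct {
		uint64_t pointerValue        : 51;
		uint64_t offsetToNextPointer : 11;
		uint64_t isBind              : 1;	/* 0 for this form */
		uint64_t authenticated       : 1;	/* 0 for this form */
	} plain;
	struct {
		uint64_t offsetFromSharedCacheBase : 32;
		uint64_t diversityData             : 16;
		uint64_t hasAddressDiversity       : 1;
		uint64_t key                       : 2;	/* read as a ptrauth_key by the code above */
		uint64_t offsetToNextPointer       : 11;
		uint64_t isBind                    : 1;
		uint64_t authenticated             : 1;	/* 1 for this form */
	} auth;
	uint64_t raw;
};
#endif
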
2872 static kern_return_t
2873 rebase_chainv4(
2874 uint8_t *page_content,
2875 uint16_t start_offset,
2876 uint32_t slide_amount,
2877 vm_shared_region_slide_info_entry_v4_t s_info)
2878 {
2879 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
2880
2881 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
2882 const uint32_t value_mask = ~delta_mask;
2883 const uint32_t value_add = (uint32_t)(s_info->value_add);
2884 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
2885
2886 uint32_t page_offset = start_offset;
2887 uint32_t delta = 1;
2888
2889 while (delta != 0 && page_offset <= last_page_offset) {
2890 uint8_t *loc;
2891 uint32_t value;
2892
2893 loc = page_content + page_offset;
2894 memcpy(&value, loc, sizeof(value));
2895 delta = (value & delta_mask) >> delta_shift;
2896 value &= value_mask;
2897
2898 if ((value & 0xFFFF8000) == 0) {
2899 // small positive non-pointer, use as-is
2900 } else if ((value & 0x3FFF8000) == 0x3FFF8000) {
2901 // small negative non-pointer
2902 value |= 0xC0000000;
2903 } else {
2904 // pointer that needs rebasing
2905 value += value_add;
2906 value += slide_amount;
2907 }
2908 memcpy(loc, &value, sizeof(value));
2909 page_offset += delta;
2910 }
2911
2912 /* If the offset went past the end of the page, then the slide data is invalid. */
2913 if (page_offset > last_page_offset) {
2914 return KERN_FAILURE;
2915 }
2916 return KERN_SUCCESS;
2917 }
2918
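/*
 * Illustrative sketch (not part of the build): how rebase_chainv4() above
 * classifies a 32-bit value once the delta bits have been masked off.  The
 * example inputs are made up; the masks are the ones used by the code.
 */
#if 0
static uint32_t
rebase_v4_value_example(uint32_t value, uint32_t value_add, uint32_t slide_amount)
{
	if ((value & 0xFFFF8000) == 0) {
		/* e.g. 0x00001234: small positive non-pointer, left untouched */
		return value;
	} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
		/* e.g. 0x3FFFFFFF: small negative non-pointer, sign-extended to 0xFFFFFFFF */
		return value | 0xC0000000;
	} else {
		/* anything else is a pointer and gets rebased */
		return value + value_add + slide_amount;
	}
}
#endif
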
2919 static kern_return_t
2920 vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2921 {
2922 vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
2923 const uint32_t slide_amount = si->si_slide;
2924
2925 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
2926 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
2927
2928 uint8_t *page_content = (uint8_t *)vaddr;
2929 uint16_t page_entry;
2930
2931 if (pageIndex >= s_info->page_starts_count) {
2932 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
2933 pageIndex, s_info->page_starts_count);
2934 return KERN_FAILURE;
2935 }
2936 page_entry = page_starts[pageIndex];
2937
2938 if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
2939 return KERN_SUCCESS;
2940 }
2941
2942 if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
2943 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
2944 uint16_t info;
2945
2946 do {
2947 uint16_t page_start_offset;
2948 kern_return_t kr;
2949
2950 if (chain_index >= s_info->page_extras_count) {
2951 printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
2952 chain_index, s_info->page_extras_count);
2953 return KERN_FAILURE;
2954 }
2955 info = page_extras[chain_index];
2956 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2957
2958 kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
2959 if (kr != KERN_SUCCESS) {
2960 return KERN_FAILURE;
2961 }
2962
2963 chain_index++;
2964 } while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
2965 } else {
2966 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2967 kern_return_t kr;
2968
2969 kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
2970 if (kr != KERN_SUCCESS) {
2971 return KERN_FAILURE;
2972 }
2973 }
2974
2975 return KERN_SUCCESS;
2976 }
2977
2978
2979
2980 kern_return_t
2981 vm_shared_region_slide_page(
2982 vm_shared_region_slide_info_t si,
2983 vm_offset_t vaddr,
2984 mach_vm_offset_t uservaddr,
2985 uint32_t pageIndex,
2986 uint64_t jop_key)
2987 {
2988 switch (si->si_slide_info_entry->version) {
2989 case 2:
2990 return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
2991 case 3:
2992 return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
2993 case 4:
2994 return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
2995 default:
2996 return KERN_FAILURE;
2997 }
2998 }
2999
3000 /******************************************************************************/
3001 /* Comm page support */
3002 /******************************************************************************/
3003
3004 ipc_port_t commpage32_handle = IPC_PORT_NULL;
3005 ipc_port_t commpage64_handle = IPC_PORT_NULL;
3006 vm_named_entry_t commpage32_entry = NULL;
3007 vm_named_entry_t commpage64_entry = NULL;
3008 vm_map_t commpage32_map = VM_MAP_NULL;
3009 vm_map_t commpage64_map = VM_MAP_NULL;
3010
3011 ipc_port_t commpage_text32_handle = IPC_PORT_NULL;
3012 ipc_port_t commpage_text64_handle = IPC_PORT_NULL;
3013 vm_named_entry_t commpage_text32_entry = NULL;
3014 vm_named_entry_t commpage_text64_entry = NULL;
3015 vm_map_t commpage_text32_map = VM_MAP_NULL;
3016 vm_map_t commpage_text64_map = VM_MAP_NULL;
3017
3018 user32_addr_t commpage_text32_location = 0;
3019 user64_addr_t commpage_text64_location = 0;
3020
3021 #if defined(__i386__) || defined(__x86_64__)
3022 /*
3023 * Create a memory entry, VM submap and pmap for one commpage.
3024 */
3025 static void
3026 _vm_commpage_init(
3027 ipc_port_t *handlep,
3028 vm_map_size_t size)
3029 {
3030 kern_return_t kr;
3031 vm_named_entry_t mem_entry;
3032 vm_map_t new_map;
3033
3034 SHARED_REGION_TRACE_DEBUG(
3035 ("commpage: -> _init(0x%llx)\n",
3036 (long long)size));
3037
3038 kr = mach_memory_entry_allocate(&mem_entry,
3039 handlep);
3040 if (kr != KERN_SUCCESS) {
3041 panic("_vm_commpage_init: could not allocate mem_entry");
3042 }
3043 new_map = vm_map_create(pmap_create_options(NULL, 0, 0), 0, size, PMAP_CREATE_64BIT);
3044 if (new_map == VM_MAP_NULL) {
3045 panic("_vm_commpage_init: could not allocate VM map");
3046 }
3047 mem_entry->backing.map = new_map;
3048 mem_entry->internal = TRUE;
3049 mem_entry->is_sub_map = TRUE;
3050 mem_entry->offset = 0;
3051 mem_entry->protection = VM_PROT_ALL;
3052 mem_entry->size = size;
3053
3054 SHARED_REGION_TRACE_DEBUG(
3055 ("commpage: _init(0x%llx) <- %p\n",
3056 (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3057 }
3058 #endif
3059
3060
3061 /*
3062 * Initialize the comm text pages at boot time
3063 */
3064 void
3065 vm_commpage_text_init(void)
3066 {
3067 SHARED_REGION_TRACE_DEBUG(
3068 ("commpage text: ->init()\n"));
3069 #if defined(__i386__) || defined(__x86_64__)
3070 /* create the 32 bit comm text page */
3071 unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
3072 _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3073 commpage_text32_entry = (vm_named_entry_t) ip_get_kobject(commpage_text32_handle);
3074 commpage_text32_map = commpage_text32_entry->backing.map;
3075 commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3076 /* XXX if (cpu_is_64bit_capable()) ? */
3077 /* create the 64-bit comm page */
3078 offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict sliding to a range of up to 2MB */
3079 _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3080 commpage_text64_entry = (vm_named_entry_t) ip_get_kobject(commpage_text64_handle);
3081 commpage_text64_map = commpage_text64_entry->backing.map;
3082 commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3083 #endif
3084
3085 commpage_text_populate();
3086
3087 /* populate the routines in here */
3088 SHARED_REGION_TRACE_DEBUG(
3089 ("commpage text: init() <-\n"));
3090 }
3091
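/*
 * Illustrative sketch (not part of the build): the randomized comm text page
 * placement above just picks a page index within the allowed slide range and
 * turns it into a byte offset from the fixed base.  The "_example" name is
 * hypothetical.
 */
#if 0
static user32_addr_t
commpage_text32_location_example(void)
{
	unsigned int page_index = random() % _PFZ32_SLIDE_RANGE;
	unsigned int byte_offset = page_index << PAGE_SHIFT;	/* page aligned */

	return (user32_addr_t)(_COMM_PAGE32_TEXT_START + byte_offset);
}
#endif
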
3092 /*
3093 * Initialize the comm pages at boot time.
3094 */
3095 void
3096 vm_commpage_init(void)
3097 {
3098 SHARED_REGION_TRACE_DEBUG(
3099 ("commpage: -> init()\n"));
3100
3101 #if defined(__i386__) || defined(__x86_64__)
3102 /* create the 32-bit comm page */
3103 _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3104 commpage32_entry = (vm_named_entry_t) ip_get_kobject(commpage32_handle);
3105 commpage32_map = commpage32_entry->backing.map;
3106
3107 /* XXX if (cpu_is_64bit_capable()) ? */
3108 /* create the 64-bit comm page */
3109 _vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3110 commpage64_entry = (vm_named_entry_t) ip_get_kobject(commpage64_handle);
3111 commpage64_map = commpage64_entry->backing.map;
3112
3113 #endif /* __i386__ || __x86_64__ */
3114
3115 /* populate them according to this specific platform */
3116 commpage_populate();
3117 __commpage_setup = 1;
3118 #if !CONFIG_EMBEDDED
3119 if (__system_power_source == 0) {
3120 post_sys_powersource_internal(0, 1);
3121 }
3122 #endif
3123
3124 SHARED_REGION_TRACE_DEBUG(
3125 ("commpage: init() <-\n"));
3126 }
3127
3128 /*
3129 * Enter the appropriate comm page into the task's address space.
3130 * This is called at exec() time via vm_map_exec().
3131 */
3132 kern_return_t
3133 vm_commpage_enter(
3134 vm_map_t map,
3135 task_t task,
3136 boolean_t is64bit)
3137 {
3138 #if defined(__arm__)
3139 #pragma unused(is64bit)
3140 (void)task;
3141 (void)map;
3142 return KERN_SUCCESS;
3143 #elif defined(__arm64__)
3144 #pragma unused(is64bit)
3145 (void)task;
3146 (void)map;
3147 pmap_insert_sharedpage(vm_map_pmap(map));
3148 return KERN_SUCCESS;
3149 #else
3150 ipc_port_t commpage_handle, commpage_text_handle;
3151 vm_map_offset_t commpage_address, objc_address, commpage_text_address;
3152 vm_map_size_t commpage_size, objc_size, commpage_text_size;
3153 int vm_flags;
3154 vm_map_kernel_flags_t vmk_flags;
3155 kern_return_t kr;
3156
3157 SHARED_REGION_TRACE_DEBUG(
3158 ("commpage: -> enter(%p,%p)\n",
3159 (void *)VM_KERNEL_ADDRPERM(map),
3160 (void *)VM_KERNEL_ADDRPERM(task)));
3161
3162 commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3163 /* the comm page is likely to be beyond the actual end of the VM map */
3164 vm_flags = VM_FLAGS_FIXED;
3165 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
3166 vmk_flags.vmkf_beyond_max = TRUE;
3167
3168 /* select the appropriate comm page for this task */
3169 assert(!(is64bit ^ vm_map_is_64bit(map)));
3170 if (is64bit) {
3171 commpage_handle = commpage64_handle;
3172 commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3173 commpage_size = _COMM_PAGE64_AREA_LENGTH;
3174 objc_size = _COMM_PAGE64_OBJC_SIZE;
3175 objc_address = _COMM_PAGE64_OBJC_BASE;
3176 commpage_text_handle = commpage_text64_handle;
3177 commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3178 } else {
3179 commpage_handle = commpage32_handle;
3180 commpage_address =
3181 (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3182 commpage_size = _COMM_PAGE32_AREA_LENGTH;
3183 objc_size = _COMM_PAGE32_OBJC_SIZE;
3184 objc_address = _COMM_PAGE32_OBJC_BASE;
3185 commpage_text_handle = commpage_text32_handle;
3186 commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3187 }
3188
3189 vm_tag_t tag = VM_KERN_MEMORY_NONE;
3190 if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3191 (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
3192 /* the commpage is properly aligned or sized for pmap-nesting */
3193 tag = VM_MEMORY_SHARED_PMAP;
3194 vmk_flags.vmkf_nested_pmap = TRUE;
3195 }
3196 /* map the comm page in the task's address space */
3197 assert(commpage_handle != IPC_PORT_NULL);
3198 kr = vm_map_enter_mem_object(
3199 map,
3200 &commpage_address,
3201 commpage_size,
3202 0,
3203 vm_flags,
3204 vmk_flags,
3205 tag,
3206 commpage_handle,
3207 0,
3208 FALSE,
3209 VM_PROT_READ,
3210 VM_PROT_READ,
3211 VM_INHERIT_SHARE);
3212 if (kr != KERN_SUCCESS) {
3213 SHARED_REGION_TRACE_ERROR(
3214 ("commpage: enter(%p,0x%llx,0x%llx) "
3215 "commpage %p mapping failed 0x%x\n",
3216 (void *)VM_KERNEL_ADDRPERM(map),
3217 (long long)commpage_address,
3218 (long long)commpage_size,
3219 (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3220 }
3221
3222 /* map the comm text page in the task's address space */
3223 assert(commpage_text_handle != IPC_PORT_NULL);
3224 kr = vm_map_enter_mem_object(
3225 map,
3226 &commpage_text_address,
3227 commpage_text_size,
3228 0,
3229 vm_flags,
3230 vmk_flags,
3231 tag,
3232 commpage_text_handle,
3233 0,
3234 FALSE,
3235 VM_PROT_READ | VM_PROT_EXECUTE,
3236 VM_PROT_READ | VM_PROT_EXECUTE,
3237 VM_INHERIT_SHARE);
3238 if (kr != KERN_SUCCESS) {
3239 SHARED_REGION_TRACE_ERROR(
3240 ("commpage text: enter(%p,0x%llx,0x%llx) "
3241 "commpage text %p mapping failed 0x%x\n",
3242 (void *)VM_KERNEL_ADDRPERM(map),
3243 (long long)commpage_text_address,
3244 (long long)commpage_text_size,
3245 (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3246 }
3247
3248 /*
3249 * Since we're here, we also pre-allocate some virtual space for the
3250 * Objective-C run-time, if needed...
3251 */
3252 if (objc_size != 0) {
3253 kr = vm_map_enter_mem_object(
3254 map,
3255 &objc_address,
3256 objc_size,
3257 0,
3258 VM_FLAGS_FIXED,
3259 vmk_flags,
3260 tag,
3261 IPC_PORT_NULL,
3262 0,
3263 FALSE,
3264 VM_PROT_ALL,
3265 VM_PROT_ALL,
3266 VM_INHERIT_DEFAULT);
3267 if (kr != KERN_SUCCESS) {
3268 SHARED_REGION_TRACE_ERROR(
3269 ("commpage: enter(%p,0x%llx,0x%llx) "
3270 "objc mapping failed 0x%x\n",
3271 (void *)VM_KERNEL_ADDRPERM(map),
3272 (long long)objc_address,
3273 (long long)objc_size, kr));
3274 }
3275 }
3276
3277 SHARED_REGION_TRACE_DEBUG(
3278 ("commpage: enter(%p,%p) <- 0x%x\n",
3279 (void *)VM_KERNEL_ADDRPERM(map),
3280 (void *)VM_KERNEL_ADDRPERM(task), kr));
3281 return kr;
3282 #endif
3283 }
3284
3285 int
3286 vm_shared_region_slide(
3287 uint32_t slide,
3288 mach_vm_offset_t entry_start_address,
3289 mach_vm_size_t entry_size,
3290 mach_vm_offset_t slide_start,
3291 mach_vm_size_t slide_size,
3292 mach_vm_offset_t slid_mapping,
3293 memory_object_control_t sr_file_control,
3294 vm_prot_t prot)
3295 {
3296 vm_shared_region_t sr;
3297 kern_return_t error;
3298
3299 SHARED_REGION_TRACE_DEBUG(
3300 ("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3301 slide, entry_start_address, entry_size, slide_start, slide_size));
3302
3303 sr = vm_shared_region_get(current_task());
3304 if (sr == NULL) {
3305 printf("%s: no shared region?\n", __FUNCTION__);
3306 SHARED_REGION_TRACE_DEBUG(
3307 ("vm_shared_region_slide: <- %d (no shared region)\n",
3308 KERN_FAILURE));
3309 return KERN_FAILURE;
3310 }
3311
3312 /*
3313 * Protect from concurrent access.
3314 */
3315 vm_shared_region_lock();
3316 while (sr->sr_slide_in_progress) {
3317 vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
3318 }
3319
3320 sr->sr_slide_in_progress = TRUE;
3321 vm_shared_region_unlock();
3322
3323 error = vm_shared_region_slide_mapping(sr,
3324 (user_addr_t)slide_start,
3325 slide_size,
3326 entry_start_address,
3327 entry_size,
3328 slid_mapping,
3329 slide,
3330 sr_file_control,
3331 prot);
3332 if (error) {
3333 printf("slide_info initialization failed with kr=%d\n", error);
3334 }
3335
3336 vm_shared_region_lock();
3337
3338 assert(sr->sr_slide_in_progress);
3339 sr->sr_slide_in_progress = FALSE;
3340 thread_wakeup(&sr->sr_slide_in_progress);
3341
3342 #ifndef CONFIG_EMBEDDED
3343 if (error == KERN_SUCCESS) {
3344 shared_region_completed_slide = TRUE;
3345 }
3346 #endif
3347 vm_shared_region_unlock();
3348
3349 vm_shared_region_deallocate(sr);
3350
3351 SHARED_REGION_TRACE_DEBUG(
3352 ("vm_shared_region_slide: <- %d\n",
3353 error));
3354
3355 return error;
3356 }
3357
3358 /*
3359 * Used during Authenticated Root Volume macOS boot.
3360 * Launchd re-execs itself and wants the new launchd to use
3361 * the shared cache from the new root volume. This call
3362 * makes all the existing shared caches stale to allow
3363 * that to happen.
3364 */
3365 void
3366 vm_shared_region_pivot(void)
3367 {
3368 vm_shared_region_t shared_region = NULL;
3369
3370 vm_shared_region_lock();
3371
3372 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3373 assert(shared_region->sr_ref_count > 0);
3374 shared_region->sr_stale = TRUE;
3375 if (shared_region->sr_timer_call) {
3376 /*
3377 * We have a shared region ready to be destroyed
3378 * and just waiting for a delayed timer to fire.
3379 * Marking it stale cements its ineligibility to
3380 * be used ever again. So let's shorten the timer
3381 * aggressively down to 10 milliseconds and get rid of it.
3382 * This is a single quantum and we don't need to go
3383 * shorter than this duration. We want it to be short
3384 * enough, however, because we could have an unmount
3385 * of the volume hosting this shared region just behind
3386 * us.
3387 */
3388 uint64_t deadline;
3389 assert(shared_region->sr_ref_count == 1);
3390
3391 /*
3392 * Free the old timer call. Returns with a reference held.
3393 * If the old timer has already fired and is waiting for the
3394 * vm_shared_region lock, we will just return with an additional ref_count, i.e. 2.
3395 * The old timer will then fire and just drop the ref count down to 1
3396 * with no other modifications.
3397 */
3398 vm_shared_region_reference_locked(shared_region);
3399
3400 /* set up the timer. Keep the reference from above for this timer.*/
3401 shared_region->sr_timer_call = thread_call_allocate(
3402 (thread_call_func_t) vm_shared_region_timeout,
3403 (thread_call_param_t) shared_region);
3404
3405 /* schedule the timer */
3406 clock_interval_to_deadline(10, /* 10 milliseconds */
3407 NSEC_PER_MSEC,
3408 &deadline);
3409 thread_call_enter_delayed(shared_region->sr_timer_call,
3410 deadline);
3411
3412 SHARED_REGION_TRACE_DEBUG(
3413 ("shared_region: pivot(%p): armed timer\n",
3414 (void *)VM_KERNEL_ADDRPERM(shared_region)));
3415 }
3416 }
3417
3418 vm_shared_region_unlock();
3419 }
3420
3421 /*
3422 * Routine to mark any non-standard slide shared cache region as stale.
3423 * This causes the next "reslide" spawn to create a new shared region.
3424 */
3425 void
3426 vm_shared_region_reslide_stale(void)
3427 {
3428 #if __has_feature(ptrauth_calls)
3429 vm_shared_region_t shared_region = NULL;
3430
3431 vm_shared_region_lock();
3432
3433 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3434 assert(shared_region->sr_ref_count > 0);
3435 if (!shared_region->sr_stale && shared_region->sr_reslide) {
3436 shared_region->sr_stale = TRUE;
3437 vm_shared_region_reslide_count++;
3438 }
3439 }
3440
3441 vm_shared_region_unlock();
3442 #endif /* __has_feature(ptrauth_calls) */
3443 }
3444
3445 /*
3446 * report if the task is using a reslide shared cache region.
3447 */
3448 bool
3449 vm_shared_region_is_reslide(__unused struct task *task)
3450 {
3451 bool is_reslide = FALSE;
3452 #if !XNU_TARGET_OS_OSX && __has_feature(ptrauth_calls)
3453 vm_shared_region_t sr = vm_shared_region_get(task);
3454
3455 if (sr != NULL) {
3456 is_reslide = sr->sr_reslide;
3457 vm_shared_region_deallocate(sr);
3458 }
3459 #endif /* !XNU_TARGET_OS_OSX && __has_feature(ptrauth_calls) */
3460 return is_reslide;
3461 }
3462
3463 /*
3464 * This is called from power management code to let the kernel know the current source of power:
3465 * 0 if it is an external source (connected to power),
3466 * 1 if it is an internal power source, i.e. battery.
3467 */
3468 void
3469 #if !CONFIG_EMBEDDED
3470 post_sys_powersource(int i)
3471 #else
3472 post_sys_powersource(__unused int i)
3473 #endif
3474 {
3475 #if !CONFIG_EMBEDDED
3476 post_sys_powersource_internal(i, 0);
3477 #endif
3478 }
3479
3480
3481 #if !CONFIG_EMBEDDED
3482 static void
3483 post_sys_powersource_internal(int i, int internal)
3484 {
3485 if (internal == 0) {
3486 __system_power_source = i;
3487 }
3488 }
3489 #endif