1 /*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/dir.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
64 #include <sys/vm.h>
65 #include <sys/file.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
71 #include <sys/user.h>
72 #include <sys/syslog.h>
73 #include <sys/stat.h>
74 #include <sys/sysproto.h>
75 #include <sys/mman.h>
76 #include <sys/sysctl.h>
77 #include <sys/cprotect.h>
78 #include <sys/kpi_socket.h>
79 #include <sys/kas_info.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #if NECP
83 #include <net/necp.h>
84 #endif /* NECP */
85
86 #include <security/audit/audit.h>
87 #include <security/mac.h>
88 #include <bsm/audit_kevents.h>
89
90 #include <kern/kalloc.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_kern.h>
93 #include <vm/vm_pageout.h>
94
95 #include <mach/shared_region.h>
96 #include <vm/vm_shared_region.h>
97
98 #include <vm/vm_protos.h>
99
100 #include <sys/kern_memorystatus.h>
101 #include <sys/kern_memorystatus_freeze.h>
102
103 #if CONFIG_MACF
104 #include <security/mac_framework.h>
105 #endif
106
107 #if CONFIG_CSR
108 #include <sys/csr.h>
109 #endif /* CONFIG_CSR */
110 #include <IOKit/IOBSD.h>
111
112 int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
113 int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
114
115 #if VM_MAP_DEBUG_APPLE_PROTECT
116 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
117 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
118
119 #if VM_MAP_DEBUG_FOURK
120 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
121 #endif /* VM_MAP_DEBUG_FOURK */
122
123 #if DEVELOPMENT || DEBUG
124
125 static int
126 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
127 {
128 #pragma unused(arg1, arg2)
129 vm_offset_t kaddr;
130 kern_return_t kr;
131 int error = 0;
132 int size = 0;
133
134 error = sysctl_handle_int(oidp, &size, 0, req);
135 if (error || !req->newptr) {
136 return error;
137 }
138
139 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT);
140
141 if (kr == KERN_SUCCESS) {
142 kmem_free(kernel_map, kaddr, size);
143 }
144
145 return error;
146 }
147
148 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
149 0, 0, &sysctl_kmem_alloc_contig, "I", "");
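/*
 * Example (a sketch; DEVELOPMENT/DEBUG kernels only): writing a size to
 * this node exercises kmem_alloc_contig() and immediately frees the
 * allocation, which makes it handy for poking the contiguous-allocation
 * path from the shell:
 *
 *     sysctl -w vm.kmem_alloc_contig=65536
 *
 * Note the node is CTLFLAG_MASKED, so it is hidden from `sysctl -a`.
 */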
150
151 extern int vm_region_footprint;
152 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
153 static int
154 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
155 {
156 #pragma unused(arg1, arg2, oidp)
157 int error = 0;
158 int value;
159
160 value = task_self_region_footprint();
161 error = SYSCTL_OUT(req, &value, sizeof(int));
162 if (error) {
163 return error;
164 }
165
166 if (!req->newptr) {
167 return 0;
168 }
169
170 error = SYSCTL_IN(req, &value, sizeof(int));
171 if (error) {
172 return error;
173 }
174 task_self_region_footprint_set(value);
175 return 0;
176 }
177 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
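/*
 * Minimal userspace sketch (illustrative, not part of this file): the
 * node is CTLFLAG_ANYBODY, so any task may toggle footprint-style
 * accounting for itself and read the current setting back:
 *
 *     int on = 1;
 *     sysctlbyname("vm.self_region_footprint", NULL, NULL, &on, sizeof(on));
 *     size_t len = sizeof(on);
 *     sysctlbyname("vm.self_region_footprint", &on, &len, NULL, 0);
 */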
178
179 #endif /* DEVELOPMENT || DEBUG */
180
181
182 #if CONFIG_EMBEDDED
183
184 #if DEVELOPMENT || DEBUG
185 extern int panic_on_unsigned_execute;
186 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
187 #endif /* DEVELOPMENT || DEBUG */
188
189 extern int log_executable_mem_entry;
190 extern int cs_executable_create_upl;
191 extern int cs_executable_mem_entry;
192 extern int cs_executable_wire;
193 SYSCTL_INT(_vm, OID_AUTO, log_executable_mem_entry, CTLFLAG_RD | CTLFLAG_LOCKED, &log_executable_mem_entry, 0, "");
194 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
195 SYSCTL_INT(_vm, OID_AUTO, cs_executable_mem_entry, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_mem_entry, 0, "");
196 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
197 #endif /* CONFIG_EMBEDDED */
198
199 #if DEVELOPMENT || DEBUG
200 extern int radar_20146450;
201 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
202
203 extern int macho_printf;
204 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
205
206 extern int apple_protect_pager_data_request_debug;
207 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
208
209 #if __arm__ || __arm64__
210 /* These are meant to support the page table accounting unit test. */
211 extern unsigned int arm_hardware_page_size;
212 extern unsigned int arm_pt_desc_size;
213 extern unsigned int arm_pt_root_size;
214 extern unsigned int free_page_size_tt_count;
215 extern unsigned int free_two_page_size_tt_count;
216 extern unsigned int free_tt_count;
217 extern unsigned int inuse_user_tteroot_count;
218 extern unsigned int inuse_kernel_tteroot_count;
219 extern unsigned int inuse_user_ttepages_count;
220 extern unsigned int inuse_kernel_ttepages_count;
221 extern unsigned int inuse_user_ptepages_count;
222 extern unsigned int inuse_kernel_ptepages_count;
223 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
224 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
225 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
226 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
227 SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
228 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
229 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
230 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
231 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
232 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
233 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
234 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
235 #if DEVELOPMENT || DEBUG
236 extern unsigned long pmap_asid_flushes;
237 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
238 #endif
239 #endif /* __arm__ || __arm64__ */
240
241 #if __arm64__
242 extern int fourk_pager_data_request_debug;
243 SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
244 #endif /* __arm64__ */
245 #endif /* DEVELOPMENT || DEBUG */
246
247 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
248 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
249 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
250 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
251 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
252 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
253 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
254 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
255 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
256 #if VM_SCAN_FOR_SHADOW_CHAIN
257 static int vm_shadow_max_enabled = 0; /* Disabled by default */
258 extern int proc_shadow_max(void);
259 static int
260 vm_shadow_max SYSCTL_HANDLER_ARGS
261 {
262 #pragma unused(arg1, arg2, oidp)
263 int value = 0;
264
265 if (vm_shadow_max_enabled) {
266 value = proc_shadow_max();
267 }
268
269 return SYSCTL_OUT(req, &value, sizeof(value));
270 }
271 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
272 0, 0, &vm_shadow_max, "I", "");
273
274 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
275
276 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
277
278 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
279
280 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
281 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
282 /*
283  * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
284 */
285
286 #if DEVELOPMENT || DEBUG
287 extern int allow_stack_exec, allow_data_exec;
288
289 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
290 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
291
292 #endif /* DEVELOPMENT || DEBUG */
293
294 static const char *prot_values[] = {
295 "none",
296 "read-only",
297 "write-only",
298 "read-write",
299 "execute-only",
300 "read-execute",
301 "write-execute",
302 "read-write-execute"
303 };
304
305 void
306 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
307 {
308 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
309 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
310 }
311
312 /*
313 * shared_region_unnest_logging: level of logging of unnesting events
314 * 0 - no logging
315 * 1 - throttled logging of unexpected unnesting events (default)
316 * 2 - unthrottled logging of unexpected unnesting events
317 * 3+ - unthrottled logging of all unnesting events
318 */
319 int shared_region_unnest_logging = 1;
320
321 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
322 &shared_region_unnest_logging, 0, "");
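/*
 * Example (a sketch): to debug a suspected unnesting storm, raise the
 * level so every event is logged unthrottled:
 *
 *     sysctl -w vm.shared_region_unnest_logging=3
 */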
323
324 int vm_shared_region_unnest_log_interval = 10;
325 int shared_region_unnest_log_count_threshold = 5;
326
327 /*
328 * Shared cache path enforcement.
329 */
330
331 #ifndef CONFIG_EMBEDDED
332 static int scdir_enforce = 1;
333 static char scdir_path[] = "/var/db/dyld/";
334 #else
335 static int scdir_enforce = 0;
336 static char scdir_path[] = "/System/Library/Caches/com.apple.dyld/";
337 #endif
338
339 #ifndef SECURE_KERNEL
340 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
341 {
342 #if CONFIG_CSR
343 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
344 printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
345 return EPERM;
346 }
347 #endif /* CONFIG_CSR */
348 return sysctl_handle_int(oidp, arg1, arg2, req);
349 }
350
351 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
352 #endif
353
354 /* These log rate throttling state variables aren't thread safe, but
355 * are sufficient unto the task.
356 */
357 static int64_t last_unnest_log_time = 0;
358 static int shared_region_unnest_log_count = 0;
359
360 void
361 log_unnest_badness(
362 vm_map_t m,
363 vm_map_offset_t s,
364 vm_map_offset_t e,
365 boolean_t is_nested_map,
366 vm_map_offset_t lowest_unnestable_addr)
367 {
368 struct timeval tv;
369
370 if (shared_region_unnest_logging == 0) {
371 return;
372 }
373
374 if (shared_region_unnest_logging <= 2 &&
375 is_nested_map &&
376 s >= lowest_unnestable_addr) {
377 /*
378 * Unnesting of writable map entries is fine.
379 */
380 return;
381 }
382
383 if (shared_region_unnest_logging <= 1) {
384 microtime(&tv);
385 if ((tv.tv_sec - last_unnest_log_time) <
386 vm_shared_region_unnest_log_interval) {
387 if (shared_region_unnest_log_count++ >
388 shared_region_unnest_log_count_threshold) {
389 return;
390 }
391 } else {
392 last_unnest_log_time = tv.tv_sec;
393 shared_region_unnest_log_count = 0;
394 }
395 }
396
397 DTRACE_VM4(log_unnest_badness,
398 vm_map_t, m,
399 vm_map_offset_t, s,
400 vm_map_offset_t, e,
401 vm_map_offset_t, lowest_unnestable_addr);
402 printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
403 }
404
405 int
406 useracc(
407 user_addr_t addr,
408 user_size_t len,
409 int prot)
410 {
411 vm_map_t map;
412
413 map = current_map();
414 return vm_map_check_protection(
415 map,
416 vm_map_trunc_page(addr,
417 vm_map_page_mask(map)),
418 vm_map_round_page(addr + len,
419 vm_map_page_mask(map)),
420 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
421 }
422
423 int
424 vslock(
425 user_addr_t addr,
426 user_size_t len)
427 {
428 kern_return_t kret;
429 vm_map_t map;
430
431 map = current_map();
432 kret = vm_map_wire_kernel(map,
433 vm_map_trunc_page(addr,
434 vm_map_page_mask(map)),
435 vm_map_round_page(addr + len,
436 vm_map_page_mask(map)),
437 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
438 FALSE);
439
440 switch (kret) {
441 case KERN_SUCCESS:
442 return 0;
443 case KERN_INVALID_ADDRESS:
444 case KERN_NO_SPACE:
445 return ENOMEM;
446 case KERN_PROTECTION_FAILURE:
447 return EACCES;
448 default:
449 return EINVAL;
450 }
451 }
452
453 int
454 vsunlock(
455 user_addr_t addr,
456 user_size_t len,
457 __unused int dirtied)
458 {
459 #if FIXME /* [ */
460 pmap_t pmap;
461 vm_page_t pg;
462 vm_map_offset_t vaddr;
463 ppnum_t paddr;
464 #endif /* FIXME ] */
465 kern_return_t kret;
466 vm_map_t map;
467
468 map = current_map();
469
470 #if FIXME /* [ */
471 if (dirtied) {
472 pmap = get_task_pmap(current_task());
473 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
474 vaddr < vm_map_round_page(addr + len, PAGE_MASK);
475 vaddr += PAGE_SIZE) {
476 paddr = pmap_extract(pmap, vaddr);
477 pg = PHYS_TO_VM_PAGE(paddr);
478 vm_page_set_modified(pg);
479 }
480 }
481 #endif /* FIXME ] */
482 #ifdef lint
483 dirtied++;
484 #endif /* lint */
485 kret = vm_map_unwire(map,
486 vm_map_trunc_page(addr,
487 vm_map_page_mask(map)),
488 vm_map_round_page(addr + len,
489 vm_map_page_mask(map)),
490 FALSE);
491 switch (kret) {
492 case KERN_SUCCESS:
493 return 0;
494 case KERN_INVALID_ADDRESS:
495 case KERN_NO_SPACE:
496 return ENOMEM;
497 case KERN_PROTECTION_FAILURE:
498 return EACCES;
499 default:
500 return EINVAL;
501 }
502 }
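/*
 * Illustrative pairing (a sketch; assumes uaddr/len describe a valid
 * buffer in the current user map): wire the pages, touch them safely,
 * then unwire:
 *
 *     if (vslock(uaddr, len) == 0) {
 *             ... access the wired user pages ...
 *             (void) vsunlock(uaddr, len, TRUE);
 *     }
 */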
503
504 int
505 subyte(
506 user_addr_t addr,
507 int byte)
508 {
509 char character;
510
511 character = (char)byte;
512 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
513 }
514
515 int
516 suibyte(
517 user_addr_t addr,
518 int byte)
519 {
520 char character;
521
522 character = (char)byte;
523 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
524 }
525
526 int
527 fubyte(user_addr_t addr)
528 {
529 unsigned char byte;
530
531 if (copyin(addr, (void *) &byte, sizeof(char))) {
532 return -1;
533 }
534 return byte;
535 }
536
537 int
538 fuibyte(user_addr_t addr)
539 {
540 unsigned char byte;
541
542 if (copyin(addr, (void *) &(byte), sizeof(char))) {
543 return -1;
544 }
545 return byte;
546 }
547
548 int
549 suword(
550 user_addr_t addr,
551 long word)
552 {
553 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
554 }
555
556 long
557 fuword(user_addr_t addr)
558 {
559 long word = 0;
560
561 if (copyin(addr, (void *) &word, sizeof(int))) {
562 return -1;
563 }
564 return word;
565 }
566
567 /* suiword and fuiword are the same as suword and fuword, respectively */
568
569 int
570 suiword(
571 user_addr_t addr,
572 long word)
573 {
574 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
575 }
576
577 long
578 fuiword(user_addr_t addr)
579 {
580 long word = 0;
581
582 if (copyin(addr, (void *) &word, sizeof(int))) {
583 return -1;
584 }
585 return word;
586 }
587
588 /*
589 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
590 * fetching and setting of process-sized size_t and pointer values.
591 */
592 int
593 sulong(user_addr_t addr, int64_t word)
594 {
595 if (IS_64BIT_PROCESS(current_proc())) {
596 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
597 } else {
598 return suiword(addr, (long)word);
599 }
600 }
601
602 int64_t
603 fulong(user_addr_t addr)
604 {
605 int64_t longword;
606
607 if (IS_64BIT_PROCESS(current_proc())) {
608 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
609 return -1;
610 }
611 return longword;
612 } else {
613 return (int64_t)fuiword(addr);
614 }
615 }
616
617 int
618 suulong(user_addr_t addr, uint64_t uword)
619 {
620 if (IS_64BIT_PROCESS(current_proc())) {
621 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
622 } else {
623 return suiword(addr, (uint32_t)uword);
624 }
625 }
626
627 uint64_t
628 fuulong(user_addr_t addr)
629 {
630 uint64_t ulongword;
631
632 if (IS_64BIT_PROCESS(current_proc())) {
633 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
634 return -1ULL;
635 }
636 return ulongword;
637 } else {
638 return (uint64_t)fuiword(addr);
639 }
640 }
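/*
 * Illustrative in-kernel sketch (assumes uaddr is a valid user address
 * for the current process): fetch a process-sized value, bump it, and
 * store it back, working for both 32- and 64-bit user tasks:
 *
 *     int64_t v = fulong(uaddr);
 *     if (v != -1) {      // NB: -1 may also be a legitimately stored -1
 *             (void) sulong(uaddr, v + 1);
 *     }
 */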
641
642 int
643 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
644 {
645 return ENOTSUP;
646 }
647
648 /*
649 * pid_for_task
650 *
651 * Find the BSD process ID for the Mach task associated with the given Mach port
652 * name
653 *
654 * Parameters: args User argument descriptor (see below)
655 *
656 * Indirect parameters: args->t Mach port name
657 * args->pid Process ID (returned value; see below)
658 *
 659  * Returns:	KERN_SUCCESS		Success
660 * KERN_FAILURE Not success
661 *
662 * Implicit returns: args->pid Process ID
663 *
664 */
665 kern_return_t
666 pid_for_task(
667 struct pid_for_task_args *args)
668 {
669 mach_port_name_t t = args->t;
670 user_addr_t pid_addr = args->pid;
671 proc_t p;
672 task_t t1;
673 int pid = -1;
674 kern_return_t err = KERN_SUCCESS;
675
676 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
677 AUDIT_ARG(mach_port1, t);
678
679 t1 = port_name_to_task_inspect(t);
680
681 if (t1 == TASK_NULL) {
682 err = KERN_FAILURE;
683 goto pftout;
684 } else {
685 p = get_bsdtask_info(t1);
686 if (p) {
687 pid = proc_pid(p);
688 err = KERN_SUCCESS;
689 } else if (is_corpsetask(t1)) {
690 pid = task_pid(t1);
691 err = KERN_SUCCESS;
692 } else {
693 err = KERN_FAILURE;
694 }
695 }
696 task_deallocate(t1);
697 pftout:
698 AUDIT_ARG(pid, pid);
699 (void) copyout((char *) &pid, pid_addr, sizeof(int));
700 AUDIT_MACH_SYSCALL_EXIT(err);
701 return err;
702 }
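/*
 * Minimal userspace sketch (illustrative; the trap is exposed to user
 * code as pid_for_task(mach_port_name_t, int *)):
 *
 *     int pid = -1;
 *     kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *     if (kr == KERN_SUCCESS)
 *             printf("self pid: %d\n", pid);
 */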
703
704 /*
705 *
706 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
707 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
708 *
709 */
710 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
711
712 /*
713 * Routine: task_for_pid_posix_check
714 * Purpose:
715 * Verify that the current process should be allowed to
716 * get the target process's task port. This is only
717 * permitted if:
718 * - The current process is root
719 * OR all of the following are true:
720 * - The target process's real, effective, and saved uids
721 * are the same as the current proc's euid,
722 * - The target process's group set is a subset of the
723 * calling process's group set, and
724 * - The target process hasn't switched credentials.
725 *
726 * Returns: TRUE: permitted
727 * FALSE: denied
728 */
729 static int
730 task_for_pid_posix_check(proc_t target)
731 {
732 kauth_cred_t targetcred, mycred;
733 uid_t myuid;
734 int allowed;
735
736 /* No task_for_pid on bad targets */
737 if (target->p_stat == SZOMB) {
738 return FALSE;
739 }
740
741 mycred = kauth_cred_get();
742 myuid = kauth_cred_getuid(mycred);
743
744 /* If we're running as root, the check passes */
745 if (kauth_cred_issuser(mycred)) {
746 return TRUE;
747 }
748
749 /* We're allowed to get our own task port */
750 if (target == current_proc()) {
751 return TRUE;
752 }
753
754 /*
755 * Under DENY, only root can get another proc's task port,
756 * so no more checks are needed.
757 */
758 if (tfp_policy == KERN_TFP_POLICY_DENY) {
759 return FALSE;
760 }
761
762 targetcred = kauth_cred_proc_ref(target);
763 allowed = TRUE;
764
765 /* Do target's ruid, euid, and saved uid match my euid? */
766 if ((kauth_cred_getuid(targetcred) != myuid) ||
767 (kauth_cred_getruid(targetcred) != myuid) ||
768 (kauth_cred_getsvuid(targetcred) != myuid)) {
769 allowed = FALSE;
770 goto out;
771 }
772
773 /* Are target's groups a subset of my groups? */
774 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
775 allowed == 0) {
776 allowed = FALSE;
777 goto out;
778 }
779
780 /* Has target switched credentials? */
781 if (target->p_flag & P_SUGID) {
782 allowed = FALSE;
783 goto out;
784 }
785
786 out:
787 kauth_cred_unref(&targetcred);
788 return allowed;
789 }
790
791 /*
792 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
793 *
794 * Description: Waits for the user space daemon to respond to the request
795 * we made. Function declared non inline to be visible in
796 * stackshots and spindumps as well as debugging.
797 */
798 __attribute__((noinline)) int
799 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
800 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
801 {
802 return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
803 }
804
805 /*
806 * Routine: task_for_pid
807 * Purpose:
808 * Get the task port for another "process", named by its
809 * process ID on the same host as "target_task".
810 *
811 * Only permitted to privileged processes, or processes
812 * with the same user ID.
813 *
 814  *		Note: if pid == 0, an error is returned no matter who is calling.
815 *
816 * XXX This should be a BSD system call, not a Mach trap!!!
817 */
818 kern_return_t
819 task_for_pid(
820 struct task_for_pid_args *args)
821 {
822 mach_port_name_t target_tport = args->target_tport;
823 int pid = args->pid;
824 user_addr_t task_addr = args->t;
825 proc_t p = PROC_NULL;
826 task_t t1 = TASK_NULL;
827 task_t task = TASK_NULL;
828 mach_port_name_t tret = MACH_PORT_NULL;
829 ipc_port_t tfpport = MACH_PORT_NULL;
830 void * sright;
831 int error = 0;
832
833 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
834 AUDIT_ARG(pid, pid);
835 AUDIT_ARG(mach_port1, target_tport);
836
837 /* Always check if pid == 0 */
838 if (pid == 0) {
839 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
840 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
841 return KERN_FAILURE;
842 }
843
844 t1 = port_name_to_task(target_tport);
845 if (t1 == TASK_NULL) {
846 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
847 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
848 return KERN_FAILURE;
849 }
850
851
852 p = proc_find(pid);
853 if (p == PROC_NULL) {
854 error = KERN_FAILURE;
855 goto tfpout;
856 }
857
858 #if CONFIG_AUDIT
859 AUDIT_ARG(process, p);
860 #endif
861
862 if (!(task_for_pid_posix_check(p))) {
863 error = KERN_FAILURE;
864 goto tfpout;
865 }
866
867 if (p->task == TASK_NULL) {
868 error = KERN_SUCCESS;
869 goto tfpout;
870 }
871
872 #if CONFIG_MACF
873 error = mac_proc_check_get_task(kauth_cred_get(), p);
874 if (error) {
875 error = KERN_FAILURE;
876 goto tfpout;
877 }
878 #endif
879
880 /* Grab a task reference since the proc ref might be dropped if an upcall to task access server is made */
881 task = p->task;
882 task_reference(task);
883
884 /* If we aren't root and target's task access port is set... */
885 if (!kauth_cred_issuser(kauth_cred_get()) &&
886 p != current_proc() &&
887 (task_get_task_access_port(task, &tfpport) == 0) &&
888 (tfpport != IPC_PORT_NULL)) {
889 if (tfpport == IPC_PORT_DEAD) {
890 error = KERN_PROTECTION_FAILURE;
891 goto tfpout;
892 }
893
894 /*
895 * Drop the proc_find proc ref before making an upcall
896 * to taskgated, since holding a proc_find
897 * ref while making an upcall can cause deadlock.
898 */
899 proc_rele(p);
900 p = PROC_NULL;
901
902 /* Call up to the task access server */
903 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
904
905 if (error != MACH_MSG_SUCCESS) {
906 if (error == MACH_RCV_INTERRUPTED) {
907 error = KERN_ABORTED;
908 } else {
909 error = KERN_FAILURE;
910 }
911 goto tfpout;
912 }
913 }
914
915 /* Grant task port access */
916 extmod_statistics_incr_task_for_pid(task);
917 sright = (void *) convert_task_to_port(task);
918
919 /* Check if the task has been corpsified */
920 if (is_corpsetask(task)) {
921 /* task ref consumed by convert_task_to_port */
922 task = TASK_NULL;
923 ipc_port_release_send(sright);
924 error = KERN_FAILURE;
925 goto tfpout;
926 }
927
928 /* task ref consumed by convert_task_to_port */
929 task = TASK_NULL;
930 tret = ipc_port_copyout_send(
931 sright,
932 get_task_ipcspace(current_task()));
933
934 error = KERN_SUCCESS;
935
936 tfpout:
937 task_deallocate(t1);
938 AUDIT_ARG(mach_port2, tret);
939 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
940
941 if (tfpport != IPC_PORT_NULL) {
942 ipc_port_release_send(tfpport);
943 }
944 if (task != TASK_NULL) {
945 task_deallocate(task);
946 }
947 if (p != PROC_NULL) {
948 proc_rele(p);
949 }
950 AUDIT_MACH_SYSCALL_EXIT(error);
951 return error;
952 }
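/*
 * Minimal userspace sketch (illustrative; the trap is exposed as
 * task_for_pid(mach_port_name_t target_tport, int pid, mach_port_t *t)).
 * Expect KERN_FAILURE unless the caller passes the posix check above
 * plus any MACF/taskgated policy in effect:
 *
 *     mach_port_t task = MACH_PORT_NULL;
 *     kern_return_t kr = task_for_pid(mach_task_self(), target_pid, &task);
 */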
953
954 /*
955 * Routine: task_name_for_pid
956 * Purpose:
957 * Get the task name port for another "process", named by its
958 * process ID on the same host as "target_task".
959 *
960 * Only permitted to privileged processes, or processes
961 * with the same user ID.
962 *
963 * XXX This should be a BSD system call, not a Mach trap!!!
964 */
965
966 kern_return_t
967 task_name_for_pid(
968 struct task_name_for_pid_args *args)
969 {
970 mach_port_name_t target_tport = args->target_tport;
971 int pid = args->pid;
972 user_addr_t task_addr = args->t;
973 proc_t p = PROC_NULL;
974 task_t t1;
975 mach_port_name_t tret;
976 void * sright;
977 int error = 0, refheld = 0;
978 kauth_cred_t target_cred;
979
980 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
981 AUDIT_ARG(pid, pid);
982 AUDIT_ARG(mach_port1, target_tport);
983
984 t1 = port_name_to_task(target_tport);
985 if (t1 == TASK_NULL) {
986 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
987 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
988 return KERN_FAILURE;
989 }
990
991 p = proc_find(pid);
992 if (p != PROC_NULL) {
993 AUDIT_ARG(process, p);
994 target_cred = kauth_cred_proc_ref(p);
995 refheld = 1;
996
997 if ((p->p_stat != SZOMB)
998 && ((current_proc() == p)
999 || kauth_cred_issuser(kauth_cred_get())
1000 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
1001 ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
1002 if (p->task != TASK_NULL) {
1003 task_reference(p->task);
1004 #if CONFIG_MACF
1005 error = mac_proc_check_get_task_name(kauth_cred_get(), p);
1006 if (error) {
1007 task_deallocate(p->task);
1008 goto noperm;
1009 }
1010 #endif
1011 sright = (void *)convert_task_name_to_port(p->task);
1012 tret = ipc_port_copyout_send(sright,
1013 get_task_ipcspace(current_task()));
1014 } else {
1015 tret = MACH_PORT_NULL;
1016 }
1017
1018 AUDIT_ARG(mach_port2, tret);
1019 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1020 task_deallocate(t1);
1021 error = KERN_SUCCESS;
1022 goto tnfpout;
1023 }
1024 }
1025
1026 #if CONFIG_MACF
1027 noperm:
1028 #endif
1029 task_deallocate(t1);
1030 tret = MACH_PORT_NULL;
1031 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1032 error = KERN_FAILURE;
1033 tnfpout:
1034 if (refheld != 0) {
1035 kauth_cred_unref(&target_cred);
1036 }
1037 if (p != PROC_NULL) {
1038 proc_rele(p);
1039 }
1040 AUDIT_MACH_SYSCALL_EXIT(error);
1041 return error;
1042 }
1043
1044 kern_return_t
1045 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1046 {
1047 task_t target = NULL;
1048 proc_t targetproc = PROC_NULL;
1049 int pid = args->pid;
1050 int error = 0;
1051 mach_port_t tfpport = MACH_PORT_NULL;
1052
1053 #if CONFIG_MACF
1054 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
1055 if (error) {
1056 error = EPERM;
1057 goto out;
1058 }
1059 #endif
1060
1061 if (pid == 0) {
1062 error = EPERM;
1063 goto out;
1064 }
1065
1066 targetproc = proc_find(pid);
1067 if (targetproc == PROC_NULL) {
1068 error = ESRCH;
1069 goto out;
1070 }
1071
1072 if (!task_for_pid_posix_check(targetproc) &&
1073 !IOTaskHasEntitlement(current_task(), PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1074 error = EPERM;
1075 goto out;
1076 }
1077
1078 target = targetproc->task;
1079 #ifndef CONFIG_EMBEDDED
1080 if (target != TASK_NULL) {
1081 /* If we aren't root and target's task access port is set... */
1082 if (!kauth_cred_issuser(kauth_cred_get()) &&
1083 targetproc != current_proc() &&
1084 (task_get_task_access_port(target, &tfpport) == 0) &&
1085 (tfpport != IPC_PORT_NULL)) {
1086 if (tfpport == IPC_PORT_DEAD) {
1087 error = EACCES;
1088 goto out;
1089 }
1090
1091 /* Call up to the task access server */
1092 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
1093
1094 if (error != MACH_MSG_SUCCESS) {
1095 if (error == MACH_RCV_INTERRUPTED) {
1096 error = EINTR;
1097 } else {
1098 error = EPERM;
1099 }
1100 goto out;
1101 }
1102 }
1103 }
1104 #endif
1105
1106 task_reference(target);
1107 error = task_pidsuspend(target);
1108 if (error) {
1109 if (error == KERN_INVALID_ARGUMENT) {
1110 error = EINVAL;
1111 } else {
1112 error = EPERM;
1113 }
1114 }
1115 #if CONFIG_MEMORYSTATUS
1116 else {
1117 memorystatus_on_suspend(targetproc);
1118 }
1119 #endif
1120
1121 task_deallocate(target);
1122
1123 out:
1124 if (tfpport != IPC_PORT_NULL) {
1125 ipc_port_release_send(tfpport);
1126 }
1127
1128 if (targetproc != PROC_NULL) {
1129 proc_rele(targetproc);
1130 }
1131 *ret = error;
1132 return error;
1133 }
1134
1135 kern_return_t
1136 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1137 {
1138 mach_port_name_t target_tport = args->target_tport;
1139 int pid = args->pid;
1140 user_addr_t task_addr = args->t;
1141 proc_t p = PROC_NULL;
1142 task_t t1 = TASK_NULL;
1143 task_t task = TASK_NULL;
1144 mach_port_name_t tret = MACH_PORT_NULL;
1145 ipc_port_t tfpport = MACH_PORT_NULL;
1146 ipc_port_t sright = NULL;
1147 int error = 0;
1148
1149
1150 AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1151 AUDIT_ARG(pid, pid);
1152 AUDIT_ARG(mach_port1, target_tport);
1153
1154 /* Always check if pid == 0 */
1155 if (pid == 0) {
1156 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
1157 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1158 return KERN_FAILURE;
1159 }
1160
1161 t1 = port_name_to_task(target_tport);
1162 if (t1 == TASK_NULL) {
1163 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
1164 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1165 return KERN_FAILURE;
1166 }
1167
1168
1169 p = proc_find(pid);
1170 if (p == PROC_NULL) {
1171 error = KERN_FAILURE;
1172 goto tfpout;
1173 }
1174
1175 #if CONFIG_AUDIT
1176 AUDIT_ARG(process, p);
1177 #endif
1178
1179 if (!(task_for_pid_posix_check(p))) {
1180 error = KERN_FAILURE;
1181 goto tfpout;
1182 }
1183
1184 if (p->task == TASK_NULL) {
1185 error = KERN_SUCCESS;
1186 goto tfpout;
1187 }
1188
1189 /* Grab a task reference since the proc ref might be dropped if an upcall to task access server is made */
1190 task = p->task;
1191 task_reference(task);
1192
1193
1194 if (!IOTaskHasEntitlement(current_task(), DEBUG_PORT_ENTITLEMENT)) {
1195 #if CONFIG_MACF
1196 error = mac_proc_check_get_task(kauth_cred_get(), p);
1197 if (error) {
1198 error = KERN_FAILURE;
1199 goto tfpout;
1200 }
1201 #endif
1202
1203 /* If we aren't root and target's task access port is set... */
1204 if (!kauth_cred_issuser(kauth_cred_get()) &&
1205 p != current_proc() &&
1206 (task_get_task_access_port(task, &tfpport) == 0) &&
1207 (tfpport != IPC_PORT_NULL)) {
1208 if (tfpport == IPC_PORT_DEAD) {
1209 error = KERN_PROTECTION_FAILURE;
1210 goto tfpout;
1211 }
1212
1213 /*
1214 * Drop the proc_find proc ref before making an upcall
1215 * to taskgated, since holding a proc_find
1216 * ref while making an upcall can cause deadlock.
1217 */
1218 proc_rele(p);
1219 p = PROC_NULL;
1220
1221 /* Call up to the task access server */
1222 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
1223
1224 if (error != MACH_MSG_SUCCESS) {
1225 if (error == MACH_RCV_INTERRUPTED) {
1226 error = KERN_ABORTED;
1227 } else {
1228 error = KERN_FAILURE;
1229 }
1230 goto tfpout;
1231 }
1232 }
1233 }
1234
1235 /* Check if the task has been corpsified */
1236 if (is_corpsetask(task)) {
1237 error = KERN_FAILURE;
1238 goto tfpout;
1239 }
1240
1241 error = task_get_debug_control_port(task, &sright);
1242 if (error != KERN_SUCCESS) {
1243 goto tfpout;
1244 }
1245
1246 tret = ipc_port_copyout_send(
1247 sright,
1248 get_task_ipcspace(current_task()));
1249
1250 error = KERN_SUCCESS;
1251
1252 tfpout:
1253 task_deallocate(t1);
1254 AUDIT_ARG(mach_port2, tret);
1255 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1256
1257 if (tfpport != IPC_PORT_NULL) {
1258 ipc_port_release_send(tfpport);
1259 }
1260 if (task != TASK_NULL) {
1261 task_deallocate(task);
1262 }
1263 if (p != PROC_NULL) {
1264 proc_rele(p);
1265 }
1266 AUDIT_MACH_SYSCALL_EXIT(error);
1267 return error;
1268 }
1269
1270 kern_return_t
1271 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1272 {
1273 task_t target = NULL;
1274 proc_t targetproc = PROC_NULL;
1275 int pid = args->pid;
1276 int error = 0;
1277 mach_port_t tfpport = MACH_PORT_NULL;
1278
1279 #if CONFIG_MACF
1280 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
1281 if (error) {
1282 error = EPERM;
1283 goto out;
1284 }
1285 #endif
1286
1287 if (pid == 0) {
1288 error = EPERM;
1289 goto out;
1290 }
1291
1292 targetproc = proc_find(pid);
1293 if (targetproc == PROC_NULL) {
1294 error = ESRCH;
1295 goto out;
1296 }
1297
1298 if (!task_for_pid_posix_check(targetproc) &&
1299 !IOTaskHasEntitlement(current_task(), PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1300 error = EPERM;
1301 goto out;
1302 }
1303
1304 target = targetproc->task;
1305 #ifndef CONFIG_EMBEDDED
1306 if (target != TASK_NULL) {
1307 /* If we aren't root and target's task access port is set... */
1308 if (!kauth_cred_issuser(kauth_cred_get()) &&
1309 targetproc != current_proc() &&
1310 (task_get_task_access_port(target, &tfpport) == 0) &&
1311 (tfpport != IPC_PORT_NULL)) {
1312 if (tfpport == IPC_PORT_DEAD) {
1313 error = EACCES;
1314 goto out;
1315 }
1316
1317 /* Call up to the task access server */
1318 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
1319
1320 if (error != MACH_MSG_SUCCESS) {
1321 if (error == MACH_RCV_INTERRUPTED) {
1322 error = EINTR;
1323 } else {
1324 error = EPERM;
1325 }
1326 goto out;
1327 }
1328 }
1329 }
1330 #endif
1331
1332 #if CONFIG_EMBEDDED
1333 #if SOCKETS
1334 resume_proc_sockets(targetproc);
1335 #endif /* SOCKETS */
1336 #endif /* CONFIG_EMBEDDED */
1337
1338 task_reference(target);
1339
1340 #if CONFIG_MEMORYSTATUS
1341 memorystatus_on_resume(targetproc);
1342 #endif
1343
1344 error = task_pidresume(target);
1345 if (error) {
1346 if (error == KERN_INVALID_ARGUMENT) {
1347 error = EINVAL;
1348 } else {
1349 if (error == KERN_MEMORY_ERROR) {
1350 psignal(targetproc, SIGKILL);
1351 error = EIO;
1352 } else {
1353 error = EPERM;
1354 }
1355 }
1356 }
1357
1358 task_deallocate(target);
1359
1360 out:
1361 if (tfpport != IPC_PORT_NULL) {
1362 ipc_port_release_send(tfpport);
1363 }
1364
1365 if (targetproc != PROC_NULL) {
1366 proc_rele(targetproc);
1367 }
1368
1369 *ret = error;
1370 return error;
1371 }
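/*
 * Illustrative pairing (a sketch; private SPI, userspace prototypes
 * assumed to be int pid_suspend(int) and int pid_resume(int)): callers
 * holding PROCESS_RESUME_SUSPEND_ENTITLEMENT can freeze and thaw a
 * target process:
 *
 *     if (pid_suspend(pid) == 0) {
 *             ... target is suspended ...
 *             (void) pid_resume(pid);
 *     }
 */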
1372
1373 #if CONFIG_EMBEDDED
1374 /*
1375 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1376 * When a process is specified, this call is blocking, otherwise we wake up the
1377 * freezer thread and do not block on a process being frozen.
1378 */
1379 kern_return_t
1380 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1381 {
1382 int error = 0;
1383 proc_t targetproc = PROC_NULL;
1384 int pid = args->pid;
1385
1386 #ifndef CONFIG_FREEZE
1387 #pragma unused(pid)
1388 #else
1389
1390 #if CONFIG_MACF
1391 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
1392 if (error) {
1393 error = EPERM;
1394 goto out;
1395 }
1396 #endif
1397
1398 /*
1399 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1400 */
1401
1402 if (pid >= 0) {
1403 targetproc = proc_find(pid);
1404
1405 if (targetproc == PROC_NULL) {
1406 error = ESRCH;
1407 goto out;
1408 }
1409
1410 if (!task_for_pid_posix_check(targetproc)) {
1411 error = EPERM;
1412 goto out;
1413 }
1414 }
1415
1416 if (pid == -2) {
1417 vm_pageout_anonymous_pages();
1418 } else if (pid == -1) {
1419 memorystatus_on_inactivity(targetproc);
1420 } else {
1421 error = memorystatus_freeze_process_sync(targetproc);
1422 }
1423
1424 out:
1425
1426 #endif /* CONFIG_FREEZE */
1427
1428 if (targetproc != PROC_NULL) {
1429 proc_rele(targetproc);
1430 }
1431 *ret = error;
1432 return error;
1433 }
1434 #endif /* CONFIG_EMBEDDED */
1435
1436 #if SOCKETS
1437 int
1438 networking_memstatus_callout(proc_t p, uint32_t status)
1439 {
1440 struct filedesc *fdp;
1441 int i;
1442
1443 /*
1444 * proc list lock NOT held
1445 * proc lock NOT held
1446 * a reference on the proc has been held / shall be dropped by the caller.
1447 */
1448 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1449 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1450
1451 proc_fdlock(p);
1452 fdp = p->p_fd;
1453 for (i = 0; i < fdp->fd_nfiles; i++) {
1454 struct fileproc *fp;
1455
1456 fp = fdp->fd_ofiles[i];
1457 if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0) {
1458 continue;
1459 }
1460 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
1461 #if NECP
1462 case DTYPE_NETPOLICY:
1463 necp_fd_memstatus(p, status,
1464 (struct necp_fd_data *)fp->f_fglob->fg_data);
1465 break;
1466 #endif /* NECP */
1467 default:
1468 break;
1469 }
1470 }
1471 proc_fdunlock(p);
1472
1473 return 1;
1474 }
1475
1476
1477 static int
1478 networking_defunct_callout(proc_t p, void *arg)
1479 {
1480 struct pid_shutdown_sockets_args *args = arg;
1481 int pid = args->pid;
1482 int level = args->level;
1483 struct filedesc *fdp;
1484 int i;
1485
1486 proc_fdlock(p);
1487 fdp = p->p_fd;
1488 for (i = 0; i < fdp->fd_nfiles; i++) {
1489 struct fileproc *fp = fdp->fd_ofiles[i];
1490 struct fileglob *fg;
1491
1492 if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0) {
1493 continue;
1494 }
1495
1496 fg = fp->f_fglob;
1497 switch (FILEGLOB_DTYPE(fg)) {
1498 case DTYPE_SOCKET: {
1499 struct socket *so = (struct socket *)fg->fg_data;
1500 if (p->p_pid == pid || so->last_pid == pid ||
1501 ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
1502 /* Call networking stack with socket and level */
1503 (void) socket_defunct(p, so, level);
1504 }
1505 break;
1506 }
1507 #if NECP
1508 case DTYPE_NETPOLICY:
1509 /* first pass: defunct necp and get stats for ntstat */
1510 if (p->p_pid == pid) {
1511 necp_fd_defunct(p,
1512 (struct necp_fd_data *)fg->fg_data);
1513 }
1514 break;
1515 #endif /* NECP */
1516 default:
1517 break;
1518 }
1519 }
1520
1521 proc_fdunlock(p);
1522
1523 return PROC_RETURNED;
1524 }
1525
1526 int
1527 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1528 {
1529 int error = 0;
1530 proc_t targetproc = PROC_NULL;
1531 int pid = args->pid;
1532 int level = args->level;
1533
1534 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1535 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1536 error = EINVAL;
1537 goto out;
1538 }
1539
1540 #if CONFIG_MACF
1541 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1542 if (error) {
1543 error = EPERM;
1544 goto out;
1545 }
1546 #endif
1547
1548 targetproc = proc_find(pid);
1549 if (targetproc == PROC_NULL) {
1550 error = ESRCH;
1551 goto out;
1552 }
1553
1554 if (!task_for_pid_posix_check(targetproc) &&
1555 !IOTaskHasEntitlement(current_task(), PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1556 error = EPERM;
1557 goto out;
1558 }
1559
1560 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1561 networking_defunct_callout, args, NULL, NULL);
1562
1563 out:
1564 if (targetproc != PROC_NULL) {
1565 proc_rele(targetproc);
1566 }
1567 *ret = error;
1568 return error;
1569 }
1570
1571 #endif /* SOCKETS */
1572
1573 static int
1574 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1575 __unused int arg2, struct sysctl_req *req)
1576 {
1577 int error = 0;
1578 int new_value;
1579
1580 error = SYSCTL_OUT(req, arg1, sizeof(int));
1581 if (error || req->newptr == USER_ADDR_NULL) {
1582 return error;
1583 }
1584
1585 if (!kauth_cred_issuser(kauth_cred_get())) {
1586 return EPERM;
1587 }
1588
1589 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1590 goto out;
1591 }
1592 if ((new_value == KERN_TFP_POLICY_DENY)
1593 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
1594 tfp_policy = new_value;
1595 } else {
1596 error = EINVAL;
1597 }
1598 out:
1599 return error;
1600 }
1601
1602 #if defined(SECURE_KERNEL)
1603 static int kern_secure_kernel = 1;
1604 #else
1605 static int kern_secure_kernel = 0;
1606 #endif
1607
1608 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
1609
1610 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
1611 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1612 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
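/*
 * Example (a sketch; root only): switch task_for_pid() handling into
 * deny mode, where only privileged callers and self-lookups succeed.
 * The handler accepts only KERN_TFP_POLICY_DENY and
 * KERN_TFP_POLICY_DEFAULT (see sys/sysctl.h for the values) and
 * returns EINVAL for anything else:
 *
 *     sysctl -w kern.tfp.policy=<KERN_TFP_POLICY_DENY value>
 */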
1613
1614 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
1615 &shared_region_trace_level, 0, "");
1616 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
1617 &shared_region_version, 0, "");
1618 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
1619 &shared_region_persistence, 0, "");
1620
1621 /*
1622 * shared_region_check_np:
1623 *
1624 * This system call is intended for dyld.
1625 *
1626 * dyld calls this when any process starts to see if the process's shared
1627 * region is already set up and ready to use.
1628  * This call returns the base address of the first mapping in the
1629  * process's shared region.
1630 * dyld will then check what's mapped at that address.
1631 *
1632 * If the shared region is empty, dyld will then attempt to map the shared
1633 * cache file in the shared region via the shared_region_map_np() system call.
1634 *
1635 * If something's already mapped in the shared region, dyld will check if it
1636 * matches the shared cache it would like to use for that process.
1637  * If it matches, everything's ready and the process can proceed and use the
1638 * shared region.
1639 * If it doesn't match, dyld will unmap the shared region and map the shared
1640 * cache into the process's address space via mmap().
1641 *
1642 * ERROR VALUES
1643 * EINVAL no shared region
1644 * ENOMEM shared region is empty
1645 * EFAULT bad address for "start_address"
1646 */
1647 int
1648 shared_region_check_np(
1649 __unused struct proc *p,
1650 struct shared_region_check_np_args *uap,
1651 __unused int *retvalp)
1652 {
1653 vm_shared_region_t shared_region;
1654 mach_vm_offset_t start_address = 0;
1655 int error;
1656 kern_return_t kr;
1657
1658 SHARED_REGION_TRACE_DEBUG(
1659 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
1660 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1661 p->p_pid, p->p_comm,
1662 (uint64_t)uap->start_address));
1663
1664 	/* retrieve the current task's shared region */
1665 shared_region = vm_shared_region_get(current_task());
1666 if (shared_region != NULL) {
1667 /* retrieve address of its first mapping... */
1668 kr = vm_shared_region_start_address(shared_region,
1669 &start_address);
1670 if (kr != KERN_SUCCESS) {
1671 error = ENOMEM;
1672 } else {
1673 /* ... and give it to the caller */
1674 error = copyout(&start_address,
1675 (user_addr_t) uap->start_address,
1676 sizeof(start_address));
1677 if (error) {
1678 SHARED_REGION_TRACE_ERROR(
1679 ("shared_region: %p [%d(%s)] "
1680 "check_np(0x%llx) "
1681 "copyout(0x%llx) error %d\n",
1682 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1683 p->p_pid, p->p_comm,
1684 (uint64_t)uap->start_address, (uint64_t)start_address,
1685 error));
1686 }
1687 }
1688 vm_shared_region_deallocate(shared_region);
1689 } else {
1690 /* no shared region ! */
1691 error = EINVAL;
1692 }
1693
1694 SHARED_REGION_TRACE_DEBUG(
1695 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
1696 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1697 p->p_pid, p->p_comm,
1698 (uint64_t)uap->start_address, (uint64_t)start_address, error));
1699
1700 return error;
1701 }
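/*
 * Minimal dyld-side sketch (illustrative; the stub name and exact
 * prototype live in Libsyscall and are assumed here):
 *
 *     uint64_t base = 0;
 *     if (__shared_region_check_np(&base) == 0) {
 *             // something is mapped at "base"; compare it to the expected cache
 *     } else {
 *             // EINVAL/ENOMEM: map the cache via shared_region_map_np()
 *     }
 */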
1702
1703
1704 int
1705 shared_region_copyin_mappings(
1706 struct proc *p,
1707 user_addr_t user_mappings,
1708 unsigned int mappings_count,
1709 struct shared_file_mapping_np *mappings)
1710 {
1711 int error = 0;
1712 vm_size_t mappings_size = 0;
1713
1714 /* get the list of mappings the caller wants us to establish */
1715 mappings_size = (vm_size_t) (mappings_count * sizeof(mappings[0]));
1716 error = copyin(user_mappings,
1717 mappings,
1718 mappings_size);
1719 if (error) {
1720 SHARED_REGION_TRACE_ERROR(
1721 ("shared_region: %p [%d(%s)] map(): "
1722 "copyin(0x%llx, %d) failed (error=%d)\n",
1723 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1724 p->p_pid, p->p_comm,
1725 (uint64_t)user_mappings, mappings_count, error));
1726 }
1727 return error;
1728 }
1729 /*
1730 * shared_region_map_np()
1731 *
1732 * This system call is intended for dyld.
1733 *
1734 * dyld uses this to map a shared cache file into a shared region.
1735 * This is usually done only the first time a shared cache is needed.
1736 * Subsequent processes will just use the populated shared region without
1737 * requiring any further setup.
1738 */
1739 int
1740 _shared_region_map_and_slide(
1741 struct proc *p,
1742 int fd,
1743 uint32_t mappings_count,
1744 struct shared_file_mapping_np *mappings,
1745 uint32_t slide,
1746 user_addr_t slide_start,
1747 user_addr_t slide_size)
1748 {
1749 int error;
1750 kern_return_t kr;
1751 struct fileproc *fp;
1752 struct vnode *vp, *root_vp, *scdir_vp;
1753 struct vnode_attr va;
1754 off_t fs;
1755 memory_object_size_t file_size;
1756 #if CONFIG_MACF
1757 vm_prot_t maxprot = VM_PROT_ALL;
1758 #endif
1759 memory_object_control_t file_control;
1760 struct vm_shared_region *shared_region;
1761 uint32_t i;
1762
1763 SHARED_REGION_TRACE_DEBUG(
1764 ("shared_region: %p [%d(%s)] -> map\n",
1765 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1766 p->p_pid, p->p_comm));
1767
1768 shared_region = NULL;
1769 fp = NULL;
1770 vp = NULL;
1771 scdir_vp = NULL;
1772
1773 /* get file structure from file descriptor */
1774 error = fp_lookup(p, fd, &fp, 0);
1775 if (error) {
1776 SHARED_REGION_TRACE_ERROR(
1777 ("shared_region: %p [%d(%s)] map: "
1778 "fd=%d lookup failed (error=%d)\n",
1779 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1780 p->p_pid, p->p_comm, fd, error));
1781 goto done;
1782 }
1783
1784 /* make sure we're attempting to map a vnode */
1785 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
1786 SHARED_REGION_TRACE_ERROR(
1787 ("shared_region: %p [%d(%s)] map: "
1788 "fd=%d not a vnode (type=%d)\n",
1789 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1790 p->p_pid, p->p_comm,
1791 fd, FILEGLOB_DTYPE(fp->f_fglob)));
1792 error = EINVAL;
1793 goto done;
1794 }
1795
1796 /* we need at least read permission on the file */
1797 if (!(fp->f_fglob->fg_flag & FREAD)) {
1798 SHARED_REGION_TRACE_ERROR(
1799 ("shared_region: %p [%d(%s)] map: "
1800 "fd=%d not readable\n",
1801 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1802 p->p_pid, p->p_comm, fd));
1803 error = EPERM;
1804 goto done;
1805 }
1806
1807 /* get vnode from file structure */
1808 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
1809 if (error) {
1810 SHARED_REGION_TRACE_ERROR(
1811 ("shared_region: %p [%d(%s)] map: "
1812 "fd=%d getwithref failed (error=%d)\n",
1813 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1814 p->p_pid, p->p_comm, fd, error));
1815 goto done;
1816 }
1817 vp = (struct vnode *) fp->f_fglob->fg_data;
1818
1819 /* make sure the vnode is a regular file */
1820 if (vp->v_type != VREG) {
1821 SHARED_REGION_TRACE_ERROR(
1822 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1823 "not a file (type=%d)\n",
1824 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1825 p->p_pid, p->p_comm,
1826 (void *)VM_KERNEL_ADDRPERM(vp),
1827 vp->v_name, vp->v_type));
1828 error = EINVAL;
1829 goto done;
1830 }
1831
1832 #if CONFIG_MACF
1833 /* pass in 0 for the offset argument because AMFI does not need the offset
1834 * of the shared cache */
1835 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
1836 fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
1837 if (error) {
1838 goto done;
1839 }
1840 #endif /* MAC */
1841
1842 /* The calling process cannot be chroot-ed. */
1843 root_vp = p->p_fd->fd_rdir;
1844 if (root_vp == NULL) {
1845 root_vp = rootvnode;
1846 } else {
1847 SHARED_REGION_TRACE_ERROR(
1848 ("calling process [%d(%s)] is chroot-ed, permission denied\n",
1849 p->p_pid, p->p_comm));
1850 error = EPERM;
1851 goto done;
1852 }
1853
1854 /* The shared cache file must be owned by root */
1855 VATTR_INIT(&va);
1856 VATTR_WANTED(&va, va_uid);
1857 VATTR_WANTED(&va, va_flags);
1858 error = vnode_getattr(vp, &va, vfs_context_current());
1859 if (error) {
1860 SHARED_REGION_TRACE_ERROR(
1861 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1862 "vnode_getattr(%p) failed (error=%d)\n",
1863 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1864 p->p_pid, p->p_comm,
1865 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1866 (void *)VM_KERNEL_ADDRPERM(vp), error));
1867 goto done;
1868 }
1869 if (va.va_uid != 0) {
1870 SHARED_REGION_TRACE_ERROR(
1871 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1872 "owned by uid=%d instead of 0\n",
1873 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1874 p->p_pid, p->p_comm,
1875 (void *)VM_KERNEL_ADDRPERM(vp),
1876 vp->v_name, va.va_uid));
1877 error = EPERM;
1878 goto done;
1879 }
1880
1881 #if CONFIG_CSR
1882 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0 &&
1883 !(va.va_flags & SF_RESTRICTED)) {
1884 /*
1885 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
1886 * the shared cache file is NOT SIP-protected, so reject the
1887 * mapping request
1888 */
1889 SHARED_REGION_TRACE_ERROR(
1890 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
1891 "vnode is not SIP-protected. \n",
1892 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1893 p->p_pid, p->p_comm, (void *)VM_KERNEL_ADDRPERM(vp),
1894 vp->v_name));
1895 error = EPERM;
1896 goto done;
1897 }
1898 #else
1899 /* Devices without SIP/ROSP need to make sure that the shared cache is on the root volume. */
1900 if (vp->v_mount != root_vp->v_mount) {
1901 SHARED_REGION_TRACE_ERROR(
1902 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1903 "not on process's root volume\n",
1904 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1905 p->p_pid, p->p_comm,
1906 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
1907 error = EPERM;
1908 goto done;
1909 }
1910 #endif /* CONFIG_CSR */
1911
1912 if (scdir_enforce) {
1913 /* get vnode for scdir_path */
1914 error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
1915 if (error) {
1916 SHARED_REGION_TRACE_ERROR(
1917 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1918 "vnode_lookup(%s) failed (error=%d)\n",
1919 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1920 p->p_pid, p->p_comm,
1921 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1922 scdir_path, error));
1923 goto done;
1924 }
1925
1926 /* ensure parent is scdir_vp */
1927 if (vnode_parent(vp) != scdir_vp) {
1928 SHARED_REGION_TRACE_ERROR(
1929 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1930 "shared cache file not in %s\n",
1931 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1932 p->p_pid, p->p_comm,
1933 (void *)VM_KERNEL_ADDRPERM(vp),
1934 vp->v_name, scdir_path));
1935 error = EPERM;
1936 goto done;
1937 }
1938 }
1939
1940 /* get vnode size */
1941 error = vnode_size(vp, &fs, vfs_context_current());
1942 if (error) {
1943 SHARED_REGION_TRACE_ERROR(
1944 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1945 "vnode_size(%p) failed (error=%d)\n",
1946 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1947 p->p_pid, p->p_comm,
1948 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1949 (void *)VM_KERNEL_ADDRPERM(vp), error));
1950 goto done;
1951 }
1952 file_size = fs;
1953
1954 /* get the file's memory object handle */
1955 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1956 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1957 SHARED_REGION_TRACE_ERROR(
1958 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1959 "no memory object\n",
1960 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1961 p->p_pid, p->p_comm,
1962 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
1963 error = EINVAL;
1964 goto done;
1965 }
1966
1967 /* check that the mappings are properly covered by code signatures */
1968 if (!cs_system_enforcement()) {
1969 /* code signing is not enforced: no need to check */
1970 } else {
1971 for (i = 0; i < mappings_count; i++) {
1972 if (mappings[i].sfm_init_prot & VM_PROT_ZF) {
1973 /* zero-filled mapping: not backed by the file */
1974 continue;
1975 }
1976 if (ubc_cs_is_range_codesigned(vp,
1977 mappings[i].sfm_file_offset,
1978 mappings[i].sfm_size)) {
1979 /* this mapping is fully covered by code signatures */
1980 continue;
1981 }
1982 SHARED_REGION_TRACE_ERROR(
1983 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1984 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
1985 "is not code-signed\n",
1986 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1987 p->p_pid, p->p_comm,
1988 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1989 i, mappings_count,
1990 mappings[i].sfm_address,
1991 mappings[i].sfm_size,
1992 mappings[i].sfm_file_offset,
1993 mappings[i].sfm_max_prot,
1994 mappings[i].sfm_init_prot));
1995 error = EINVAL;
1996 goto done;
1997 }
1998 }
1999
2000 /* get the process's shared region (set up in vm_map_exec()) */
2001 shared_region = vm_shared_region_trim_and_get(current_task());
2002 if (shared_region == NULL) {
2003 SHARED_REGION_TRACE_ERROR(
2004 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2005 "no shared region\n",
2006 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2007 p->p_pid, p->p_comm,
2008 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
2009 error = EINVAL;
2010 goto done;
2011 }
2012
2013 /* map the file into that shared region's submap */
2014 kr = vm_shared_region_map_file(shared_region,
2015 mappings_count,
2016 mappings,
2017 file_control,
2018 file_size,
2019 (void *) p->p_fd->fd_rdir,
2020 slide,
2021 slide_start,
2022 slide_size);
2023 if (kr != KERN_SUCCESS) {
2024 SHARED_REGION_TRACE_ERROR(
2025 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2026 "vm_shared_region_map_file() failed kr=0x%x\n",
2027 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2028 p->p_pid, p->p_comm,
2029 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
2030 switch (kr) {
2031 case KERN_INVALID_ADDRESS:
2032 error = EFAULT;
2033 break;
2034 case KERN_PROTECTION_FAILURE:
2035 error = EPERM;
2036 break;
2037 case KERN_NO_SPACE:
2038 error = ENOMEM;
2039 break;
2040 case KERN_FAILURE:
2041 case KERN_INVALID_ARGUMENT:
2042 default:
2043 error = EINVAL;
2044 break;
2045 }
2046 goto done;
2047 }
2048
2049 error = 0;
2050
2051 vnode_lock_spin(vp);
2052
2053 vp->v_flag |= VSHARED_DYLD;
2054
2055 vnode_unlock(vp);
2056
2057 /* update the vnode's access time */
2058 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
2059 VATTR_INIT(&va);
2060 nanotime(&va.va_access_time);
2061 VATTR_SET_ACTIVE(&va, va_access_time);
2062 vnode_setattr(vp, &va, vfs_context_current());
2063 }
2064
2065 if (p->p_flag & P_NOSHLIB) {
2066 /* signal that this process is now using split libraries */
2067 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2068 }
2069
2070 done:
2071 if (vp != NULL) {
2072 /*
2073 * release the vnode...
2074 * ubc_map() still holds it for us in the non-error case
2075 */
2076 (void) vnode_put(vp);
2077 vp = NULL;
2078 }
2079 if (fp != NULL) {
2080 /* release the file descriptor */
2081 fp_drop(p, fd, fp, 0);
2082 fp = NULL;
2083 }
2084 if (scdir_vp != NULL) {
2085 (void)vnode_put(scdir_vp);
2086 scdir_vp = NULL;
2087 }
2088
2089 if (shared_region != NULL) {
2090 vm_shared_region_deallocate(shared_region);
2091 }
2092
2093 SHARED_REGION_TRACE_DEBUG(
2094 ("shared_region: %p [%d(%s)] <- map\n",
2095 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2096 p->p_pid, p->p_comm));
2097
2098 return error;
2099 }
2100
2101 int
2102 shared_region_map_and_slide_np(
2103 struct proc *p,
2104 struct shared_region_map_and_slide_np_args *uap,
2105 __unused int *retvalp)
2106 {
2107 struct shared_file_mapping_np *mappings;
2108 unsigned int mappings_count = uap->count;
2109 kern_return_t kr = KERN_SUCCESS;
2110 uint32_t slide = uap->slide;
2111
2112 #define SFM_MAX_STACK 8
2113 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
2114
2115 /* Is the process chrooted? */
2116 if (p->p_fd->fd_rdir != NULL) {
2117 kr = EINVAL;
2118 goto done;
2119 }
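
/*
 * NB: "kr" carries both kern_return_t values and errno-style values
 * (e.g. the EINVAL above) in this function; the syscall's int return
 * is ultimately interpreted as an errno by the caller.
 */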
2120
2121 if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
2122 if (kr == KERN_INVALID_ARGUMENT) {
2123 /*
2124 * This happens if sliding is requested again with the
2125 * same slide value that was used earlier for the very
2126 * first slide; treat it as a success.
2127 */
2128 kr = KERN_SUCCESS;
2129 }
2130 goto done;
2131 }
2132
2133 if (mappings_count == 0) {
2134 SHARED_REGION_TRACE_INFO(
2135 ("shared_region: %p [%d(%s)] map(): "
2136 "no mappings\n",
2137 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2138 p->p_pid, p->p_comm));
2139 kr = 0; /* no mappings: we're done! */
2140 goto done;
2141 } else if (mappings_count <= SFM_MAX_STACK) {
2142 mappings = &stack_mappings[0];
2143 } else {
2144 SHARED_REGION_TRACE_ERROR(
2145 ("shared_region: %p [%d(%s)] map(): "
2146 "too many mappings (%d)\n",
2147 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2148 p->p_pid, p->p_comm,
2149 mappings_count));
2150 kr = KERN_FAILURE;
2151 goto done;
2152 }
2153
2154 if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
2155 goto done;
2156 }
2157
2158
2159 kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
2160 slide,
2161 uap->slide_start, uap->slide_size);
2162 if (kr != KERN_SUCCESS) {
2163 return kr;
2164 }
2165
2166 done:
2167 return kr;
2168 }
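
/*
 * Illustrative sketch (not kernel code): how a userspace caller such as
 * dyld might drive this syscall.  The "__shared_region_map_and_slide_np"
 * wrapper name and its exact prototype are assumptions here; the argument
 * order mirrors the uap fields above, and the mapping fields match
 * struct shared_file_mapping_np as used in this file.
 *
 *     struct shared_file_mapping_np m = {
 *         .sfm_address     = region_address,  // target address in the shared region
 *         .sfm_size        = mapping_size,
 *         .sfm_file_offset = file_offset,     // offset into the shared cache file
 *         .sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE,
 *         .sfm_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE,
 *     };
 *     // slide_info_addr/slide_info_size describe the slide info blob
 *     int err = __shared_region_map_and_slide_np(fd, 1, &m, slide,
 *                                                slide_info_addr, slide_info_size);
 */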
2169
2170 /* sysctl overflow room */
2171
2172 SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
2173 (int *) &page_size, 0, "vm page size");
2174
2175 /* vm_page_free_target is provided as a makeshift solution for applications that want to
2176 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
2177 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
2178 extern unsigned int vm_page_free_target;
2179 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
2180 &vm_page_free_target, 0, "Pageout daemon free target");
2181
2182 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
2183 &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
2184
2185 static int
2186 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
2187 {
2188 #pragma unused(oidp, arg1, arg2)
2189 unsigned int page_free_wanted;
2190
2191 page_free_wanted = mach_vm_ctl_page_free_wanted();
2192 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
2193 }
2194 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
2195 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
2196 0, 0, vm_ctl_page_free_wanted, "I", "");
2197
2198 extern unsigned int vm_page_purgeable_count;
2199 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2200 &vm_page_purgeable_count, 0, "Purgeable page count");
2201
2202 extern unsigned int vm_page_purgeable_wired_count;
2203 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2204 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
2205
2206 extern unsigned int vm_page_kern_lpage_count;
2207 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2208 &vm_page_kern_lpage_count, 0, "kernel used large pages");
2209
2210 #if DEVELOPMENT || DEBUG
2211 extern uint64_t get_pages_grabbed_count(void);
2212
2213 static int
2214 pages_grabbed SYSCTL_HANDLER_ARGS
2215 {
2216 #pragma unused(arg1, arg2, oidp)
2217 uint64_t value = get_pages_grabbed_count();
2218 return SYSCTL_OUT(req, &value, sizeof(value));
2219 }
2220
2221 SYSCTL_PROC(_vm, OID_AUTO, pages_grabbed, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
2222 0, 0, &pages_grabbed, "QU", "Total pages grabbed");
2223 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
2224 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
2225
2226 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
2227 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
2228 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
2229 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
2230 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
2231 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
2232
2233 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2234 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
2235 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2236 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
2237 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2238 &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
2239 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
2240 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
2241 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2242 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
2243 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
2244 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
2245 #endif
2246
2247 extern int madvise_free_debug;
2248 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
2249 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
2250
2251 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2252 &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
2253 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2254 &vm_page_stats_reusable.reusable_pages_success, "");
2255 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2256 &vm_page_stats_reusable.reusable_pages_failure, "");
2257 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2258 &vm_page_stats_reusable.reusable_pages_shared, "");
2259 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2260 &vm_page_stats_reusable.all_reusable_calls, "");
2261 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2262 &vm_page_stats_reusable.partial_reusable_calls, "");
2263 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2264 &vm_page_stats_reusable.reuse_pages_success, "");
2265 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2266 &vm_page_stats_reusable.reuse_pages_failure, "");
2267 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2268 &vm_page_stats_reusable.all_reuse_calls, "");
2269 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2270 &vm_page_stats_reusable.partial_reuse_calls, "");
2271 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2272 &vm_page_stats_reusable.can_reuse_success, "");
2273 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2274 &vm_page_stats_reusable.can_reuse_failure, "");
2275 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
2276 &vm_page_stats_reusable.reusable_reclaimed, "");
2277 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
2278 &vm_page_stats_reusable.reusable_nonwritable, "");
2279 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2280 &vm_page_stats_reusable.reusable_shared, "");
2281 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2282 &vm_page_stats_reusable.free_shared, "");
2283
2284
2285 extern unsigned int vm_page_free_count, vm_page_speculative_count;
2286 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
2287 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
2288
2289 extern unsigned int vm_page_cleaned_count;
2290 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
2291
2292 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
2293 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
2294 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
2295
2296 /* pageout counts */
2297 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
2298 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
2299
2300 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
2301 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
2302 SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
2303 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
2304 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
2305 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
2306
2307
2308 /* counts of pages prefaulted when entering a memory object */
2309 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
2310 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
2311 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
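
/*
 * Example (userspace, illustrative only): these two counters are RW, so a
 * test harness can zero them before a measurement run.
 *
 *     int64_t zero = 0;
 *     sysctlbyname("vm.prefault_nb_pages", NULL, NULL, &zero, sizeof(zero));
 *     sysctlbyname("vm.prefault_nb_bailout", NULL, NULL, &zero, sizeof(zero));
 */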
2312
2313 #if defined (__x86_64__)
2314 extern unsigned int vm_clump_promote_threshold;
2315 SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
2316 #if DEVELOPMENT || DEBUG
2317 extern unsigned long vm_clump_stats[];
2318 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
2319 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
2320 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
2321 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
2322 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
2323 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
2324 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
2325 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
2326 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
2327 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
2328 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
2329 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
2330 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
2331 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
2332 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
2333 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
2334 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
2335 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
2336 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
2337 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
2338 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
2339 #endif /* if DEVELOPMENT || DEBUG */
2340 #endif /* #if defined (__x86_64__) */
2341
2342 #if CONFIG_SECLUDED_MEMORY
2343
2344 SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
2345 extern unsigned int vm_page_secluded_target;
2346 extern unsigned int vm_page_secluded_count;
2347 extern unsigned int vm_page_secluded_count_free;
2348 extern unsigned int vm_page_secluded_count_inuse;
2349 extern unsigned int vm_page_secluded_count_over_target;
2350 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
2351 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
2352 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
2353 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
2354 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
2355
2356 extern struct vm_page_secluded_data vm_page_secluded;
2357 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
2358 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
2359 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
2360 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
2361 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
2362 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
2363 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
2364 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
2365
2366 #endif /* CONFIG_SECLUDED_MEMORY */
2367
2368 #include <kern/thread.h>
2369 #include <sys/user.h>
2370
2371 void vm_pageout_io_throttle(void);
2372
2373 void
2374 vm_pageout_io_throttle(void)
2375 {
2376 struct uthread *uthread = get_bsdthread_info(current_thread());
2377
2378 /*
2379 * If this thread is marked as a low-priority I/O type and
2380 * the I/O it issued during this cleaning operation collided
2381 * with normal I/O operations, delay here in order to
2382 * mitigate the impact of this task on the normal operation
2383 * of the system.
2384 */
2385
2386 if (uthread->uu_lowpri_window) {
2387 throttle_lowpri_io(1);
2388 }
2389 }
2390
2391 int
2392 vm_pressure_monitor(
2393 __unused struct proc *p,
2394 struct vm_pressure_monitor_args *uap,
2395 int *retval)
2396 {
2397 kern_return_t kr;
2398 uint32_t pages_reclaimed;
2399 uint32_t pages_wanted;
2400
2401 kr = mach_vm_pressure_monitor(
2402 (boolean_t) uap->wait_for_pressure,
2403 uap->nsecs_monitored,
2404 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
2405 &pages_wanted);
2406
2407 switch (kr) {
2408 case KERN_SUCCESS:
2409 break;
2410 case KERN_ABORTED:
2411 return EINTR;
2412 default:
2413 return EINVAL;
2414 }
2415
2416 if (uap->pages_reclaimed) {
2417 if (copyout((void *)&pages_reclaimed,
2418 uap->pages_reclaimed,
2419 sizeof(pages_reclaimed)) != 0) {
2420 return EFAULT;
2421 }
2422 }
2423
2424 *retval = (int) pages_wanted;
2425 return 0;
2426 }
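
/*
 * Example (userspace, illustrative only; this syscall is private, and the
 * stub name and prototype below are assumptions): block until the pageout
 * daemon signals pressure, then report how many pages it still wants.
 *
 *     uint32_t reclaimed = 0;
 *     // args: wait_for_pressure, nsecs_monitored, &pages_reclaimed
 *     int wanted = vm_pressure_monitor(1, 0, &reclaimed);
 *     if (wanted >= 0) {
 *         // the pageout daemon is still seeking "wanted" pages
 *     }
 */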
2427
2428 int
2429 kas_info(struct proc *p,
2430 struct kas_info_args *uap,
2431 int *retval __unused)
2432 {
2433 #ifdef SECURE_KERNEL
2434 (void)p;
2435 (void)uap;
2436 return ENOTSUP;
2437 #else /* !SECURE_KERNEL */
2438 int selector = uap->selector;
2439 user_addr_t valuep = uap->value;
2440 user_addr_t sizep = uap->size;
2441 user_size_t size;
2442 int error;
2443
2444 if (!kauth_cred_issuser(kauth_cred_get())) {
2445 return EPERM;
2446 }
2447
2448 #if CONFIG_MACF
2449 error = mac_system_check_kas_info(kauth_cred_get(), selector);
2450 if (error) {
2451 return error;
2452 }
2453 #endif
2454
2455 if (IS_64BIT_PROCESS(p)) {
2456 user64_size_t size64;
2457 error = copyin(sizep, &size64, sizeof(size64));
2458 size = (user_size_t)size64;
2459 } else {
2460 user32_size_t size32;
2461 error = copyin(sizep, &size32, sizeof(size32));
2462 size = (user_size_t)size32;
2463 }
2464 if (error) {
2465 return error;
2466 }
2467
2468 switch (selector) {
2469 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
2470 {
2471 uint64_t slide = vm_kernel_slide;
2472
2473 if (sizeof(slide) != size) {
2474 return EINVAL;
2475 }
2476
2477 if (IS_64BIT_PROCESS(p)) {
2478 user64_size_t size64 = (user64_size_t)size;
2479 error = copyout(&size64, sizep, sizeof(size64));
2480 } else {
2481 user32_size_t size32 = (user32_size_t)size;
2482 error = copyout(&size32, sizep, sizeof(size32));
2483 }
2484 if (error) {
2485 return error;
2486 }
2487
2488 error = copyout(&slide, valuep, sizeof(slide));
2489 if (error) {
2490 return error;
2491 }
2492 }
2493 break;
2494 default:
2495 return EINVAL;
2496 }
2497
2498 return 0;
2499 #endif /* !SECURE_KERNEL */
2500 }
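
/*
 * Example (userspace, illustrative only; requires root and is unavailable
 * on SECURE_KERNEL builds): query the kernel text slide.
 *
 *     #include <sys/kas_info.h>
 *
 *     uint64_t slide = 0;
 *     size_t size = sizeof(slide);
 *     if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0) {
 *         // "slide" now holds vm_kernel_slide
 *     }
 */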
2501
2502
2503 #pragma clang diagnostic push
2504 #pragma clang diagnostic ignored "-Wcast-qual"
2505 #pragma clang diagnostic ignored "-Wunused-function"
2506
2507 static void
2508 asserts(void)
2509 {
2510 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
2511 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
2512 }
2513
2514 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
2515 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
2516 #pragma clang diagnostic pop
2517
2518 extern uint32_t vm_page_pages;
2519 SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
2520
2521 extern uint32_t vm_page_busy_absent_skipped;
2522 SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
2523
2524 extern uint32_t vm_page_upl_tainted;
2525 SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
2526
2527 extern uint32_t vm_page_iopl_tainted;
2528 SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
2529
2530 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
2531 extern int vm_footprint_suspend_allowed;
2532 SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
2533
2534 extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
2535 static int
2536 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
2537 {
2538 #pragma unused(oidp, arg1, arg2)
2539 int error = 0;
2540 int new_value;
2541
2542 if (req->newptr == USER_ADDR_NULL) {
2543 return 0;
2544 }
2545 error = SYSCTL_IN(req, &new_value, sizeof(int));
2546 if (error) {
2547 return error;
2548 }
2549 if (!vm_footprint_suspend_allowed) {
2550 if (new_value != 0) {
2551 /* suspends are not allowed... */
2552 return 0;
2553 }
2554 /* ... but let resumes proceed */
2555 }
2556 DTRACE_VM2(footprint_suspend,
2557 vm_map_t, current_map(),
2558 int, new_value);
2559
2560 pmap_footprint_suspend(current_map(), new_value);
2561
2562 return 0;
2563 }
2564 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
2565 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
2566 0, 0, &sysctl_vm_footprint_suspend, "I", "");
2567 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
2568
2569 extern uint64_t vm_map_corpse_footprint_count;
2570 extern uint64_t vm_map_corpse_footprint_size_avg;
2571 extern uint64_t vm_map_corpse_footprint_size_max;
2572 extern uint64_t vm_map_corpse_footprint_full;
2573 extern uint64_t vm_map_corpse_footprint_no_buf;
2574 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
2575 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
2576 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
2577 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
2578 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
2579 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
2580 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
2581 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
2582 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
2583 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
2584
2585 #if PMAP_CS
2586 extern uint64_t vm_cs_defer_to_pmap_cs;
2587 extern uint64_t vm_cs_defer_to_pmap_cs_not;
2588 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_pmap_cs,
2589 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_pmap_cs, "");
2590 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_pmap_cs_not,
2591 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_pmap_cs_not, "");
2592 #endif /* PMAP_CS */
2593
2594 extern uint64_t shared_region_pager_copied;
2595 extern uint64_t shared_region_pager_slid;
2596 extern uint64_t shared_region_pager_slid_error;
2597 extern uint64_t shared_region_pager_reclaimed;
2598 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
2599 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
2600 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
2601 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
2602 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
2603 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
2604 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
2605 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
2606
2607 #if MACH_ASSERT
2608 extern int pmap_ledgers_panic_leeway;
2609 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
2610 #endif /* MACH_ASSERT */
2611
2612 extern int vm_protect_privileged_from_untrusted;
2613 SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
2614 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
2615 extern uint64_t vm_copied_on_read;
2616 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
2617 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");