]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
6d2010ae | 2 | * Copyright (c) 2000-2010 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Mach Operating System | |
30 | * Copyright (c) 1987 Carnegie-Mellon University | |
31 | * All rights reserved. The CMU software License Agreement specifies | |
32 | * the terms and conditions for use and redistribution. | |
33 | */ | |
1c79356b | 34 | /* |
2d21ac55 A |
35 | * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce |
36 | * support for mandatory and extensible security protections. This notice | |
37 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
38 | * Version 2.0. | |
1c79356b | 39 | */ |
9bccf70c | 40 | |
1c79356b A |
41 | #include <meta_features.h> |
42 | ||
fe8ab488 A |
43 | #include <vm/vm_options.h> |
44 | ||
1c79356b A |
45 | #include <kern/task.h> |
46 | #include <kern/thread.h> | |
47 | #include <kern/debug.h> | |
6d2010ae | 48 | #include <kern/extmod_statistics.h> |
91447636 | 49 | #include <mach/mach_traps.h> |
2d21ac55 A |
50 | #include <mach/port.h> |
51 | #include <mach/task.h> | |
52 | #include <mach/task_access.h> | |
53 | #include <mach/task_special_ports.h> | |
1c79356b | 54 | #include <mach/time_value.h> |
91447636 | 55 | #include <mach/vm_map.h> |
1c79356b A |
56 | #include <mach/vm_param.h> |
57 | #include <mach/vm_prot.h> | |
1c79356b | 58 | |
91447636 | 59 | #include <sys/file_internal.h> |
1c79356b A |
60 | #include <sys/param.h> |
61 | #include <sys/systm.h> | |
62 | #include <sys/dir.h> | |
63 | #include <sys/namei.h> | |
91447636 A |
64 | #include <sys/proc_internal.h> |
65 | #include <sys/kauth.h> | |
1c79356b A |
66 | #include <sys/vm.h> |
67 | #include <sys/file.h> | |
91447636 | 68 | #include <sys/vnode_internal.h> |
1c79356b A |
69 | #include <sys/mount.h> |
70 | #include <sys/trace.h> | |
71 | #include <sys/kernel.h> | |
91447636 A |
72 | #include <sys/ubc_internal.h> |
73 | #include <sys/user.h> | |
0c530ab8 | 74 | #include <sys/syslog.h> |
9bccf70c | 75 | #include <sys/stat.h> |
91447636 A |
76 | #include <sys/sysproto.h> |
77 | #include <sys/mman.h> | |
0c530ab8 | 78 | #include <sys/sysctl.h> |
6d2010ae A |
79 | #include <sys/cprotect.h> |
80 | #include <sys/kpi_socket.h> | |
316670eb | 81 | #include <sys/kas_info.h> |
fe8ab488 A |
82 | #include <sys/socket.h> |
83 | #include <sys/socketvar.h> | |
1c79356b | 84 | |
b0d623f7 | 85 | #include <security/audit/audit.h> |
6d2010ae | 86 | #include <security/mac.h> |
e5568f75 A |
87 | #include <bsm/audit_kevents.h> |
88 | ||
1c79356b | 89 | #include <kern/kalloc.h> |
1c79356b A |
90 | #include <vm/vm_map.h> |
91 | #include <vm/vm_kern.h> | |
b0d623f7 | 92 | #include <vm/vm_pageout.h> |
1c79356b A |
93 | |
94 | #include <machine/spl.h> | |
9bccf70c | 95 | |
2d21ac55 A |
96 | #include <mach/shared_region.h> |
97 | #include <vm/vm_shared_region.h> | |
9bccf70c | 98 | |
91447636 | 99 | #include <vm/vm_protos.h> |
9bccf70c | 100 | |
6d2010ae | 101 | #include <sys/kern_memorystatus.h> |
6d2010ae A |
102 | |
103 | ||
15129b1c | 104 | int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); |
6d2010ae A |
105 | int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); |
106 | ||
3e170ce0 A |
107 | #if DEVELOPMENT || DEBUG |
108 | extern int radar_20146450; | |
109 | SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, ""); | |
110 | ||
111 | extern int macho_printf; | |
112 | SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, ""); | |
113 | ||
114 | extern int apple_protect_pager_data_request_debug; | |
115 | SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, ""); | |
116 | ||
117 | #endif /* DEVELOPMENT || DEBUG */ | |
118 | ||
fe8ab488 A |
119 | SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, ""); |
120 | SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, ""); | |
121 | SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, ""); | |
122 | SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, ""); | |
123 | SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, ""); | |
124 | SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, ""); | |
125 | SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, ""); | |
126 | SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, ""); | |
127 | SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, ""); | |
128 | #if VM_SCAN_FOR_SHADOW_CHAIN | |
129 | static int vm_shadow_max_enabled = 0; /* Disabled by default */ | |
130 | extern int proc_shadow_max(void); | |
/*
 * sysctl handler for vm.vm_shadow_max: reports the deepest VM object shadow
 * chain found by proc_shadow_max(), or 0 while the vm_shadow_max_enabled
 * toggle (off by default) is clear -- the scan is expensive.
 */
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled)
		value = proc_shadow_max();

	return SYSCTL_OUT(req, &value, sizeof(value));
}
142 | SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, | |
143 | 0, 0, &vm_shadow_max, "I", ""); | |
144 | ||
145 | SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, ""); | |
146 | ||
147 | #endif /* VM_SCAN_FOR_SHADOW_CHAIN */ | |
6d2010ae | 148 | |
fe8ab488 | 149 | SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); |
6d2010ae | 150 | |
fe8ab488 A |
151 | __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( |
152 | mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid); | |
2d21ac55 A |
153 | /* |
154 | * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c | |
155 | */ | |
156 | ||
4a3eedf9 | 157 | #ifndef SECURE_KERNEL |
2d21ac55 A |
158 | extern int allow_stack_exec, allow_data_exec; |
159 | ||
6d2010ae A |
160 | SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, ""); |
161 | SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, ""); | |
3e170ce0 | 162 | |
4a3eedf9 | 163 | #endif /* !SECURE_KERNEL */ |
2d21ac55 | 164 | |
2d21ac55 A |
165 | static const char *prot_values[] = { |
166 | "none", | |
167 | "read-only", | |
168 | "write-only", | |
169 | "read-write", | |
170 | "execute-only", | |
171 | "read-execute", | |
172 | "write-execute", | |
173 | "read-write-execute" | |
174 | }; | |
175 | ||
/*
 * Log a denied attempt to execute from a data or stack page.
 * prot is masked with VM_PROT_ALL so it always indexes safely into
 * the 8-entry prot_values[] table above.
 */
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}
182 | ||
b0d623f7 A |
183 | int shared_region_unnest_logging = 1; |
184 | ||
6d2010ae | 185 | SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED, |
b0d623f7 A |
186 | &shared_region_unnest_logging, 0, ""); |
187 | ||
188 | int vm_shared_region_unnest_log_interval = 10; | |
189 | int shared_region_unnest_log_count_threshold = 5; | |
190 | ||
fe8ab488 A |
191 | /* |
192 | * Shared cache path enforcement. | |
193 | */ | |
194 | ||
195 | static int scdir_enforce = 1; | |
196 | static char scdir_path[] = "/var/db/dyld/"; | |
197 | ||
198 | #ifndef SECURE_KERNEL | |
199 | SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, ""); | |
200 | #endif | |
201 | ||
b0d623f7 A |
202 | /* These log rate throttling state variables aren't thread safe, but |
203 | * are sufficient unto the task. | |
204 | */ | |
205 | static int64_t last_unnest_log_time = 0; | |
206 | static int shared_region_unnest_log_count = 0; | |
207 | ||
3e170ce0 A |
208 | void log_unnest_badness( |
209 | vm_map_t m, | |
210 | vm_map_offset_t s, | |
211 | vm_map_offset_t e) { | |
212 | struct timeval tv; | |
b0d623f7 A |
213 | |
214 | if (shared_region_unnest_logging == 0) | |
215 | return; | |
216 | ||
217 | if (shared_region_unnest_logging == 1) { | |
218 | microtime(&tv); | |
219 | if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) { | |
220 | if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold) | |
221 | return; | |
222 | } | |
223 | else { | |
224 | last_unnest_log_time = tv.tv_sec; | |
225 | shared_region_unnest_log_count = 0; | |
226 | } | |
227 | } | |
228 | ||
3e170ce0 | 229 | printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m)); |
b0d623f7 | 230 | } |
1c79356b | 231 | |
91447636 A |
232 | int |
233 | useracc( | |
234 | user_addr_t addr, | |
235 | user_size_t len, | |
236 | int prot) | |
1c79356b | 237 | { |
39236c6e A |
238 | vm_map_t map; |
239 | ||
240 | map = current_map(); | |
1c79356b | 241 | return (vm_map_check_protection( |
39236c6e A |
242 | map, |
243 | vm_map_trunc_page(addr, | |
244 | vm_map_page_mask(map)), | |
245 | vm_map_round_page(addr+len, | |
246 | vm_map_page_mask(map)), | |
1c79356b A |
247 | prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE)); |
248 | } | |
249 | ||
91447636 A |
250 | int |
251 | vslock( | |
252 | user_addr_t addr, | |
253 | user_size_t len) | |
1c79356b | 254 | { |
39236c6e A |
255 | kern_return_t kret; |
256 | vm_map_t map; | |
257 | ||
258 | map = current_map(); | |
259 | kret = vm_map_wire(map, | |
260 | vm_map_trunc_page(addr, | |
261 | vm_map_page_mask(map)), | |
262 | vm_map_round_page(addr+len, | |
263 | vm_map_page_mask(map)), | |
3e170ce0 | 264 | VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD), |
39236c6e | 265 | FALSE); |
0b4e3aa0 A |
266 | |
267 | switch (kret) { | |
268 | case KERN_SUCCESS: | |
269 | return (0); | |
270 | case KERN_INVALID_ADDRESS: | |
271 | case KERN_NO_SPACE: | |
272 | return (ENOMEM); | |
273 | case KERN_PROTECTION_FAILURE: | |
274 | return (EACCES); | |
275 | default: | |
276 | return (EINVAL); | |
277 | } | |
1c79356b A |
278 | } |
279 | ||
91447636 A |
280 | int |
281 | vsunlock( | |
282 | user_addr_t addr, | |
283 | user_size_t len, | |
284 | __unused int dirtied) | |
1c79356b | 285 | { |
1c79356b | 286 | #if FIXME /* [ */ |
91447636 | 287 | pmap_t pmap; |
1c79356b | 288 | vm_page_t pg; |
91447636 A |
289 | vm_map_offset_t vaddr; |
290 | ppnum_t paddr; | |
1c79356b | 291 | #endif /* FIXME ] */ |
39236c6e A |
292 | kern_return_t kret; |
293 | vm_map_t map; | |
294 | ||
295 | map = current_map(); | |
1c79356b A |
296 | |
297 | #if FIXME /* [ */ | |
298 | if (dirtied) { | |
299 | pmap = get_task_pmap(current_task()); | |
39236c6e A |
300 | for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); |
301 | vaddr < vm_map_round_page(addr+len, PAGE_MASK); | |
302 | vaddr += PAGE_SIZE) { | |
1c79356b A |
303 | paddr = pmap_extract(pmap, vaddr); |
304 | pg = PHYS_TO_VM_PAGE(paddr); | |
305 | vm_page_set_modified(pg); | |
306 | } | |
307 | } | |
308 | #endif /* FIXME ] */ | |
309 | #ifdef lint | |
310 | dirtied++; | |
311 | #endif /* lint */ | |
39236c6e A |
312 | kret = vm_map_unwire(map, |
313 | vm_map_trunc_page(addr, | |
314 | vm_map_page_mask(map)), | |
315 | vm_map_round_page(addr+len, | |
316 | vm_map_page_mask(map)), | |
317 | FALSE); | |
0b4e3aa0 A |
318 | switch (kret) { |
319 | case KERN_SUCCESS: | |
320 | return (0); | |
321 | case KERN_INVALID_ADDRESS: | |
322 | case KERN_NO_SPACE: | |
323 | return (ENOMEM); | |
324 | case KERN_PROTECTION_FAILURE: | |
325 | return (EACCES); | |
326 | default: | |
327 | return (EINVAL); | |
328 | } | |
1c79356b A |
329 | } |
330 | ||
91447636 A |
331 | int |
332 | subyte( | |
333 | user_addr_t addr, | |
334 | int byte) | |
1c79356b A |
335 | { |
336 | char character; | |
337 | ||
338 | character = (char)byte; | |
339 | return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); | |
340 | } | |
341 | ||
91447636 A |
342 | int |
343 | suibyte( | |
344 | user_addr_t addr, | |
345 | int byte) | |
1c79356b A |
346 | { |
347 | char character; | |
348 | ||
349 | character = (char)byte; | |
91447636 | 350 | return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); |
1c79356b A |
351 | } |
352 | ||
91447636 | 353 | int fubyte(user_addr_t addr) |
1c79356b A |
354 | { |
355 | unsigned char byte; | |
356 | ||
357 | if (copyin(addr, (void *) &byte, sizeof(char))) | |
358 | return(-1); | |
359 | return(byte); | |
360 | } | |
361 | ||
91447636 | 362 | int fuibyte(user_addr_t addr) |
1c79356b A |
363 | { |
364 | unsigned char byte; | |
365 | ||
366 | if (copyin(addr, (void *) &(byte), sizeof(char))) | |
367 | return(-1); | |
368 | return(byte); | |
369 | } | |
370 | ||
91447636 A |
371 | int |
372 | suword( | |
373 | user_addr_t addr, | |
374 | long word) | |
1c79356b A |
375 | { |
376 | return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); | |
377 | } | |
378 | ||
91447636 | 379 | long fuword(user_addr_t addr) |
1c79356b | 380 | { |
b0d623f7 | 381 | long word = 0; |
1c79356b A |
382 | |
383 | if (copyin(addr, (void *) &word, sizeof(int))) | |
384 | return(-1); | |
385 | return(word); | |
386 | } | |
387 | ||
388 | /* suiword and fuiword are the same as suword and fuword, respectively */ | |
389 | ||
91447636 A |
390 | int |
391 | suiword( | |
392 | user_addr_t addr, | |
393 | long word) | |
1c79356b A |
394 | { |
395 | return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); | |
396 | } | |
397 | ||
91447636 | 398 | long fuiword(user_addr_t addr) |
1c79356b | 399 | { |
b0d623f7 | 400 | long word = 0; |
1c79356b A |
401 | |
402 | if (copyin(addr, (void *) &word, sizeof(int))) | |
403 | return(-1); | |
404 | return(word); | |
405 | } | |
91447636 A |
406 | |
407 | /* | |
408 | * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the | |
409 | * fetching and setting of process-sized size_t and pointer values. | |
410 | */ | |
411 | int | |
412 | sulong(user_addr_t addr, int64_t word) | |
413 | { | |
414 | ||
415 | if (IS_64BIT_PROCESS(current_proc())) { | |
416 | return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1); | |
417 | } else { | |
418 | return(suiword(addr, (long)word)); | |
419 | } | |
420 | } | |
421 | ||
422 | int64_t | |
423 | fulong(user_addr_t addr) | |
424 | { | |
425 | int64_t longword; | |
426 | ||
427 | if (IS_64BIT_PROCESS(current_proc())) { | |
428 | if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) | |
429 | return(-1); | |
430 | return(longword); | |
431 | } else { | |
432 | return((int64_t)fuiword(addr)); | |
433 | } | |
434 | } | |
1c79356b A |
435 | |
436 | int | |
91447636 A |
437 | suulong(user_addr_t addr, uint64_t uword) |
438 | { | |
439 | ||
440 | if (IS_64BIT_PROCESS(current_proc())) { | |
441 | return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1); | |
442 | } else { | |
b0d623f7 | 443 | return(suiword(addr, (uint32_t)uword)); |
91447636 A |
444 | } |
445 | } | |
446 | ||
447 | uint64_t | |
448 | fuulong(user_addr_t addr) | |
1c79356b | 449 | { |
91447636 A |
450 | uint64_t ulongword; |
451 | ||
452 | if (IS_64BIT_PROCESS(current_proc())) { | |
453 | if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) | |
454 | return(-1ULL); | |
455 | return(ulongword); | |
456 | } else { | |
457 | return((uint64_t)fuiword(addr)); | |
458 | } | |
459 | } | |
460 | ||
461 | int | |
2d21ac55 | 462 | swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval) |
91447636 A |
463 | { |
464 | return(ENOTSUP); | |
1c79356b A |
465 | } |
466 | ||
b0d623f7 A |
467 | /* |
468 | * pid_for_task | |
469 | * | |
470 | * Find the BSD process ID for the Mach task associated with the given Mach port | |
471 | * name | |
472 | * | |
473 | * Parameters: args User argument descriptor (see below) | |
474 | * | |
475 | * Indirect parameters: args->t Mach port name | |
476 | * args->pid Process ID (returned value; see below) | |
477 | * | |
478 | * Returns: KERL_SUCCESS Success | |
479 | * KERN_FAILURE Not success | |
480 | * | |
481 | * Implicit returns: args->pid Process ID | |
482 | * | |
483 | */ | |
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;	/* copied out even on failure */
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Convert the caller-supplied port name into a task reference. */
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		/* No task reference was taken; skip the deallocate. */
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			/* Mach task with no attached BSD process. */
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* Best-effort: copy the pid (-1 on failure) to user space regardless. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
519 | ||
2d21ac55 A |
520 | /* |
521 | * | |
522 | * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self | |
523 | * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication | |
524 | * | |
525 | */ | |
526 | static int tfp_policy = KERN_TFP_POLICY_DEFAULT; | |
527 | ||
528 | /* | |
529 | * Routine: task_for_pid_posix_check | |
530 | * Purpose: | |
531 | * Verify that the current process should be allowed to | |
532 | * get the target process's task port. This is only | |
533 | * permitted if: | |
534 | * - The current process is root | |
535 | * OR all of the following are true: | |
536 | * - The target process's real, effective, and saved uids | |
537 | * are the same as the current proc's euid, | |
538 | * - The target process's group set is a subset of the | |
539 | * calling process's group set, and | |
540 | * - The target process hasn't switched credentials. | |
541 | * | |
542 | * Returns: TRUE: permitted | |
543 | * FALSE: denied | |
544 | */ | |
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* Take a reference on the target's credentials; released at `out`. */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
			(kauth_cred_getruid(targetcred) != myuid) ||
			(kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
604 | ||
/*
 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 * Description: Waits for the user space daemon to respond to the request
 *		we made. Function declared non inline to be visible in
 *		stackshots and spindumps as well as debugging.
 */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
{
	/* Pure forwarder: the distinctive frame name in backtraces is the point. */
	return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
}
617 | ||
1c79356b A |
618 | /* |
619 | * Routine: task_for_pid | |
620 | * Purpose: | |
621 | * Get the task port for another "process", named by its | |
622 | * process ID on the same host as "target_task". | |
623 | * | |
624 | * Only permitted to privileged processes, or processes | |
625 | * with the same user ID. | |
91447636 | 626 | * |
b0d623f7 A |
627 | * Note: if pid == 0, an error is return no matter who is calling. |
628 | * | |
91447636 | 629 | * XXX This should be a BSD system call, not a Mach trap!!! |
1c79356b A |
630 | */ |
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		/* Copy out a null name (t1 is TASK_NULL here) before failing. */
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	/* Validate the caller's task port; a reference is held until tfpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* POSIX credential/policy gate (see task_for_pid_posix_check). */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		/* MAC policy hook may still veto access. */
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		/* convert_task_to_port consumes the reference taken above. */
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* tret is MACH_PORT_NULL on any failure path. */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
730 | ||
0c530ab8 A |
731 | /* |
732 | * Routine: task_name_for_pid | |
733 | * Purpose: | |
734 | * Get the task name port for another "process", named by its | |
735 | * process ID on the same host as "target_task". | |
736 | * | |
737 | * Only permitted to privileged processes, or processes | |
738 | * with the same user ID. | |
739 | * | |
740 | * XXX This should be a BSD system call, not a Mach trap!!! | |
741 | */ | |
742 | ||
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;	/* refheld: target_cred reference taken */
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller's task port; reference held until deallocated below. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Allowed when the target is alive and we are the target,
		 * are root, or our uids match the target's (euid and ruid).
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
				if (error) {
					/* Drop the reference taken just above. */
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				/* convert_task_name_to_port consumes the task reference. */
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

	/* Failure path: copy out a null port name. */
#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
818 | ||
d1ecb069 A |
/*
 * pid_suspend:
 *	Suspend the task of process `pid` after MACF, posix, and
 *	(for non-root callers) task-access-port checks.  Error codes are
 *	BSD errnos returned through *ret and as the (historically typed)
 *	kern_return_t result.
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int 	pid = args->pid;
	int 	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/* Never suspend the kernel's own pid. */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	/* Hold the task across the suspend call; released below. */
	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Tell memorystatus (jetsam) the process is now suspended. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
902 | ||
903 | kern_return_t | |
904 | pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) | |
905 | { | |
906 | task_t target = NULL; | |
907 | proc_t targetproc = PROC_NULL; | |
908 | int pid = args->pid; | |
909 | int error = 0; | |
910 | ||
911 | #if CONFIG_MACF | |
6d2010ae | 912 | error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME); |
d1ecb069 | 913 | if (error) { |
6d2010ae | 914 | error = EPERM; |
d1ecb069 A |
915 | goto out; |
916 | } | |
917 | #endif | |
918 | ||
919 | if (pid == 0) { | |
6d2010ae | 920 | error = EPERM; |
d1ecb069 A |
921 | goto out; |
922 | } | |
923 | ||
924 | targetproc = proc_find(pid); | |
316670eb A |
925 | if (targetproc == PROC_NULL) { |
926 | error = ESRCH; | |
927 | goto out; | |
928 | } | |
929 | ||
d1ecb069 | 930 | if (!task_for_pid_posix_check(targetproc)) { |
6d2010ae | 931 | error = EPERM; |
d1ecb069 A |
932 | goto out; |
933 | } | |
934 | ||
935 | target = targetproc->task; | |
d1ecb069 A |
936 | if (target != TASK_NULL) { |
937 | mach_port_t tfpport; | |
938 | ||
939 | /* If we aren't root and target's task access port is set... */ | |
940 | if (!kauth_cred_issuser(kauth_cred_get()) && | |
941 | targetproc != current_proc() && | |
942 | (task_get_task_access_port(target, &tfpport) == 0) && | |
943 | (tfpport != IPC_PORT_NULL)) { | |
944 | ||
945 | if (tfpport == IPC_PORT_DEAD) { | |
6d2010ae | 946 | error = EACCES; |
d1ecb069 A |
947 | goto out; |
948 | } | |
949 | ||
950 | /* Call up to the task access server */ | |
fe8ab488 | 951 | error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); |
d1ecb069 A |
952 | |
953 | if (error != MACH_MSG_SUCCESS) { | |
954 | if (error == MACH_RCV_INTERRUPTED) | |
6d2010ae | 955 | error = EINTR; |
d1ecb069 | 956 | else |
6d2010ae | 957 | error = EPERM; |
d1ecb069 A |
958 | goto out; |
959 | } | |
960 | } | |
961 | } | |
d1ecb069 | 962 | |
3e170ce0 | 963 | |
d1ecb069 | 964 | task_reference(target); |
6d2010ae | 965 | |
316670eb | 966 | #if CONFIG_MEMORYSTATUS |
39236c6e | 967 | memorystatus_on_resume(targetproc); |
6d2010ae A |
968 | #endif |
969 | ||
316670eb | 970 | error = task_pidresume(target); |
6d2010ae A |
971 | if (error) { |
972 | if (error == KERN_INVALID_ARGUMENT) { | |
973 | error = EINVAL; | |
974 | } else { | |
39236c6e A |
975 | if (error == KERN_MEMORY_ERROR) { |
976 | psignal(targetproc, SIGKILL); | |
977 | error = EIO; | |
978 | } else | |
979 | error = EPERM; | |
6d2010ae A |
980 | } |
981 | } | |
316670eb | 982 | |
d1ecb069 A |
983 | task_deallocate(target); |
984 | ||
985 | out: | |
986 | if (targetproc != PROC_NULL) | |
987 | proc_rele(targetproc); | |
316670eb | 988 | |
d1ecb069 A |
989 | *ret = error; |
990 | return error; | |
d1ecb069 A |
991 | } |
992 | ||
6d2010ae | 993 | |
0c530ab8 A |
994 | static int |
995 | sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, | |
996 | __unused int arg2, struct sysctl_req *req) | |
997 | { | |
998 | int error = 0; | |
999 | int new_value; | |
1000 | ||
1001 | error = SYSCTL_OUT(req, arg1, sizeof(int)); | |
1002 | if (error || req->newptr == USER_ADDR_NULL) | |
1003 | return(error); | |
1004 | ||
39236c6e | 1005 | if (!kauth_cred_issuser(kauth_cred_get())) |
0c530ab8 A |
1006 | return(EPERM); |
1007 | ||
1008 | if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { | |
1009 | goto out; | |
1010 | } | |
1011 | if ((new_value == KERN_TFP_POLICY_DENY) | |
2d21ac55 | 1012 | || (new_value == KERN_TFP_POLICY_DEFAULT)) |
0c530ab8 A |
1013 | tfp_policy = new_value; |
1014 | else | |
1015 | error = EINVAL; | |
1016 | out: | |
1017 | return(error); | |
1018 | ||
1019 | } | |
1020 | ||
2d21ac55 A |
1021 | #if defined(SECURE_KERNEL) |
1022 | static int kern_secure_kernel = 1; | |
1023 | #else | |
1024 | static int kern_secure_kernel = 0; | |
1025 | #endif | |
0c530ab8 | 1026 | |
6d2010ae | 1027 | SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, ""); |
0c530ab8 | 1028 | |
6d2010ae A |
1029 | SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp"); |
1030 | SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, | |
0c530ab8 | 1031 | &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy"); |
0c530ab8 | 1032 | |
6d2010ae | 1033 | SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED, |
2d21ac55 | 1034 | &shared_region_trace_level, 0, ""); |
6d2010ae | 1035 | SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED, |
2d21ac55 | 1036 | &shared_region_version, 0, ""); |
6d2010ae | 1037 | SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED, |
2d21ac55 | 1038 | &shared_region_persistence, 0, ""); |
1c79356b | 1039 | |
91447636 | 1040 | /* |
2d21ac55 | 1041 | * shared_region_check_np: |
91447636 | 1042 | * |
2d21ac55 A |
1043 | * This system call is intended for dyld. |
1044 | * | |
1045 | * dyld calls this when any process starts to see if the process's shared | |
1046 | * region is already set up and ready to use. | |
1047 | * This call returns the base address of the first mapping in the | |
1048 | * process's shared region's first mapping. | |
1049 | * dyld will then check what's mapped at that address. | |
1050 | * | |
1051 | * If the shared region is empty, dyld will then attempt to map the shared | |
1052 | * cache file in the shared region via the shared_region_map_np() system call. | |
1053 | * | |
1054 | * If something's already mapped in the shared region, dyld will check if it | |
1055 | * matches the shared cache it would like to use for that process. | |
1056 | * If it matches, everything's ready and the process can proceed and use the |
1057 | * shared region. | |
1058 | * If it doesn't match, dyld will unmap the shared region and map the shared | |
1059 | * cache into the process's address space via mmap(). | |
1060 | * | |
1061 | * ERROR VALUES | |
1062 | * EINVAL no shared region | |
1063 | * ENOMEM shared region is empty | |
1064 | * EFAULT bad address for "start_address" | |
91447636 A |
1065 | */ |
1066 | int | |
2d21ac55 A |
1067 | shared_region_check_np( |
1068 | __unused struct proc *p, | |
1069 | struct shared_region_check_np_args *uap, | |
1070 | __unused int *retvalp) | |
91447636 | 1071 | { |
2d21ac55 | 1072 | vm_shared_region_t shared_region; |
316670eb | 1073 | mach_vm_offset_t start_address = 0; |
2d21ac55 A |
1074 | int error; |
1075 | kern_return_t kr; | |
1076 | ||
1077 | SHARED_REGION_TRACE_DEBUG( | |
1078 | ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n", | |
fe8ab488 A |
1079 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1080 | p->p_pid, p->p_comm, | |
2d21ac55 A |
1081 | (uint64_t)uap->start_address)); |
1082 | ||
1083 | /* retrieve the current tasks's shared region */ | |
1084 | shared_region = vm_shared_region_get(current_task()); | |
1085 | if (shared_region != NULL) { | |
1086 | /* retrieve address of its first mapping... */ | |
1087 | kr = vm_shared_region_start_address(shared_region, | |
1088 | &start_address); | |
91447636 A |
1089 | if (kr != KERN_SUCCESS) { |
1090 | error = ENOMEM; | |
2d21ac55 A |
1091 | } else { |
1092 | /* ... and give it to the caller */ | |
1093 | error = copyout(&start_address, | |
1094 | (user_addr_t) uap->start_address, | |
1095 | sizeof (start_address)); | |
1096 | if (error) { | |
1097 | SHARED_REGION_TRACE_ERROR( | |
1098 | ("shared_region: %p [%d(%s)] " | |
1099 | "check_np(0x%llx) " | |
1100 | "copyout(0x%llx) error %d\n", | |
fe8ab488 A |
1101 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1102 | p->p_pid, p->p_comm, | |
2d21ac55 A |
1103 | (uint64_t)uap->start_address, (uint64_t)start_address, |
1104 | error)); | |
1105 | } | |
91447636 | 1106 | } |
2d21ac55 | 1107 | vm_shared_region_deallocate(shared_region); |
91447636 | 1108 | } else { |
2d21ac55 | 1109 | /* no shared region ! */ |
91447636 | 1110 | error = EINVAL; |
91447636 | 1111 | } |
0c530ab8 | 1112 | |
2d21ac55 A |
1113 | SHARED_REGION_TRACE_DEBUG( |
1114 | ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n", | |
fe8ab488 A |
1115 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1116 | p->p_pid, p->p_comm, | |
2d21ac55 | 1117 | (uint64_t)uap->start_address, (uint64_t)start_address, error)); |
0c530ab8 | 1118 | |
91447636 A |
1119 | return error; |
1120 | } | |
1121 | ||
6d2010ae A |
1122 | |
1123 | int | |
1124 | shared_region_copyin_mappings( | |
1125 | struct proc *p, | |
1126 | user_addr_t user_mappings, | |
1127 | unsigned int mappings_count, | |
1128 | struct shared_file_mapping_np *mappings) | |
1129 | { | |
1130 | int error = 0; | |
1131 | vm_size_t mappings_size = 0; | |
1132 | ||
1133 | /* get the list of mappings the caller wants us to establish */ | |
1134 | mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0])); | |
1135 | error = copyin(user_mappings, | |
1136 | mappings, | |
1137 | mappings_size); | |
1138 | if (error) { | |
1139 | SHARED_REGION_TRACE_ERROR( | |
1140 | ("shared_region: %p [%d(%s)] map(): " | |
1141 | "copyin(0x%llx, %d) failed (error=%d)\n", | |
fe8ab488 A |
1142 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1143 | p->p_pid, p->p_comm, | |
6d2010ae A |
1144 | (uint64_t)user_mappings, mappings_count, error)); |
1145 | } | |
1146 | return error; | |
1147 | } | |
91447636 | 1148 | /* |
2d21ac55 | 1149 | * shared_region_map_np() |
91447636 | 1150 | * |
2d21ac55 | 1151 | * This system call is intended for dyld. |
91447636 | 1152 | * |
2d21ac55 A |
1153 | * dyld uses this to map a shared cache file into a shared region. |
1154 | * This is usually done only the first time a shared cache is needed. | |
1155 | * Subsequent processes will just use the populated shared region without | |
1156 | * requiring any further setup. | |
91447636 A |
1157 | */ |
1158 | int | |
15129b1c | 1159 | _shared_region_map_and_slide( |
91447636 | 1160 | struct proc *p, |
6d2010ae A |
1161 | int fd, |
1162 | uint32_t mappings_count, | |
1163 | struct shared_file_mapping_np *mappings, | |
15129b1c A |
1164 | uint32_t slide, |
1165 | user_addr_t slide_start, | |
1166 | user_addr_t slide_size) | |
91447636 | 1167 | { |
2d21ac55 A |
1168 | int error; |
1169 | kern_return_t kr; | |
2d21ac55 | 1170 | struct fileproc *fp; |
fe8ab488 | 1171 | struct vnode *vp, *root_vp, *scdir_vp; |
2d21ac55 A |
1172 | struct vnode_attr va; |
1173 | off_t fs; | |
1174 | memory_object_size_t file_size; | |
39236c6e | 1175 | #if CONFIG_MACF |
6d2010ae | 1176 | vm_prot_t maxprot = VM_PROT_ALL; |
39236c6e | 1177 | #endif |
2d21ac55 A |
1178 | memory_object_control_t file_control; |
1179 | struct vm_shared_region *shared_region; | |
1180 | ||
1181 | SHARED_REGION_TRACE_DEBUG( | |
1182 | ("shared_region: %p [%d(%s)] -> map\n", | |
fe8ab488 A |
1183 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1184 | p->p_pid, p->p_comm)); | |
2d21ac55 A |
1185 | |
1186 | shared_region = NULL; | |
91447636 A |
1187 | fp = NULL; |
1188 | vp = NULL; | |
fe8ab488 | 1189 | scdir_vp = NULL; |
91447636 | 1190 | |
91447636 A |
1191 | /* get file structure from file descriptor */ |
1192 | error = fp_lookup(p, fd, &fp, 0); | |
1193 | if (error) { | |
2d21ac55 A |
1194 | SHARED_REGION_TRACE_ERROR( |
1195 | ("shared_region: %p [%d(%s)] map: " | |
0c530ab8 | 1196 | "fd=%d lookup failed (error=%d)\n", |
fe8ab488 A |
1197 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1198 | p->p_pid, p->p_comm, fd, error)); | |
91447636 A |
1199 | goto done; |
1200 | } | |
1201 | ||
1202 | /* make sure we're attempting to map a vnode */ | |
39236c6e | 1203 | if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { |
2d21ac55 A |
1204 | SHARED_REGION_TRACE_ERROR( |
1205 | ("shared_region: %p [%d(%s)] map: " | |
0c530ab8 | 1206 | "fd=%d not a vnode (type=%d)\n", |
fe8ab488 A |
1207 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1208 | p->p_pid, p->p_comm, | |
39236c6e | 1209 | fd, FILEGLOB_DTYPE(fp->f_fglob))); |
91447636 A |
1210 | error = EINVAL; |
1211 | goto done; | |
1212 | } | |
1213 | ||
1214 | /* we need at least read permission on the file */ | |
1215 | if (! (fp->f_fglob->fg_flag & FREAD)) { | |
2d21ac55 A |
1216 | SHARED_REGION_TRACE_ERROR( |
1217 | ("shared_region: %p [%d(%s)] map: " | |
0c530ab8 | 1218 | "fd=%d not readable\n", |
fe8ab488 A |
1219 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1220 | p->p_pid, p->p_comm, fd)); | |
91447636 A |
1221 | error = EPERM; |
1222 | goto done; | |
1223 | } | |
1224 | ||
1225 | /* get vnode from file structure */ | |
2d21ac55 | 1226 | error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); |
91447636 | 1227 | if (error) { |
2d21ac55 A |
1228 | SHARED_REGION_TRACE_ERROR( |
1229 | ("shared_region: %p [%d(%s)] map: " | |
0c530ab8 | 1230 | "fd=%d getwithref failed (error=%d)\n", |
fe8ab488 A |
1231 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1232 | p->p_pid, p->p_comm, fd, error)); | |
91447636 A |
1233 | goto done; |
1234 | } | |
1235 | vp = (struct vnode *) fp->f_fglob->fg_data; | |
1236 | ||
1237 | /* make sure the vnode is a regular file */ | |
1238 | if (vp->v_type != VREG) { | |
2d21ac55 A |
1239 | SHARED_REGION_TRACE_ERROR( |
1240 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
0c530ab8 | 1241 | "not a file (type=%d)\n", |
fe8ab488 A |
1242 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1243 | p->p_pid, p->p_comm, | |
1244 | (void *)VM_KERNEL_ADDRPERM(vp), | |
1245 | vp->v_name, vp->v_type)); | |
91447636 A |
1246 | error = EINVAL; |
1247 | goto done; | |
1248 | } | |
1249 | ||
6d2010ae | 1250 | #if CONFIG_MACF |
3e170ce0 A |
1251 | /* pass in 0 for the offset argument because AMFI does not need the offset |
1252 | of the shared cache */ | |
6d2010ae | 1253 | error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), |
3e170ce0 | 1254 | fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot); |
6d2010ae A |
1255 | if (error) { |
1256 | goto done; | |
1257 | } | |
1258 | #endif /* MAC */ | |
1259 | ||
1260 | #if CONFIG_PROTECT | |
1261 | /* check for content protection access */ | |
1262 | { | |
316670eb A |
1263 | error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0); |
1264 | if (error) { | |
6d2010ae | 1265 | goto done; |
316670eb | 1266 | } |
6d2010ae A |
1267 | } |
1268 | #endif /* CONFIG_PROTECT */ | |
1269 | ||
2d21ac55 A |
1270 | /* make sure vnode is on the process's root volume */ |
1271 | root_vp = p->p_fd->fd_rdir; | |
1272 | if (root_vp == NULL) { | |
1273 | root_vp = rootvnode; | |
6d2010ae A |
1274 | } else { |
1275 | /* | |
1276 | * Chroot-ed processes can't use the shared_region. | |
1277 | */ | |
1278 | error = EINVAL; | |
1279 | goto done; | |
91447636 | 1280 | } |
6d2010ae | 1281 | |
2d21ac55 A |
1282 | if (vp->v_mount != root_vp->v_mount) { |
1283 | SHARED_REGION_TRACE_ERROR( | |
1284 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1285 | "not on process's root volume\n", | |
fe8ab488 A |
1286 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1287 | p->p_pid, p->p_comm, | |
1288 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); | |
2d21ac55 | 1289 | error = EPERM; |
91447636 | 1290 | goto done; |
91447636 A |
1291 | } |
1292 | ||
2d21ac55 A |
1293 | /* make sure vnode is owned by "root" */ |
1294 | VATTR_INIT(&va); | |
1295 | VATTR_WANTED(&va, va_uid); | |
1296 | error = vnode_getattr(vp, &va, vfs_context_current()); | |
1297 | if (error) { | |
1298 | SHARED_REGION_TRACE_ERROR( | |
1299 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1300 | "vnode_getattr(%p) failed (error=%d)\n", | |
fe8ab488 A |
1301 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1302 | p->p_pid, p->p_comm, | |
1303 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, | |
1304 | (void *)VM_KERNEL_ADDRPERM(vp), error)); | |
91447636 A |
1305 | goto done; |
1306 | } | |
2d21ac55 A |
1307 | if (va.va_uid != 0) { |
1308 | SHARED_REGION_TRACE_ERROR( | |
1309 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1310 | "owned by uid=%d instead of 0\n", | |
fe8ab488 A |
1311 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1312 | p->p_pid, p->p_comm, | |
1313 | (void *)VM_KERNEL_ADDRPERM(vp), | |
1314 | vp->v_name, va.va_uid)); | |
2d21ac55 A |
1315 | error = EPERM; |
1316 | goto done; | |
91447636 | 1317 | } |
2d21ac55 | 1318 | |
fe8ab488 A |
1319 | if (scdir_enforce) { |
1320 | /* get vnode for scdir_path */ | |
1321 | error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current()); | |
1322 | if (error) { | |
1323 | SHARED_REGION_TRACE_ERROR( | |
1324 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1325 | "vnode_lookup(%s) failed (error=%d)\n", | |
1326 | (void *)VM_KERNEL_ADDRPERM(current_thread()), | |
1327 | p->p_pid, p->p_comm, | |
1328 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, | |
1329 | scdir_path, error)); | |
1330 | goto done; | |
1331 | } | |
1332 | ||
1333 | /* ensure parent is scdir_vp */ | |
1334 | if (vnode_parent(vp) != scdir_vp) { | |
1335 | SHARED_REGION_TRACE_ERROR( | |
1336 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1337 | "shared cache file not in %s\n", | |
1338 | (void *)VM_KERNEL_ADDRPERM(current_thread()), | |
1339 | p->p_pid, p->p_comm, | |
1340 | (void *)VM_KERNEL_ADDRPERM(vp), | |
1341 | vp->v_name, scdir_path)); | |
1342 | error = EPERM; | |
1343 | goto done; | |
1344 | } | |
1345 | } | |
1346 | ||
2d21ac55 A |
1347 | /* get vnode size */ |
1348 | error = vnode_size(vp, &fs, vfs_context_current()); | |
1349 | if (error) { | |
1350 | SHARED_REGION_TRACE_ERROR( | |
1351 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1352 | "vnode_size(%p) failed (error=%d)\n", | |
fe8ab488 A |
1353 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1354 | p->p_pid, p->p_comm, | |
1355 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, | |
1356 | (void *)VM_KERNEL_ADDRPERM(vp), error)); | |
2d21ac55 | 1357 | goto done; |
91447636 | 1358 | } |
2d21ac55 | 1359 | file_size = fs; |
91447636 A |
1360 | |
1361 | /* get the file's memory object handle */ | |
91447636 A |
1362 | file_control = ubc_getobject(vp, UBC_HOLDOBJECT); |
1363 | if (file_control == MEMORY_OBJECT_CONTROL_NULL) { | |
2d21ac55 A |
1364 | SHARED_REGION_TRACE_ERROR( |
1365 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1366 | "no memory object\n", | |
fe8ab488 A |
1367 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1368 | p->p_pid, p->p_comm, | |
1369 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); | |
91447636 A |
1370 | error = EINVAL; |
1371 | goto done; | |
1372 | } | |
91447636 | 1373 | |
91447636 | 1374 | |
2d21ac55 A |
1375 | /* get the process's shared region (setup in vm_map_exec()) */ |
1376 | shared_region = vm_shared_region_get(current_task()); | |
1377 | if (shared_region == NULL) { | |
1378 | SHARED_REGION_TRACE_ERROR( | |
1379 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1380 | "no shared region\n", | |
fe8ab488 A |
1381 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1382 | p->p_pid, p->p_comm, | |
1383 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); | |
2d21ac55 A |
1384 | goto done; |
1385 | } | |
91447636 | 1386 | |
2d21ac55 A |
1387 | /* map the file into that shared region's submap */ |
1388 | kr = vm_shared_region_map_file(shared_region, | |
1389 | mappings_count, | |
1390 | mappings, | |
1391 | file_control, | |
1392 | file_size, | |
6d2010ae | 1393 | (void *) p->p_fd->fd_rdir, |
15129b1c A |
1394 | slide, |
1395 | slide_start, | |
1396 | slide_size); | |
2d21ac55 A |
1397 | if (kr != KERN_SUCCESS) { |
1398 | SHARED_REGION_TRACE_ERROR( | |
1399 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " | |
1400 | "vm_shared_region_map_file() failed kr=0x%x\n", | |
fe8ab488 A |
1401 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1402 | p->p_pid, p->p_comm, | |
1403 | (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr)); | |
0c530ab8 A |
1404 | switch (kr) { |
1405 | case KERN_INVALID_ADDRESS: | |
1406 | error = EFAULT; | |
2d21ac55 | 1407 | break; |
0c530ab8 A |
1408 | case KERN_PROTECTION_FAILURE: |
1409 | error = EPERM; | |
2d21ac55 | 1410 | break; |
0c530ab8 A |
1411 | case KERN_NO_SPACE: |
1412 | error = ENOMEM; | |
2d21ac55 | 1413 | break; |
0c530ab8 A |
1414 | case KERN_FAILURE: |
1415 | case KERN_INVALID_ARGUMENT: | |
1416 | default: | |
1417 | error = EINVAL; | |
2d21ac55 | 1418 | break; |
0c530ab8 | 1419 | } |
2d21ac55 | 1420 | goto done; |
91447636 A |
1421 | } |
1422 | ||
2d21ac55 A |
1423 | error = 0; |
1424 | ||
6d2010ae A |
1425 | vnode_lock_spin(vp); |
1426 | ||
1427 | vp->v_flag |= VSHARED_DYLD; | |
1428 | ||
1429 | vnode_unlock(vp); | |
1430 | ||
2d21ac55 A |
1431 | /* update the vnode's access time */ |
1432 | if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) { | |
1433 | VATTR_INIT(&va); | |
1434 | nanotime(&va.va_access_time); | |
1435 | VATTR_SET_ACTIVE(&va, va_access_time); | |
1436 | vnode_setattr(vp, &va, vfs_context_current()); | |
91447636 A |
1437 | } |
1438 | ||
2d21ac55 A |
1439 | if (p->p_flag & P_NOSHLIB) { |
1440 | /* signal that this process is now using split libraries */ | |
b0d623f7 | 1441 | OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); |
91447636 A |
1442 | } |
1443 | ||
1444 | done: | |
1445 | if (vp != NULL) { | |
1446 | /* | |
1447 | * release the vnode... | |
1448 | * ubc_map() still holds it for us in the non-error case | |
1449 | */ | |
1450 | (void) vnode_put(vp); | |
1451 | vp = NULL; | |
1452 | } | |
1453 | if (fp != NULL) { | |
1454 | /* release the file descriptor */ | |
1455 | fp_drop(p, fd, fp, 0); | |
1456 | fp = NULL; | |
1457 | } | |
fe8ab488 A |
1458 | if (scdir_vp != NULL) { |
1459 | (void)vnode_put(scdir_vp); | |
1460 | scdir_vp = NULL; | |
1461 | } | |
9bccf70c | 1462 | |
2d21ac55 A |
1463 | if (shared_region != NULL) { |
1464 | vm_shared_region_deallocate(shared_region); | |
9bccf70c A |
1465 | } |
1466 | ||
2d21ac55 A |
1467 | SHARED_REGION_TRACE_DEBUG( |
1468 | ("shared_region: %p [%d(%s)] <- map\n", | |
fe8ab488 A |
1469 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1470 | p->p_pid, p->p_comm)); | |
9bccf70c | 1471 | |
2d21ac55 | 1472 | return error; |
9bccf70c A |
1473 | } |
1474 | ||
6d2010ae A |
1475 | int |
1476 | shared_region_map_and_slide_np( | |
1477 | struct proc *p, | |
1478 | struct shared_region_map_and_slide_np_args *uap, | |
1479 | __unused int *retvalp) | |
1480 | { | |
6d2010ae | 1481 | struct shared_file_mapping_np *mappings; |
15129b1c | 1482 | unsigned int mappings_count = uap->count; |
6d2010ae A |
1483 | kern_return_t kr = KERN_SUCCESS; |
1484 | uint32_t slide = uap->slide; | |
1485 | ||
1486 | #define SFM_MAX_STACK 8 | |
1487 | struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; | |
1488 | ||
316670eb A |
1489 | /* Is the process chrooted?? */ |
1490 | if (p->p_fd->fd_rdir != NULL) { | |
1491 | kr = EINVAL; | |
1492 | goto done; | |
1493 | } | |
1494 | ||
6d2010ae A |
1495 | if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) { |
1496 | if (kr == KERN_INVALID_ARGUMENT) { | |
1497 | /* | |
1498 | * This will happen if we request sliding again | |
1499 | * with the same slide value that was used earlier | |
316670eb | 1500 | * for the very first sliding. |
6d2010ae A |
1501 | */ |
1502 | kr = KERN_SUCCESS; | |
6d2010ae | 1503 | } |
316670eb | 1504 | goto done; |
6d2010ae A |
1505 | } |
1506 | ||
1507 | if (mappings_count == 0) { | |
1508 | SHARED_REGION_TRACE_INFO( | |
1509 | ("shared_region: %p [%d(%s)] map(): " | |
1510 | "no mappings\n", | |
fe8ab488 A |
1511 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1512 | p->p_pid, p->p_comm)); | |
6d2010ae A |
1513 | kr = 0; /* no mappings: we're done ! */ |
1514 | goto done; | |
1515 | } else if (mappings_count <= SFM_MAX_STACK) { | |
1516 | mappings = &stack_mappings[0]; | |
1517 | } else { | |
1518 | SHARED_REGION_TRACE_ERROR( | |
1519 | ("shared_region: %p [%d(%s)] map(): " | |
1520 | "too many mappings (%d)\n", | |
fe8ab488 A |
1521 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
1522 | p->p_pid, p->p_comm, | |
6d2010ae A |
1523 | mappings_count)); |
1524 | kr = KERN_FAILURE; | |
1525 | goto done; | |
1526 | } | |
1527 | ||
1528 | if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) { | |
1529 | goto done; | |
1530 | } | |
1531 | ||
1532 | ||
15129b1c A |
1533 | kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings, |
1534 | slide, | |
1535 | uap->slide_start, uap->slide_size); | |
6d2010ae A |
1536 | if (kr != KERN_SUCCESS) { |
1537 | return kr; | |
1538 | } | |
1539 | ||
6d2010ae A |
1540 | done: |
1541 | return kr; | |
1542 | } | |
9bccf70c | 1543 | |
2d21ac55 | 1544 | /* sysctl overflow room */ |
9bccf70c | 1545 | |
fe8ab488 A |
1546 | SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, |
1547 | (int *) &page_size, 0, "vm page size"); | |
1548 | ||
2d21ac55 A |
1549 | /* vm_page_free_target is provided as a makeshift solution for applications that want to |
1550 | allocate buffer space, possibly purgeable memory, but not cause inactive pages to be | |
1551 | reclaimed. It allows the app to calculate how much memory is free outside the free target. */ | |
1552 | extern unsigned int vm_page_free_target; | |
6d2010ae | 1553 | SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED, |
2d21ac55 | 1554 | &vm_page_free_target, 0, "Pageout daemon free target"); |
9bccf70c | 1555 | |
b0d623f7 | 1556 | extern unsigned int vm_memory_pressure; |
6d2010ae | 1557 | SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 A |
1558 | &vm_memory_pressure, 0, "Memory pressure indicator"); |
1559 | ||
1560 | static int | |
1561 | vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS | |
1562 | { | |
1563 | #pragma unused(oidp, arg1, arg2) | |
1564 | unsigned int page_free_wanted; | |
1565 | ||
1566 | page_free_wanted = mach_vm_ctl_page_free_wanted(); | |
1567 | return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted)); | |
1568 | } | |
1569 | SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted, | |
1570 | CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, | |
1571 | 0, 0, vm_ctl_page_free_wanted, "I", ""); | |
1572 | ||
1573 | extern unsigned int vm_page_purgeable_count; | |
6d2010ae | 1574 | SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 A |
1575 | &vm_page_purgeable_count, 0, "Purgeable page count"); |
1576 | ||
1577 | extern unsigned int vm_page_purgeable_wired_count; | |
6d2010ae | 1578 | SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 A |
1579 | &vm_page_purgeable_wired_count, 0, "Wired purgeable page count"); |
1580 | ||
3e170ce0 A |
1581 | extern unsigned int vm_pageout_purged_objects; |
1582 | SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED, | |
1583 | &vm_pageout_purged_objects, 0, "System purged object count"); | |
1584 | ||
39236c6e A |
1585 | extern int madvise_free_debug; |
1586 | SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED, | |
1587 | &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)"); | |
1588 | ||
6d2010ae | 1589 | SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1590 | &vm_page_stats_reusable.reusable_count, 0, "Reusable page count"); |
6d2010ae | 1591 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1592 | &vm_page_stats_reusable.reusable_pages_success, ""); |
6d2010ae | 1593 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1594 | &vm_page_stats_reusable.reusable_pages_failure, ""); |
6d2010ae | 1595 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1596 | &vm_page_stats_reusable.reusable_pages_shared, ""); |
6d2010ae | 1597 | SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1598 | &vm_page_stats_reusable.all_reusable_calls, ""); |
6d2010ae | 1599 | SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1600 | &vm_page_stats_reusable.partial_reusable_calls, ""); |
6d2010ae | 1601 | SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1602 | &vm_page_stats_reusable.reuse_pages_success, ""); |
6d2010ae | 1603 | SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1604 | &vm_page_stats_reusable.reuse_pages_failure, ""); |
6d2010ae | 1605 | SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1606 | &vm_page_stats_reusable.all_reuse_calls, ""); |
6d2010ae | 1607 | SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1608 | &vm_page_stats_reusable.partial_reuse_calls, ""); |
6d2010ae | 1609 | SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1610 | &vm_page_stats_reusable.can_reuse_success, ""); |
6d2010ae | 1611 | SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, |
b0d623f7 | 1612 | &vm_page_stats_reusable.can_reuse_failure, ""); |
39236c6e A |
1613 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED, |
1614 | &vm_page_stats_reusable.reusable_reclaimed, ""); | |
b0d623f7 A |
1615 | |
1616 | ||
316670eb A |
1617 | extern unsigned int vm_page_free_count, vm_page_speculative_count; |
1618 | SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, ""); | |
1619 | SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, ""); | |
1620 | ||
1621 | extern unsigned int vm_page_cleaned_count; | |
1622 | SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size"); | |
1623 | ||
/*
 * Pageout counters, exported read-only under the "vm" sysctl node.
 * Each sysctl mirrors an extern counter maintained by the pageout code.
 */

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
fe8ab488 A |
/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
/*
 * NOTE(review): unlike the counters above these are CTLFLAG_RW —
 * presumably so the counters can be reset from user space; confirm
 * before changing to read-only.
 */
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
316670eb A |
1660 | #include <kern/thread.h> |
1661 | #include <sys/user.h> | |
1662 | ||
1663 | void vm_pageout_io_throttle(void); | |
1664 | ||
1665 | void vm_pageout_io_throttle(void) { | |
1666 | struct uthread *uthread = get_bsdthread_info(current_thread()); | |
1667 | ||
1668 | /* | |
1669 | * thread is marked as a low priority I/O type | |
1670 | * and the I/O we issued while in this cleaning operation | |
1671 | * collided with normal I/O operations... we'll | |
1672 | * delay in order to mitigate the impact of this | |
1673 | * task on the normal operation of the system | |
1674 | */ | |
1675 | ||
1676 | if (uthread->uu_lowpri_window) { | |
39236c6e | 1677 | throttle_lowpri_io(1); |
316670eb A |
1678 | } |
1679 | ||
1680 | } | |
1681 | ||
b0d623f7 A |
1682 | int |
1683 | vm_pressure_monitor( | |
1684 | __unused struct proc *p, | |
1685 | struct vm_pressure_monitor_args *uap, | |
1686 | int *retval) | |
1687 | { | |
1688 | kern_return_t kr; | |
1689 | uint32_t pages_reclaimed; | |
1690 | uint32_t pages_wanted; | |
1691 | ||
1692 | kr = mach_vm_pressure_monitor( | |
1693 | (boolean_t) uap->wait_for_pressure, | |
1694 | uap->nsecs_monitored, | |
1695 | (uap->pages_reclaimed) ? &pages_reclaimed : NULL, | |
1696 | &pages_wanted); | |
1697 | ||
1698 | switch (kr) { | |
1699 | case KERN_SUCCESS: | |
1700 | break; | |
1701 | case KERN_ABORTED: | |
1702 | return EINTR; | |
1703 | default: | |
1704 | return EINVAL; | |
1705 | } | |
1706 | ||
1707 | if (uap->pages_reclaimed) { | |
1708 | if (copyout((void *)&pages_reclaimed, | |
1709 | uap->pages_reclaimed, | |
1710 | sizeof (pages_reclaimed)) != 0) { | |
1711 | return EFAULT; | |
1712 | } | |
1713 | } | |
1714 | ||
1715 | *retval = (int) pages_wanted; | |
1716 | return 0; | |
1717 | } | |
316670eb A |
1718 | |
/*
 * kas_info(2): expose kernel address-space layout information to
 * privileged user space.  The only selector implemented here is
 * KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, which returns vm_kernel_slide.
 *
 * Protocol: the caller passes a size pointer (read/written at the
 * caller's pointer width) and a value buffer; the size must match the
 * selector's datum exactly or EINVAL is returned.
 *
 * Returns 0 on success; ENOTSUP on SECURE_KERNEL builds; EPERM for
 * non-root callers; EINVAL for unknown selectors or size mismatch;
 * otherwise whatever copyin/copyout/MAC report.
 */
int
kas_info(struct proc *p,
			  struct kas_info_args *uap,
			  int *retval __unused)
{
#ifdef SECURE_KERNEL
	/* Secure kernels never disclose layout information. */
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int			selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t size;
	int			error;

	/* Root only: the slide would defeat KASLR if widely readable. */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	/* Give the MAC policy a chance to veto this selector. */
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	/* Read the caller-supplied buffer size at the caller's word size. */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
		case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
			{
				uint64_t slide = vm_kernel_slide;

				/* Buffer must be exactly sizeof(uint64_t). */
				if (sizeof(slide) != size) {
					return EINVAL;
				}

				/* Write the (unchanged) size back at the caller's word size. */
				if (IS_64BIT_PROCESS(p)) {
					user64_size_t size64 = (user64_size_t)size;
					error = copyout(&size64, sizep, sizeof(size64));
				} else {
					user32_size_t size32 = (user32_size_t)size;
					error = copyout(&size32, sizep, sizeof(size32));
				}
				if (error) {
					return error;
				}

				error = copyout(&slide, valuep, sizeof(slide));
				if (error) {
					return error;
				}
			}
			break;
		default:
			return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}