/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <vm/vm_options.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#endif /* DEVELOPMENT || DEBUG */

SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;	/* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled)
		value = proc_shadow_max();

	return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* !SECURE_KERNEL */
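
/*
 * Usage sketch (not part of this file): on kernels built without
 * SECURE_KERNEL, the two knobs above can be read or toggled from user
 * space with sysctlbyname(3).  A minimal, hypothetical example:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int allow = 0;
 *		size_t len = sizeof(allow);
 *
 *		if (sysctlbyname("vm.allow_data_exec", &allow, &len, NULL, 0) == 0)
 *			printf("vm.allow_data_exec = %d\n", allow);
 *		return 0;
 *	}
 */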

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * Shared cache path enforcement.
 */

static int scdir_enforce = 1;
static char scdir_path[] = "/var/db/dyld/";

#ifndef SECURE_KERNEL
SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, "");
#endif

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void log_unnest_badness(
	vm_map_t m,
	vm_map_offset_t s,
	vm_map_offset_t e) {
	struct timeval tv;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		}
		else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int		prot)
{
	vm_map_t	map;

	map = current_map();
	return (vm_map_check_protection(
			map,
			vm_map_trunc_page(addr,
					  vm_map_page_mask(map)),
			vm_map_round_page(addr+len,
					  vm_map_page_mask(map)),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();
	kret = vm_map_wire(map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(map)),
			   vm_map_round_page(addr+len,
					     vm_map_page_mask(map)),
			   VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD),
			   FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		     vaddr < vm_map_round_page(addr+len, PAGE_MASK);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(map,
			     vm_map_trunc_page(addr,
					       vm_map_page_mask(map)),
			     vm_map_round_page(addr+len,
					       vm_map_page_mask(map)),
			     FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return(-1);
	return(byte);
}

int fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return(-1);
	return(byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return(-1);
		return(longword);
	} else {
		return((int64_t)fuiword(addr));
	}
}
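
/*
 * Usage sketch (hypothetical, not part of this file): kernel code that
 * must read or write a pointer-sized user value without knowing the
 * target task's word size can use fulong()/sulong() and let them pick
 * the right width:
 *
 *	user_addr_t uptr = ...;		// some address in the current task
 *	int64_t val = fulong(uptr);	// 8 bytes for 64-bit tasks, 4 otherwise
 *	if (val != -1)			// note: -1 doubles as the error value
 *		(void) sulong(uptr, val + 1);
 */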

int
suulong(user_addr_t addr, uint64_t uword)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return(-1ULL);
		return(ulongword);
	} else {
		return((uint64_t)fuiword(addr));
	}
}

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach
 * port name.
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *		KERN_FAILURE	Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
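
/*
 * Usage sketch (not part of this file): pid_for_task() is exposed to user
 * space as a Mach trap.  Given a task port, a debugger-style tool can
 * recover the BSD pid.  A minimal, hypothetical example:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_traps.h>
 *	#include <stdio.h>
 *
 *	void
 *	print_pid_of(mach_port_t task)
 *	{
 *		int pid = -1;
 *
 *		if (pid_for_task(task, &pid) == KERN_SUCCESS)
 *			printf("task port %u -> pid %d\n", task, pid);
 *	}
 */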

/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
			(kauth_cred_getruid(targetcred) != myuid) ||
			(kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 * Description:	Waits for the user space daemon to respond to the request
 *		we made. The function is declared noinline so that it stays
 *		visible in stackshots and spindumps, and to aid debugging.
 */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
{
	return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
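
/*
 * Usage sketch (not part of this file): from user space, task_for_pid()
 * is reached via the Mach trap of the same name.  Subject to the policy
 * checks above (root, same-uid rules, taskgated upcall, MAC hooks), a
 * suitably privileged tool can obtain a task port like this:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_traps.h>
 *
 *	kern_return_t
 *	get_task_port(pid_t pid, mach_port_name_t *task)
 *	{
 *		// target_tport is the caller's own task port
 *		return task_for_pid(mach_task_self(), pid, task);
 *	}
 */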

/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
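
/*
 * Usage sketch (not part of this file): a task name port carries identity
 * but no control rights, so the checks above are weaker than those for
 * task_for_pid().  A hypothetical caller:
 *
 *	mach_port_name_t name_port = MACH_PORT_NULL;
 *	kern_return_t kr = task_name_for_pid(mach_task_self(), pid, &name_port);
 *	// on success, name_port can be used with identity-only interfaces
 *	// such as task_info()
 */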

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}


	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else
				error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}
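
/*
 * Usage sketch (not part of this file): pid_suspend()/pid_resume() are
 * private BSD syscalls.  Assuming the libsystem_kernel wrappers of the
 * same names (an assumption, not a documented API), a hypothetical
 * caller with sufficient privilege:
 *
 *	extern int pid_suspend(int pid);
 *	extern int pid_resume(int pid);
 *
 *	if (pid_suspend(pid) == 0) {
 *		// target task is now suspended; later:
 *		(void) pid_resume(pid);
 *	}
 */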


static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return(error);

	if (!kauth_cred_issuser(kauth_cred_get()))
		return(EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
		|| (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return(error);

}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
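
/*
 * Usage sketch (not part of this file): the handler above backs the
 * "kern.tfp.policy" sysctl.  Reading the current policy from user space:
 *
 *	#include <sys/sysctl.h>
 *
 *	int policy = 0;
 *	size_t len = sizeof(policy);
 *	(void) sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0);
 *	// values: KERN_TFP_POLICY_DENY or KERN_TFP_POLICY_DEFAULT
 *	// (see <sys/sysctl.h>)
 */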

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 (void *)VM_KERNEL_ADDRPERM(current_thread()),
					 p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
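
/*
 * Usage sketch (not part of this file): dyld invokes this through its
 * syscall stub.  A hypothetical equivalent using syscall(2):
 *
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	uint64_t base = 0;
 *	if (syscall(SYS_shared_region_check_np, &base) == 0) {
 *		// base now holds the first mapping address of the shared region
 *	}
 */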


int
shared_region_copyin_mappings(
	struct proc			*p,
	user_addr_t			user_mappings,
	unsigned int			mappings_count,
	struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
	struct proc				*p,
	int					fd,
	uint32_t				mappings_count,
	struct shared_file_mapping_np		*mappings,
	uint32_t				slide,
	user_addr_t				slide_start,
	user_addr_t				slide_size)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp, *scdir_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
#if CONFIG_MACF
	vm_prot_t			maxprot = VM_PROT_ALL;
#endif
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;
	scdir_vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 fd, FILEGLOB_DTYPE(fp->f_fglob)));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp),
			 vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	/* pass in 0 for the offset argument because AMFI does not need the offset
	   of the shared cache */
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
			 (void *)VM_KERNEL_ADDRPERM(vp), error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp),
			 vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	if (scdir_enforce) {
		/* get vnode for scdir_path */
		error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				 "vnode_lookup(%s) failed (error=%d)\n",
				 (void *)VM_KERNEL_ADDRPERM(current_thread()),
				 p->p_pid, p->p_comm,
				 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
				 scdir_path, error));
			goto done;
		}

		/* ensure parent is scdir_vp */
		if (vnode_parent(vp) != scdir_vp) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				 "shared cache file not in %s\n",
				 (void *)VM_KERNEL_ADDRPERM(current_thread()),
				 p->p_pid, p->p_comm,
				 (void *)VM_KERNEL_ADDRPERM(vp),
				 vp->v_name, scdir_path));
			error = EPERM;
			goto done;
		}
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
			 (void *)VM_KERNEL_ADDRPERM(vp), error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		error = EINVAL;
		goto done;
	}


	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       slide,
				       slide_start,
				       slide_size);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}
	if (scdir_vp != NULL) {
		(void)vnode_put(scdir_vp);
		scdir_vp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm));

	return error;
}
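
/*
 * Usage sketch (not part of this file): the mappings passed in are
 * struct shared_file_mapping_np entries (see <mach/shared_region.h>).
 * A hypothetical single read-only mapping of the first 0x4000 bytes of
 * the cache file would look like:
 *
 *	struct shared_file_mapping_np m = {
 *		.sfm_address	 = SHARED_REGION_BASE_X86_64,
 *		.sfm_size	 = 0x4000,
 *		.sfm_file_offset = 0,
 *		.sfm_max_prot	 = VM_PROT_READ,
 *		.sfm_init_prot	 = VM_PROT_READ,
 *	};
 */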

int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm));
		kr = 0; /* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
					  slide,
					  uap->slide_start, uap->slide_size);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

done:
	return kr;
}

/* sysctl overflow room */

SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
	   (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
   allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
   reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");
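
/*
 * Usage sketch (not part of this file): combining this target with the
 * vm.page_free_count sysctl registered below, an application can estimate
 * how many pages are free beyond the pageout daemon's target:
 *
 *	#include <sys/sysctl.h>
 *
 *	unsigned int freecnt = 0, target = 0;
 *	size_t len = sizeof(freecnt);
 *	(void) sysctlbyname("vm.page_free_count", &freecnt, &len, NULL, 0);
 *	len = sizeof(target);
 *	(void) sysctlbyname("vm.vm_page_free_target", &target, &len, NULL, 0);
 *	long spare_pages = (long)freecnt - (long)target;
 */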

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_pageout_purged_objects;
SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_pageout_purged_objects, 0, "System purged object count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void vm_pageout_io_throttle(void) {
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low-priority I/O type and the I/O
	 * we issued while in this cleaning operation collided with normal
	 * I/O operations, delay in order to mitigate the impact of this
	 * task on the normal operation of the system.
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}

}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}
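
/*
 * Usage sketch (not part of this file): vm_pressure_monitor is a plain
 * BSD syscall; assuming the SYS_vm_pressure_monitor stub from
 * <sys/syscall.h> (an assumption, not a documented interface), a
 * hypothetical blocking monitor would look like:
 *
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	uint32_t reclaimed = 0;
 *	// arg1=1 blocks until the pageout daemon wants pages (wait_for_pressure),
 *	// arg2=0 is nsecs_monitored; the return value is the pages-wanted count.
 *	int wanted = (int) syscall(SYS_vm_pressure_monitor, 1, 0, &reclaimed);
 */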

int
kas_info(struct proc *p,
	 struct kas_info_args *uap,
	 int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		if (IS_64BIT_PROCESS(p)) {
			user64_size_t size64 = (user64_size_t)size;
			error = copyout(&size64, sizep, sizeof(size64));
		} else {
			user32_size_t size32 = (user32_size_t)size;
			error = copyout(&size32, sizep, sizeof(size32));
		}
		if (error) {
			return error;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
	}
		break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}
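
/*
 * Usage sketch (not part of this file): kas_info() has a user-space
 * wrapper declared in <sys/kas_info.h> and, per the checks above, is
 * root-only.  Querying the kernel text slide:
 *
 *	#include <sys/kas_info.h>
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0) {
 *		// slide now holds vm_kernel_slide
 *	}
 */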