/* bsd/vm/vm_unix.c (apple/xnu, xnu-2782.1.97) */

/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <vm/vm_options.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map_and_slide(struct proc *, int, unsigned int, struct shared_file_mapping_np *, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc *, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;  /* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
    int value = 0;

    if (vm_shadow_max_enabled)
        value = proc_shadow_max();

    return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
    mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
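
/*
 * Illustrative user-space sketch (not part of this file): on builds where
 * these knobs exist they can be read, and set by root, via sysctlbyname(3):
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	int value = 0;
 *	size_t len = sizeof(value);
 *	if (sysctlbyname("vm.allow_data_exec", &value, &len, NULL, 0) == 0)
 *		printf("vm.allow_data_exec = %d\n", value);
 */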

static const char *prot_values[] = {
    "none",
    "read-only",
    "write-only",
    "read-write",
    "execute-only",
    "read-execute",
    "write-execute",
    "read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
    printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
        current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * Shared cache path enforcement.
 */

static int scdir_enforce = 1;
static char scdir_path[] = "/var/db/dyld/";

#ifndef SECURE_KERNEL
SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, "");
#endif

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
    struct timeval tv;
    const char *pcommstr;

    if (shared_region_unnest_logging == 0)
        return;

    if (shared_region_unnest_logging == 1) {
        microtime(&tv);
        if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
            if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
                return;
        } else {
            last_unnest_log_time = tv.tv_sec;
            shared_region_unnest_log_count = 0;
        }
    }

    pcommstr = current_proc()->p_comm;

    printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", pcommstr, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

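/*
 * useracc: report whether the current process's address space permits the
 * requested access (read for B_READ, write otherwise) over [addr, addr+len).
 * Returns non-zero if the access is allowed.
 */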
int
useracc(
    user_addr_t addr,
    user_size_t len,
    int prot)
{
    vm_map_t map;

    map = current_map();
    return (vm_map_check_protection(
        map,
        vm_map_trunc_page(addr, vm_map_page_mask(map)),
        vm_map_round_page(addr + len, vm_map_page_mask(map)),
        prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

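/*
 * vslock: wire the given user address range into physical memory so it can
 * be accessed without faulting; maps Mach wiring errors onto errno values.
 */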
int
vslock(
    user_addr_t addr,
    user_size_t len)
{
    kern_return_t kret;
    vm_map_t map;

    map = current_map();
    kret = vm_map_wire(map,
        vm_map_trunc_page(addr, vm_map_page_mask(map)),
        vm_map_round_page(addr + len, vm_map_page_mask(map)),
        VM_PROT_READ | VM_PROT_WRITE,
        FALSE);

    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}

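/*
 * vsunlock: unwire a range previously wired by vslock().  The "dirtied"
 * handling (marking the underlying pages as modified) is compiled out here.
 */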
int
vsunlock(
    user_addr_t addr,
    user_size_t len,
    __unused int dirtied)
{
#if FIXME  /* [ */
    pmap_t pmap;
    vm_page_t pg;
    vm_map_offset_t vaddr;
    ppnum_t paddr;
#endif  /* FIXME ] */
    kern_return_t kret;
    vm_map_t map;

    map = current_map();

#if FIXME  /* [ */
    if (dirtied) {
        pmap = get_task_pmap(current_task());
        for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
             vaddr < vm_map_round_page(addr + len, PAGE_MASK);
             vaddr += PAGE_SIZE) {
            paddr = pmap_extract(pmap, vaddr);
            pg = PHYS_TO_VM_PAGE(paddr);
            vm_page_set_modified(pg);
        }
    }
#endif  /* FIXME ] */
#ifdef lint
    dirtied++;
#endif  /* lint */
    kret = vm_map_unwire(map,
        vm_map_trunc_page(addr, vm_map_page_mask(map)),
        vm_map_round_page(addr + len, vm_map_page_mask(map)),
        FALSE);
    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}

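/*
 * Legacy store/fetch primitives: subyte()/suword() store a byte/word at a
 * user address, fubyte()/fuword() fetch one; all return -1 on fault.  The
 * "i" (instruction-space) variants are identical on this architecture.
 */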
int
subyte(
    user_addr_t addr,
    int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
    user_addr_t addr,
    int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
fubyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *)&byte, sizeof(char)))
        return (-1);
    return (byte);
}

int
fuibyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *)&(byte), sizeof(char)))
        return (-1);
    return (byte);
}

int
suword(
    user_addr_t addr,
    long word)
{
    return (copyout((void *)&word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *)&word, sizeof(int)))
        return (-1);
    return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
    user_addr_t addr,
    long word)
{
    return (copyout((void *)&word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuiword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *)&word, sizeof(int)))
        return (-1);
    return (word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows
 * the fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
    if (IS_64BIT_PROCESS(current_proc())) {
        return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
    } else {
        return (suiword(addr, (long)word));
    }
}

int64_t
fulong(user_addr_t addr)
{
    int64_t longword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
            return (-1);
        return (longword);
    } else {
        return ((int64_t)fuiword(addr));
    }
}

int
suulong(user_addr_t addr, uint64_t uword)
{
    if (IS_64BIT_PROCESS(current_proc())) {
        return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
    } else {
        return (suiword(addr, (uint32_t)uword));
    }
}

uint64_t
fuulong(user_addr_t addr)
{
    uint64_t ulongword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
            return (-1ULL);
        return (ulongword);
    } else {
        return ((uint64_t)fuiword(addr));
    }
}
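
/*
 * Note that the -1 error return from the fetch routines is ambiguous with a
 * legitimately stored value of -1.  A hypothetical caller, for reference:
 *
 *	int64_t v = fulong(uaddr);	// -1 may mean "fault" or "value -1"
 *	if (sulong(uaddr, v) == -1)	// the store, however, fails only on fault
 *		...treat as an EFAULT-style error...
 */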

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
    return (ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach
 * port name.
 *
 * Parameters:          args        User argument descriptor (see below)
 *
 * Indirect parameters: args->t     Mach port name
 *                      args->pid   Process ID (returned value; see below)
 *
 * Returns:             KERN_SUCCESS    Success
 *                      KERN_FAILURE    Not success
 *
 * Implicit returns:    args->pid   Process ID
 *
 */
kern_return_t
pid_for_task(
    struct pid_for_task_args *args)
{
    mach_port_name_t t = args->t;
    user_addr_t pid_addr = args->pid;
    proc_t p;
    task_t t1;
    int pid = -1;
    kern_return_t err = KERN_SUCCESS;

    AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
    AUDIT_ARG(mach_port1, t);

    t1 = port_name_to_task(t);

    if (t1 == TASK_NULL) {
        err = KERN_FAILURE;
        goto pftout;
    } else {
        p = get_bsdtask_info(t1);
        if (p) {
            pid = proc_pid(p);
            err = KERN_SUCCESS;
        } else {
            err = KERN_FAILURE;
        }
    }
    task_deallocate(t1);
pftout:
    AUDIT_ARG(pid, pid);
    (void) copyout((char *)&pid, pid_addr, sizeof(int));
    AUDIT_MACH_SYSCALL_EXIT(err);
    return (err);
}
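
/*
 * Illustrative user-space sketch (not part of this file): this is reachable
 * from user space as the pid_for_task() Mach trap, e.g.:
 *
 *	#include <mach/mach.h>
 *
 *	int pid = -1;
 *	kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *	// on success, pid now holds the caller's own BSD process ID
 */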

/*
 * tfp_policy = KERN_TFP_POLICY_DENY;     Deny mode: none allowed except self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT;  Default mode: all POSIX checks, plus
 *                                        an upcall via the task access port
 *                                        for authentication
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 * Routine:	task_for_pid_posix_check
 * Purpose:
 *	Verify that the current process should be allowed to
 *	get the target process's task port.  This is only
 *	permitted if:
 *	- The current process is root
 *	OR all of the following are true:
 *	- The target process's real, effective, and saved uids
 *	  are the same as the current proc's euid,
 *	- The target process's group set is a subset of the
 *	  calling process's group set, and
 *	- The target process hasn't switched credentials.
 *
 * Returns:	TRUE: permitted
 *		FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
    kauth_cred_t targetcred, mycred;
    uid_t myuid;
    int allowed;

    /* No task_for_pid on bad targets */
    if (target->p_stat == SZOMB) {
        return FALSE;
    }

    mycred = kauth_cred_get();
    myuid = kauth_cred_getuid(mycred);

    /* If we're running as root, the check passes */
    if (kauth_cred_issuser(mycred))
        return TRUE;

    /* We're allowed to get our own task port */
    if (target == current_proc())
        return TRUE;

    /*
     * Under DENY, only root can get another proc's task port,
     * so no more checks are needed.
     */
    if (tfp_policy == KERN_TFP_POLICY_DENY) {
        return FALSE;
    }

    targetcred = kauth_cred_proc_ref(target);
    allowed = TRUE;

    /* Do target's ruid, euid, and saved uid match my euid? */
    if ((kauth_cred_getuid(targetcred) != myuid) ||
        (kauth_cred_getruid(targetcred) != myuid) ||
        (kauth_cred_getsvuid(targetcred) != myuid)) {
        allowed = FALSE;
        goto out;
    }

    /* Are target's groups a subset of my groups? */
    if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
        allowed == 0) {
        allowed = FALSE;
        goto out;
    }

    /* Has target switched credentials? */
    if (target->p_flag & P_SUGID) {
        allowed = FALSE;
        goto out;
    }

out:
    kauth_cred_unref(&targetcred);
    return allowed;
}

/*
 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 * Description:	Waits for the user-space daemon (taskgated) to respond to the
 *		access-check request we made.  Declared noinline so that the
 *		wait is visible in stackshots and spindumps as well as when
 *		debugging.
 */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
    mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
{
    return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
}

/*
 * Routine:	task_for_pid
 * Purpose:
 *	Get the task port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 *	Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
    struct task_for_pid_args *args)
{
    mach_port_name_t target_tport = args->target_tport;
    int pid = args->pid;
    user_addr_t task_addr = args->t;
    proc_t p = PROC_NULL;
    task_t t1 = TASK_NULL;
    mach_port_name_t tret = MACH_PORT_NULL;
    ipc_port_t tfpport;
    void *sright;
    int error = 0;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    /* Always check if pid == 0 */
    if (pid == 0) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p == PROC_NULL) {
        error = KERN_FAILURE;
        goto tfpout;
    }

#if CONFIG_AUDIT
    AUDIT_ARG(process, p);
#endif

    if (!(task_for_pid_posix_check(p))) {
        error = KERN_FAILURE;
        goto tfpout;
    }

    if (p->task != TASK_NULL) {
        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            p != current_proc() &&
            (task_get_task_access_port(p->task, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = KERN_PROTECTION_FAILURE;
                goto tfpout;
            }

            /* Call up to the task access server */
            error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = KERN_ABORTED;
                else
                    error = KERN_FAILURE;
                goto tfpout;
            }
        }
#if CONFIG_MACF
        error = mac_proc_check_get_task(kauth_cred_get(), p);
        if (error) {
            error = KERN_FAILURE;
            goto tfpout;
        }
#endif

        /* Grant task port access */
        task_reference(p->task);
        extmod_statistics_incr_task_for_pid(p->task);

        sright = (void *) convert_task_to_port(p->task);
        tret = ipc_port_copyout_send(
            sright,
            get_task_ipcspace(current_task()));
    }
    error = KERN_SUCCESS;

tfpout:
    task_deallocate(t1);
    AUDIT_ARG(mach_port2, tret);
    (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return (error);
}
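
/*
 * Illustrative user-space sketch (not part of this file): with the required
 * privileges/entitlements this trap is reachable as task_for_pid(), e.g.:
 *
 *	#include <mach/mach.h>
 *
 *	task_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	// kr is KERN_SUCCESS only if the policy checks above all pass
 */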

/*
 * Routine:	task_name_for_pid
 * Purpose:
 *	Get the task name port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_name_for_pid(
    struct task_name_for_pid_args *args)
{
    mach_port_name_t target_tport = args->target_tport;
    int pid = args->pid;
    user_addr_t task_addr = args->t;
    proc_t p = PROC_NULL;
    task_t t1;
    mach_port_name_t tret;
    void *sright;
    int error = 0, refheld = 0;
    kauth_cred_t target_cred;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p != PROC_NULL) {
        AUDIT_ARG(process, p);
        target_cred = kauth_cred_proc_ref(p);
        refheld = 1;

        if ((p->p_stat != SZOMB)
            && ((current_proc() == p)
                || kauth_cred_issuser(kauth_cred_get())
                || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
                    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

            if (p->task != TASK_NULL) {
                task_reference(p->task);
#if CONFIG_MACF
                error = mac_proc_check_get_task_name(kauth_cred_get(), p);
                if (error) {
                    task_deallocate(p->task);
                    goto noperm;
                }
#endif
                sright = (void *)convert_task_name_to_port(p->task);
                tret = ipc_port_copyout_send(sright,
                    get_task_ipcspace(current_task()));
            } else
                tret = MACH_PORT_NULL;

            AUDIT_ARG(mach_port2, tret);
            (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
            task_deallocate(t1);
            error = KERN_SUCCESS;
            goto tnfpout;
        }
    }

#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
    tret = MACH_PORT_NULL;
    (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
    error = KERN_FAILURE;
tnfpout:
    if (refheld != 0)
        kauth_cred_unref(&target_cred);
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return (error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
    task_t target = NULL;
    proc_t targetproc = PROC_NULL;
    int pid = args->pid;
    int error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (targetproc == PROC_NULL) {
        error = ESRCH;
        goto out;
    }

    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }

    task_reference(target);
    error = task_pidsuspend(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            error = EPERM;
        }
    }
#if CONFIG_MEMORYSTATUS
    else {
        memorystatus_on_suspend(targetproc);
    }
#endif

    task_deallocate(target);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
    task_t target = NULL;
    proc_t targetproc = PROC_NULL;
    int pid = args->pid;
    int error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (targetproc == PROC_NULL) {
        error = ESRCH;
        goto out;
    }

    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }

    task_reference(target);

#if CONFIG_MEMORYSTATUS
    memorystatus_on_resume(targetproc);
#endif

    error = task_pidresume(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            if (error == KERN_MEMORY_ERROR) {
                psignal(targetproc, SIGKILL);
                error = EIO;
            } else
                error = EPERM;
        }
    }

    task_deallocate(target);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);

    *ret = error;
    return error;
}

static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
    int error = 0;
    int new_value;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || req->newptr == USER_ADDR_NULL)
        return (error);

    if (!kauth_cred_issuser(kauth_cred_get()))
        return (EPERM);

    if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
        goto out;
    }
    if ((new_value == KERN_TFP_POLICY_DENY)
        || (new_value == KERN_TFP_POLICY_DEFAULT))
        tfp_policy = new_value;
    else
        error = EINVAL;
out:
    return (error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
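
/*
 * Illustrative user-space sketch (not part of this file): the policy can be
 * read by anyone, and changed by root, through the kern.tfp.policy sysctl:
 *
 *	#include <sys/sysctl.h>
 *
 *	int policy = 0;
 *	size_t len = sizeof(policy);
 *	sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0);
 *	// compare against KERN_TFP_POLICY_DENY / KERN_TFP_POLICY_DEFAULT
 */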

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts, to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
    __unused struct proc *p,
    struct shared_region_check_np_args *uap,
    __unused int *retvalp)
{
    vm_shared_region_t shared_region;
    mach_vm_offset_t start_address = 0;
    int error;
    kern_return_t kr;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
         (void *)VM_KERNEL_ADDRPERM(current_thread()),
         p->p_pid, p->p_comm,
         (uint64_t)uap->start_address));

    /* retrieve the current task's shared region */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region != NULL) {
        /* retrieve address of its first mapping... */
        kr = vm_shared_region_start_address(shared_region,
            &start_address);
        if (kr != KERN_SUCCESS) {
            error = ENOMEM;
        } else {
            /* ... and give it to the caller */
            error = copyout(&start_address,
                (user_addr_t) uap->start_address,
                sizeof(start_address));
            if (error) {
                SHARED_REGION_TRACE_ERROR(
                    ("shared_region: %p [%d(%s)] "
                     "check_np(0x%llx) "
                     "copyout(0x%llx) error %d\n",
                     (void *)VM_KERNEL_ADDRPERM(current_thread()),
                     p->p_pid, p->p_comm,
                     (uint64_t)uap->start_address, (uint64_t)start_address,
                     error));
            }
        }
        vm_shared_region_deallocate(shared_region);
    } else {
        /* no shared region! */
        error = EINVAL;
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
         (void *)VM_KERNEL_ADDRPERM(current_thread()),
         p->p_pid, p->p_comm,
         (uint64_t)uap->start_address, (uint64_t)start_address, error));

    return error;
}
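
/*
 * Illustrative sketch (an assumption, not part of this file): dyld reaches
 * this through a private Libsystem syscall stub, conceptually:
 *
 *	uint64_t base = 0;
 *	if (__shared_region_check_np(&base) == 0)
 *		// base now holds the region's first mapping address
 *
 * The stub name and calling convention are private to Libsystem/dyld.
 */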


int
shared_region_copyin_mappings(
    struct proc *p,
    user_addr_t user_mappings,
    unsigned int mappings_count,
    struct shared_file_mapping_np *mappings)
{
    int error = 0;
    vm_size_t mappings_size = 0;

    /* get the list of mappings the caller wants us to establish */
    mappings_size = (vm_size_t) (mappings_count * sizeof(mappings[0]));
    error = copyin(user_mappings,
        mappings,
        mappings_size);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "copyin(0x%llx, %d) failed (error=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (uint64_t)user_mappings, mappings_count, error));
    }
    return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
    struct proc *p,
    int fd,
    uint32_t mappings_count,
    struct shared_file_mapping_np *mappings,
    uint32_t slide,
    user_addr_t slide_start,
    user_addr_t slide_size)
{
    int error;
    kern_return_t kr;
    struct fileproc *fp;
    struct vnode *vp, *root_vp, *scdir_vp;
    struct vnode_attr va;
    off_t fs;
    memory_object_size_t file_size;
#if CONFIG_MACF
    vm_prot_t maxprot = VM_PROT_ALL;
#endif
    memory_object_control_t file_control;
    struct vm_shared_region *shared_region;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> map\n",
         (void *)VM_KERNEL_ADDRPERM(current_thread()),
         p->p_pid, p->p_comm));

    shared_region = NULL;
    fp = NULL;
    vp = NULL;
    scdir_vp = NULL;

    /* get file structure from file descriptor */
    error = fp_lookup(p, fd, &fp, 0);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d lookup failed (error=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm, fd, error));
        goto done;
    }

    /* make sure we're attempting to map a vnode */
    if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not a vnode (type=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             fd, FILEGLOB_DTYPE(fp->f_fglob)));
        error = EINVAL;
        goto done;
    }

    /* we need at least read permission on the file */
    if (!(fp->f_fglob->fg_flag & FREAD)) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not readable\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm, fd));
        error = EPERM;
        goto done;
    }

    /* get vnode from file structure */
    error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d getwithref failed (error=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm, fd, error));
        goto done;
    }
    vp = (struct vnode *) fp->f_fglob->fg_data;

    /* make sure the vnode is a regular file */
    if (vp->v_type != VREG) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not a file (type=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp),
             vp->v_name, vp->v_type));
        error = EINVAL;
        goto done;
    }

#if CONFIG_MACF
    error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
        fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
    if (error) {
        goto done;
    }
#endif /* MAC */

#if CONFIG_PROTECT
    /* check for content protection access */
    {
        error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
        if (error) {
            goto done;
        }
    }
#endif /* CONFIG_PROTECT */

    /* make sure vnode is on the process's root volume */
    root_vp = p->p_fd->fd_rdir;
    if (root_vp == NULL) {
        root_vp = rootvnode;
    } else {
        /*
         * Chroot-ed processes can't use the shared_region.
         */
        error = EINVAL;
        goto done;
    }

    if (vp->v_mount != root_vp->v_mount) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not on process's root volume\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
        error = EPERM;
        goto done;
    }

    /* make sure vnode is owned by "root" */
    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_uid);
    error = vnode_getattr(vp, &va, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_getattr(%p) failed (error=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
             (void *)VM_KERNEL_ADDRPERM(vp), error));
        goto done;
    }
    if (va.va_uid != 0) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "owned by uid=%d instead of 0\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp),
             vp->v_name, va.va_uid));
        error = EPERM;
        goto done;
    }

    if (scdir_enforce) {
        /* get vnode for scdir_path */
        error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
        if (error) {
            SHARED_REGION_TRACE_ERROR(
                ("shared_region: %p [%d(%s)] map(%p:'%s'): "
                 "vnode_lookup(%s) failed (error=%d)\n",
                 (void *)VM_KERNEL_ADDRPERM(current_thread()),
                 p->p_pid, p->p_comm,
                 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
                 scdir_path, error));
            goto done;
        }

        /* ensure parent is scdir_vp */
        if (vnode_parent(vp) != scdir_vp) {
            SHARED_REGION_TRACE_ERROR(
                ("shared_region: %p [%d(%s)] map(%p:'%s'): "
                 "shared cache file not in %s\n",
                 (void *)VM_KERNEL_ADDRPERM(current_thread()),
                 p->p_pid, p->p_comm,
                 (void *)VM_KERNEL_ADDRPERM(vp),
                 vp->v_name, scdir_path));
            error = EPERM;
            goto done;
        }
    }

    /* get vnode size */
    error = vnode_size(vp, &fs, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_size(%p) failed (error=%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
             (void *)VM_KERNEL_ADDRPERM(vp), error));
        goto done;
    }
    file_size = fs;

    /* get the file's memory object handle */
    file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
    if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no memory object\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
        error = EINVAL;
        goto done;
    }

    /* get the process's shared region (setup in vm_map_exec()) */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region == NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no shared region\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
        goto done;
    }

    /* map the file into that shared region's submap */
    kr = vm_shared_region_map_file(shared_region,
        mappings_count,
        mappings,
        file_control,
        file_size,
        (void *) p->p_fd->fd_rdir,
        slide,
        slide_start,
        slide_size);
    if (kr != KERN_SUCCESS) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vm_shared_region_map_file() failed kr=0x%x\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
        switch (kr) {
        case KERN_INVALID_ADDRESS:
            error = EFAULT;
            break;
        case KERN_PROTECTION_FAILURE:
            error = EPERM;
            break;
        case KERN_NO_SPACE:
            error = ENOMEM;
            break;
        case KERN_FAILURE:
        case KERN_INVALID_ARGUMENT:
        default:
            error = EINVAL;
            break;
        }
        goto done;
    }

    error = 0;

    vnode_lock_spin(vp);

    vp->v_flag |= VSHARED_DYLD;

    vnode_unlock(vp);

    /* update the vnode's access time */
    if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
        VATTR_INIT(&va);
        nanotime(&va.va_access_time);
        VATTR_SET_ACTIVE(&va, va_access_time);
        vnode_setattr(vp, &va, vfs_context_current());
    }

    if (p->p_flag & P_NOSHLIB) {
        /* signal that this process is now using split libraries */
        OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
    }

done:
    if (vp != NULL) {
        /*
         * release the vnode...
         * ubc_map() still holds it for us in the non-error case
         */
        (void) vnode_put(vp);
        vp = NULL;
    }
    if (fp != NULL) {
        /* release the file descriptor */
        fp_drop(p, fd, fp, 0);
        fp = NULL;
    }
    if (scdir_vp != NULL) {
        (void) vnode_put(scdir_vp);
        scdir_vp = NULL;
    }

    if (shared_region != NULL) {
        vm_shared_region_deallocate(shared_region);
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] <- map\n",
         (void *)VM_KERNEL_ADDRPERM(current_thread()),
         p->p_pid, p->p_comm));

    return error;
}
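
/*
 * Illustrative sketch (not part of this file) of the mapping records dyld
 * hands to shared_region_map_and_slide_np(); the field values are made up:
 *
 *	#include <mach/shared_region.h>
 *
 *	struct shared_file_mapping_np m = {
 *		.sfm_address     = SHARED_REGION_BASE,  // target address in region
 *		.sfm_size        = 0x100000,            // bytes to map
 *		.sfm_file_offset = 0,                   // offset in the cache file
 *		.sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sfm_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 */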

int
shared_region_map_and_slide_np(
    struct proc *p,
    struct shared_region_map_and_slide_np_args *uap,
    __unused int *retvalp)
{
    struct shared_file_mapping_np *mappings;
    unsigned int mappings_count = uap->count;
    kern_return_t kr = KERN_SUCCESS;
    uint32_t slide = uap->slide;

#define SFM_MAX_STACK 8
    struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];

    /* Is the process chrooted? */
    if (p->p_fd->fd_rdir != NULL) {
        kr = EINVAL;
        goto done;
    }

    if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
        if (kr == KERN_INVALID_ARGUMENT) {
            /*
             * This will happen if we request sliding again
             * with the same slide value that was used earlier
             * for the very first sliding.
             */
            kr = KERN_SUCCESS;
        }
        goto done;
    }

    if (mappings_count == 0) {
        SHARED_REGION_TRACE_INFO(
            ("shared_region: %p [%d(%s)] map(): "
             "no mappings\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm));
        kr = 0;  /* no mappings: we're done! */
        goto done;
    } else if (mappings_count <= SFM_MAX_STACK) {
        mappings = &stack_mappings[0];
    } else {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "too many mappings (%d)\n",
             (void *)VM_KERNEL_ADDRPERM(current_thread()),
             p->p_pid, p->p_comm,
             mappings_count));
        kr = KERN_FAILURE;
        goto done;
    }

    if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
        goto done;
    }

    kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
        slide,
        uap->slide_start, uap->slide_size);
    if (kr != KERN_SUCCESS) {
        return kr;
    }

done:
    return kr;
}

/* sysctl overflow room */

SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed.  It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");
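
/*
 * Illustrative user-space sketch (not part of this file): an application can
 * estimate how many pages are free beyond the pageout daemon's target:
 *
 *	#include <sys/sysctl.h>
 *
 *	unsigned int free = 0, target = 0;
 *	size_t len = sizeof(free);
 *	sysctlbyname("vm.page_free_count", &free, &len, NULL, 0);
 *	len = sizeof(target);
 *	sysctlbyname("vm.vm_page_free_target", &target, &len, NULL, 0);
 *	long headroom = (long)free - (long)target;  // pages free of the target
 */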

extern unsigned int vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int page_free_wanted;

    page_free_wanted = mach_vm_ctl_page_free_wanted();
    return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, "");  /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");  /* sum of all reactivated, plus busy and nolock (even though those actually get re-DE-activated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
    struct uthread *uthread = get_bsdthread_info(current_thread());

    /*
     * If the thread is marked as a low-priority I/O type and the I/O
     * we issued while in this cleaning operation collided with normal
     * I/O operations, we'll delay in order to mitigate the impact of
     * this task on the normal operation of the system.
     */
    if (uthread->uu_lowpri_window) {
        throttle_lowpri_io(1);
    }
}

int
vm_pressure_monitor(
    __unused struct proc *p,
    struct vm_pressure_monitor_args *uap,
    int *retval)
{
    kern_return_t kr;
    uint32_t pages_reclaimed;
    uint32_t pages_wanted;

    kr = mach_vm_pressure_monitor(
        (boolean_t) uap->wait_for_pressure,
        uap->nsecs_monitored,
        (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
        &pages_wanted);

    switch (kr) {
    case KERN_SUCCESS:
        break;
    case KERN_ABORTED:
        return EINTR;
    default:
        return EINVAL;
    }

    if (uap->pages_reclaimed) {
        if (copyout((void *)&pages_reclaimed,
            uap->pages_reclaimed,
            sizeof(pages_reclaimed)) != 0) {
            return EFAULT;
        }
    }

    *retval = (int) pages_wanted;
    return 0;
}
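
/*
 * Behavioral summary (inferred from the code above, not separately
 * documented): if wait_for_pressure is set, the call blocks until memory
 * pressure is detected; it optionally reports the pages reclaimed over the
 * monitored period via uap->pages_reclaimed, and returns, as its result,
 * the number of pages the VM system still wants reclaimed.
 */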

int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifdef SECURE_KERNEL
    (void)p;
    (void)uap;
    return ENOTSUP;
#else /* !SECURE_KERNEL */
    int selector = uap->selector;
    user_addr_t valuep = uap->value;
    user_addr_t sizep = uap->size;
    user_size_t size;
    int error;

    if (!kauth_cred_issuser(kauth_cred_get())) {
        return EPERM;
    }

#if CONFIG_MACF
    error = mac_system_check_kas_info(kauth_cred_get(), selector);
    if (error) {
        return error;
    }
#endif

    if (IS_64BIT_PROCESS(p)) {
        user64_size_t size64;
        error = copyin(sizep, &size64, sizeof(size64));
        size = (user_size_t)size64;
    } else {
        user32_size_t size32;
        error = copyin(sizep, &size32, sizeof(size32));
        size = (user_size_t)size32;
    }
    if (error) {
        return error;
    }

    switch (selector) {
    case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
    {
        uint64_t slide = vm_kernel_slide;

        if (sizeof(slide) != size) {
            return EINVAL;
        }

        if (IS_64BIT_PROCESS(p)) {
            user64_size_t size64 = (user64_size_t)size;
            error = copyout(&size64, sizep, sizeof(size64));
        } else {
            user32_size_t size32 = (user32_size_t)size;
            error = copyout(&size32, sizep, sizeof(size32));
        }
        if (error) {
            return error;
        }

        error = copyout(&slide, valuep, sizeof(slide));
        if (error) {
            return error;
        }
    }
        break;
    default:
        return EINVAL;
    }

    return 0;
#endif /* !SECURE_KERNEL */
}
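
/*
 * Illustrative user-space sketch (not part of this file): a root process can
 * query the kernel text slide through the declarations in <sys/kas_info.h>:
 *
 *	#include <sys/kas_info.h>
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
 *		// slide now holds vm_kernel_slide
 */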