]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vm_unix.c
xnu-1504.7.4.tar.gz
[apple/xnu.git] / bsd / vm / vm_unix.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/dir.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
64 #include <sys/vm.h>
65 #include <sys/file.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
71 #include <sys/user.h>
72 #include <sys/syslog.h>
73 #include <sys/stat.h>
74 #include <sys/sysproto.h>
75 #include <sys/mman.h>
76 #include <sys/sysctl.h>
77
78 #include <security/audit/audit.h>
79 #include <bsm/audit_kevents.h>
80
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_pageout.h>
85
86 #include <machine/spl.h>
87
88 #include <mach/shared_region.h>
89 #include <vm/vm_shared_region.h>
90
91 #include <vm/vm_protos.h>
92
93 /*
94 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
95 */
96
97 #ifndef SECURE_KERNEL
98 extern int allow_stack_exec, allow_data_exec;
99
100 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
101 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
102 #endif /* !SECURE_KERNEL */
103
/*
 * Human-readable names for VM protection combinations, indexed by a
 * vm_prot_t value masked with VM_PROT_ALL (READ=1, WRITE=2, EXECUTE=4),
 * so the table covers all 8 combinations in order.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
114
115 void
116 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
117 {
118 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
119 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
120 }
121
/* 0: logging disabled; 1: rate-limited logging (default); >1: log every event. */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW,
	   &shared_region_unnest_logging, 0, "");

/* Rate-limit window in seconds, and max messages allowed per window. */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;
135
136 void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
137 struct timeval tv;
138 const char *pcommstr;
139
140 if (shared_region_unnest_logging == 0)
141 return;
142
143 if (shared_region_unnest_logging == 1) {
144 microtime(&tv);
145 if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
146 if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
147 return;
148 }
149 else {
150 last_unnest_log_time = tv.tv_sec;
151 shared_region_unnest_log_count = 0;
152 }
153 }
154
155 pcommstr = current_proc()->p_comm;
156
157 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
158 }
159
160 int
161 useracc(
162 user_addr_t addr,
163 user_size_t len,
164 int prot)
165 {
166 return (vm_map_check_protection(
167 current_map(),
168 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
169 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
170 }
171
172 int
173 vslock(
174 user_addr_t addr,
175 user_size_t len)
176 {
177 kern_return_t kret;
178 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
179 vm_map_round_page(addr+len),
180 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
181
182 switch (kret) {
183 case KERN_SUCCESS:
184 return (0);
185 case KERN_INVALID_ADDRESS:
186 case KERN_NO_SPACE:
187 return (ENOMEM);
188 case KERN_PROTECTION_FAILURE:
189 return (EACCES);
190 default:
191 return (EINVAL);
192 }
193 }
194
/*
 * Counterpart to vslock(): unwire the user pages spanning [addr, addr+len)
 * in the current map.  The "dirtied" argument is currently unused; the
 * compiled-out FIXME code below would mark each page modified when set.
 *
 * Returns: 0 on success; ENOMEM for a bad address range; EACCES on a
 * protection failure; EINVAL for any other Mach error.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	/* NOTE(review): disabled code — would walk the page range and mark
	 * each page modified when "dirtied" is non-zero. Compiled out. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
			     vm_map_round_page(addr+len), FALSE);
	/* Translate the Mach status into the BSD errno callers expect. */
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
238
239 int
240 subyte(
241 user_addr_t addr,
242 int byte)
243 {
244 char character;
245
246 character = (char)byte;
247 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
248 }
249
250 int
251 suibyte(
252 user_addr_t addr,
253 int byte)
254 {
255 char character;
256
257 character = (char)byte;
258 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
259 }
260
261 int fubyte(user_addr_t addr)
262 {
263 unsigned char byte;
264
265 if (copyin(addr, (void *) &byte, sizeof(char)))
266 return(-1);
267 return(byte);
268 }
269
270 int fuibyte(user_addr_t addr)
271 {
272 unsigned char byte;
273
274 if (copyin(addr, (void *) &(byte), sizeof(char)))
275 return(-1);
276 return(byte);
277 }
278
279 int
280 suword(
281 user_addr_t addr,
282 long word)
283 {
284 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
285 }
286
287 long fuword(user_addr_t addr)
288 {
289 long word = 0;
290
291 if (copyin(addr, (void *) &word, sizeof(int)))
292 return(-1);
293 return(word);
294 }
295
296 /* suiword and fuiword are the same as suword and fuword, respectively */
297
298 int
299 suiword(
300 user_addr_t addr,
301 long word)
302 {
303 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
304 }
305
306 long fuiword(user_addr_t addr)
307 {
308 long word = 0;
309
310 if (copyin(addr, (void *) &word, sizeof(int)))
311 return(-1);
312 return(word);
313 }
314
315 /*
316 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
317 * fetching and setting of process-sized size_t and pointer values.
318 */
319 int
320 sulong(user_addr_t addr, int64_t word)
321 {
322
323 if (IS_64BIT_PROCESS(current_proc())) {
324 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
325 } else {
326 return(suiword(addr, (long)word));
327 }
328 }
329
330 int64_t
331 fulong(user_addr_t addr)
332 {
333 int64_t longword;
334
335 if (IS_64BIT_PROCESS(current_proc())) {
336 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
337 return(-1);
338 return(longword);
339 } else {
340 return((int64_t)fuiword(addr));
341 }
342 }
343
344 int
345 suulong(user_addr_t addr, uint64_t uword)
346 {
347
348 if (IS_64BIT_PROCESS(current_proc())) {
349 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
350 } else {
351 return(suiword(addr, (uint32_t)uword));
352 }
353 }
354
355 uint64_t
356 fuulong(user_addr_t addr)
357 {
358 uint64_t ulongword;
359
360 if (IS_64BIT_PROCESS(current_proc())) {
361 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
362 return(-1ULL);
363 return(ulongword);
364 } else {
365 return((uint64_t)fuiword(addr));
366 }
367 }
368
/*
 * swapon system call: swap configuration is not supported here;
 * always fails with ENOTSUP.
 */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}
374
375 /*
376 * pid_for_task
377 *
378 * Find the BSD process ID for the Mach task associated with the given Mach port
379 * name
380 *
381 * Parameters: args User argument descriptor (see below)
382 *
383 * Indirect parameters: args->t Mach port name
384 * args->pid Process ID (returned value; see below)
385 *
386  * Returns:	KERN_SUCCESS			Success
387 * KERN_FAILURE Not success
388 *
389 * Implicit returns: args->pid Process ID
390 *
391 */
392 kern_return_t
393 pid_for_task(
394 struct pid_for_task_args *args)
395 {
396 mach_port_name_t t = args->t;
397 user_addr_t pid_addr = args->pid;
398 proc_t p;
399 task_t t1;
400 int pid = -1;
401 kern_return_t err = KERN_SUCCESS;
402
403 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
404 AUDIT_ARG(mach_port1, t);
405
406 t1 = port_name_to_task(t);
407
408 if (t1 == TASK_NULL) {
409 err = KERN_FAILURE;
410 goto pftout;
411 } else {
412 p = get_bsdtask_info(t1);
413 if (p) {
414 pid = proc_pid(p);
415 err = KERN_SUCCESS;
416 } else {
417 err = KERN_FAILURE;
418 }
419 }
420 task_deallocate(t1);
421 pftout:
422 AUDIT_ARG(pid, pid);
423 (void) copyout((char *) &pid, pid_addr, sizeof(int));
424 AUDIT_MACH_SYSCALL_EXIT(err);
425 return(err);
426 }
427
428 /*
429 *
430 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
431 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
432 *
433 */
434 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
435
436 /*
437 * Routine: task_for_pid_posix_check
438 * Purpose:
439 * Verify that the current process should be allowed to
440 * get the target process's task port. This is only
441 * permitted if:
442 * - The current process is root
443 * OR all of the following are true:
444 * - The target process's real, effective, and saved uids
445 * are the same as the current proc's euid,
446 * - The target process's group set is a subset of the
447 * calling process's group set, and
448 * - The target process hasn't switched credentials.
449 *
450 * Returns: TRUE: permitted
451 * FALSE: denied
452 */
/*
 * See the block comment above for the full policy.  Security-critical:
 * the order of these checks matters (root and self short-circuit before
 * the DENY-policy gate and the credential comparisons).
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (targetcred->cr_ruid != myuid) ||
	    (targetcred->cr_svuid != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	/* Drop the cred reference taken by kauth_cred_proc_ref() on all paths. */
	kauth_cred_unref(&targetcred);
	return allowed;
}
512
513 /*
514 * Routine: task_for_pid
515 * Purpose:
516 * Get the task port for another "process", named by its
517 * process ID on the same host as "target_task".
518 *
519 * Only permitted to privileged processes, or processes
520 * with the same user ID.
521 *
522 * Note: if pid == 0, an error is return no matter who is calling.
523 *
524 * XXX This should be a BSD system call, not a Mach trap!!!
525 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		/* t1 is TASK_NULL here, so a null port name is copied out. */
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	/* Validate the caller-supplied task port before doing any work. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
#if CONFIG_AUDIT
	if (p != PROC_NULL)
		AUDIT_ARG(process, p);
#endif

	/* POSIX permission check; it handles p == PROC_NULL itself. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		/* MAC policy hook may still veto access. */
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	/* All paths: drop the task ref, copy out the (possibly null) port
	 * name, and release the proc ref taken by proc_find(). */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
619
620 /*
621 * Routine: task_name_for_pid
622 * Purpose:
623 * Get the task name port for another "process", named by its
624 * process ID on the same host as "target_task".
625 *
626 * Only permitted to privileged processes, or processes
627 * with the same user ID.
628 *
629 * XXX This should be a BSD system call, not a Mach trap!!!
630 */
631
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1;
	mach_port_name_t tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller-supplied task port before doing any work. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/* Permitted if the target is live AND the caller is the
		 * target itself, the superuser, or shares both euid and
		 * ruid with the target. */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				/* MAC policy hook may still veto access. */
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
					     get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Failure path: report a null port name to the caller. */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
707
/*
 * Suspend the task of the process named by args->pid.  Access is gated by
 * the MAC suspend/resume hook, task_for_pid_posix_check(), and (on
 * non-embedded configs) the target's task access server, mirroring
 * task_for_pid().  Returns a kern_return_t-style status, also stored
 * through "ret".
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int 	pid = args->pid;
	int 	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, 0); /* 0 for suspend */
	if (error) {
		error = KERN_FAILURE;
		goto out;
	}
#endif

	if (pid == 0) {
		error = KERN_FAILURE;
		goto out;
	}

	/* posix check handles targetproc == PROC_NULL (pid not found). */
	targetproc = proc_find(pid);
	if (!task_for_pid_posix_check(targetproc)) {
		error = KERN_FAILURE;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto out;
			}
		}
	}
#endif

	/* Hold a ref across the suspend in case the target exits underneath us. */
	task_reference(target);
	error = task_suspend(target);
	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
775
776 kern_return_t
777 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
778 {
779 task_t target = NULL;
780 proc_t targetproc = PROC_NULL;
781 int pid = args->pid;
782 int error = 0;
783
784 #if CONFIG_MACF
785 error = mac_proc_check_suspend_resume(p, 1); /* 1 for resume */
786 if (error) {
787 error = KERN_FAILURE;
788 goto out;
789 }
790 #endif
791
792 if (pid == 0) {
793 error = KERN_FAILURE;
794 goto out;
795 }
796
797 targetproc = proc_find(pid);
798 if (!task_for_pid_posix_check(targetproc)) {
799 error = KERN_FAILURE;
800 goto out;
801 }
802
803 target = targetproc->task;
804 #ifndef CONFIG_EMBEDDED
805 if (target != TASK_NULL) {
806 mach_port_t tfpport;
807
808 /* If we aren't root and target's task access port is set... */
809 if (!kauth_cred_issuser(kauth_cred_get()) &&
810 targetproc != current_proc() &&
811 (task_get_task_access_port(target, &tfpport) == 0) &&
812 (tfpport != IPC_PORT_NULL)) {
813
814 if (tfpport == IPC_PORT_DEAD) {
815 error = KERN_PROTECTION_FAILURE;
816 goto out;
817 }
818
819 /* Call up to the task access server */
820 error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);
821
822 if (error != MACH_MSG_SUCCESS) {
823 if (error == MACH_RCV_INTERRUPTED)
824 error = KERN_ABORTED;
825 else
826 error = KERN_FAILURE;
827 goto out;
828 }
829 }
830 }
831 #endif
832
833 task_reference(target);
834 error = task_resume(target);
835 task_deallocate(target);
836
837 out:
838 if (targetproc != PROC_NULL)
839 proc_rele(targetproc);
840 *ret = error;
841 return error;
842
843 return 0;
844 }
845
846 static int
847 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
848 __unused int arg2, struct sysctl_req *req)
849 {
850 int error = 0;
851 int new_value;
852
853 error = SYSCTL_OUT(req, arg1, sizeof(int));
854 if (error || req->newptr == USER_ADDR_NULL)
855 return(error);
856
857 if (!is_suser())
858 return(EPERM);
859
860 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
861 goto out;
862 }
863 if ((new_value == KERN_TFP_POLICY_DENY)
864 || (new_value == KERN_TFP_POLICY_DEFAULT))
865 tfp_policy = new_value;
866 else
867 error = EINVAL;
868 out:
869 return(error);
870
871 }
872
873 #if defined(SECURE_KERNEL)
874 static int kern_secure_kernel = 1;
875 #else
876 static int kern_secure_kernel = 0;
877 #endif
878
879 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");
880
881 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
882 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
883 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
884
885 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
886 &shared_region_trace_level, 0, "");
887 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
888 &shared_region_version, 0, "");
889 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
890 &shared_region_persistence, 0, "");
891
892 /*
893 * shared_region_check_np:
894 *
895 * This system call is intended for dyld.
896 *
897 * dyld calls this when any process starts to see if the process's shared
898 * region is already set up and ready to use.
899 * This call returns the base address of the first mapping in the
900 * process's shared region's first mapping.
901 * dyld will then check what's mapped at that address.
902 *
903 * If the shared region is empty, dyld will then attempt to map the shared
904 * cache file in the shared region via the shared_region_map_np() system call.
905 *
906 * If something's already mapped in the shared region, dyld will check if it
907 * matches the shared cache it would like to use for that process.
908  * If it matches, everything's ready and the process can proceed and use the
909 * shared region.
910 * If it doesn't match, dyld will unmap the shared region and map the shared
911 * cache into the process's address space via mmap().
912 *
913 * ERROR VALUES
914 * EINVAL no shared region
915 * ENOMEM shared region is empty
916 * EFAULT bad address for "start_address"
917 */
918 int
919 shared_region_check_np(
920 __unused struct proc *p,
921 struct shared_region_check_np_args *uap,
922 __unused int *retvalp)
923 {
924 vm_shared_region_t shared_region;
925 mach_vm_offset_t start_address;
926 int error;
927 kern_return_t kr;
928
929 SHARED_REGION_TRACE_DEBUG(
930 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
931 current_thread(), p->p_pid, p->p_comm,
932 (uint64_t)uap->start_address));
933
934 /* retrieve the current tasks's shared region */
935 shared_region = vm_shared_region_get(current_task());
936 if (shared_region != NULL) {
937 /* retrieve address of its first mapping... */
938 kr = vm_shared_region_start_address(shared_region,
939 &start_address);
940 if (kr != KERN_SUCCESS) {
941 error = ENOMEM;
942 } else {
943 /* ... and give it to the caller */
944 error = copyout(&start_address,
945 (user_addr_t) uap->start_address,
946 sizeof (start_address));
947 if (error) {
948 SHARED_REGION_TRACE_ERROR(
949 ("shared_region: %p [%d(%s)] "
950 "check_np(0x%llx) "
951 "copyout(0x%llx) error %d\n",
952 current_thread(), p->p_pid, p->p_comm,
953 (uint64_t)uap->start_address, (uint64_t)start_address,
954 error));
955 }
956 }
957 vm_shared_region_deallocate(shared_region);
958 } else {
959 /* no shared region ! */
960 error = EINVAL;
961 }
962
963 SHARED_REGION_TRACE_DEBUG(
964 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
965 current_thread(), p->p_pid, p->p_comm,
966 (uint64_t)uap->start_address, (uint64_t)start_address, error));
967
968 return error;
969 }
970
971 /*
972 * shared_region_map_np()
973 *
974 * This system call is intended for dyld.
975 *
976 * dyld uses this to map a shared cache file into a shared region.
977 * This is usually done only the first time a shared cache is needed.
978 * Subsequent processes will just use the populated shared region without
979 * requiring any further setup.
980 */
981 int
982 shared_region_map_np(
983 struct proc *p,
984 struct shared_region_map_np_args *uap,
985 __unused int *retvalp)
986 {
987 int error;
988 kern_return_t kr;
989 int fd;
990 struct fileproc *fp;
991 struct vnode *vp, *root_vp;
992 struct vnode_attr va;
993 off_t fs;
994 memory_object_size_t file_size;
995 user_addr_t user_mappings;
996 struct shared_file_mapping_np *mappings;
997 #define SFM_MAX_STACK 8
998 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
999 unsigned int mappings_count;
1000 vm_size_t mappings_size;
1001 memory_object_control_t file_control;
1002 struct vm_shared_region *shared_region;
1003
1004 SHARED_REGION_TRACE_DEBUG(
1005 ("shared_region: %p [%d(%s)] -> map\n",
1006 current_thread(), p->p_pid, p->p_comm));
1007
1008 shared_region = NULL;
1009 mappings_count = 0;
1010 mappings_size = 0;
1011 mappings = NULL;
1012 fp = NULL;
1013 vp = NULL;
1014
1015 /* get file descriptor for shared region cache file */
1016 fd = uap->fd;
1017
1018 /* get file structure from file descriptor */
1019 error = fp_lookup(p, fd, &fp, 0);
1020 if (error) {
1021 SHARED_REGION_TRACE_ERROR(
1022 ("shared_region: %p [%d(%s)] map: "
1023 "fd=%d lookup failed (error=%d)\n",
1024 current_thread(), p->p_pid, p->p_comm, fd, error));
1025 goto done;
1026 }
1027
1028 /* make sure we're attempting to map a vnode */
1029 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
1030 SHARED_REGION_TRACE_ERROR(
1031 ("shared_region: %p [%d(%s)] map: "
1032 "fd=%d not a vnode (type=%d)\n",
1033 current_thread(), p->p_pid, p->p_comm,
1034 fd, fp->f_fglob->fg_type));
1035 error = EINVAL;
1036 goto done;
1037 }
1038
1039 /* we need at least read permission on the file */
1040 if (! (fp->f_fglob->fg_flag & FREAD)) {
1041 SHARED_REGION_TRACE_ERROR(
1042 ("shared_region: %p [%d(%s)] map: "
1043 "fd=%d not readable\n",
1044 current_thread(), p->p_pid, p->p_comm, fd));
1045 error = EPERM;
1046 goto done;
1047 }
1048
1049 /* get vnode from file structure */
1050 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
1051 if (error) {
1052 SHARED_REGION_TRACE_ERROR(
1053 ("shared_region: %p [%d(%s)] map: "
1054 "fd=%d getwithref failed (error=%d)\n",
1055 current_thread(), p->p_pid, p->p_comm, fd, error));
1056 goto done;
1057 }
1058 vp = (struct vnode *) fp->f_fglob->fg_data;
1059
1060 /* make sure the vnode is a regular file */
1061 if (vp->v_type != VREG) {
1062 SHARED_REGION_TRACE_ERROR(
1063 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1064 "not a file (type=%d)\n",
1065 current_thread(), p->p_pid, p->p_comm,
1066 vp, vp->v_name, vp->v_type));
1067 error = EINVAL;
1068 goto done;
1069 }
1070
1071 /* make sure vnode is on the process's root volume */
1072 root_vp = p->p_fd->fd_rdir;
1073 if (root_vp == NULL) {
1074 root_vp = rootvnode;
1075 }
1076 if (vp->v_mount != root_vp->v_mount) {
1077 SHARED_REGION_TRACE_ERROR(
1078 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1079 "not on process's root volume\n",
1080 current_thread(), p->p_pid, p->p_comm,
1081 vp, vp->v_name));
1082 error = EPERM;
1083 goto done;
1084 }
1085
1086 /* make sure vnode is owned by "root" */
1087 VATTR_INIT(&va);
1088 VATTR_WANTED(&va, va_uid);
1089 error = vnode_getattr(vp, &va, vfs_context_current());
1090 if (error) {
1091 SHARED_REGION_TRACE_ERROR(
1092 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1093 "vnode_getattr(%p) failed (error=%d)\n",
1094 current_thread(), p->p_pid, p->p_comm,
1095 vp, vp->v_name, vp, error));
1096 goto done;
1097 }
1098 if (va.va_uid != 0) {
1099 SHARED_REGION_TRACE_ERROR(
1100 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1101 "owned by uid=%d instead of 0\n",
1102 current_thread(), p->p_pid, p->p_comm,
1103 vp, vp->v_name, va.va_uid));
1104 error = EPERM;
1105 goto done;
1106 }
1107
1108 /* get vnode size */
1109 error = vnode_size(vp, &fs, vfs_context_current());
1110 if (error) {
1111 SHARED_REGION_TRACE_ERROR(
1112 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1113 "vnode_size(%p) failed (error=%d)\n",
1114 current_thread(), p->p_pid, p->p_comm,
1115 vp, vp->v_name, vp, error));
1116 goto done;
1117 }
1118 file_size = fs;
1119
1120 /* get the file's memory object handle */
1121 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1122 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1123 SHARED_REGION_TRACE_ERROR(
1124 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1125 "no memory object\n",
1126 current_thread(), p->p_pid, p->p_comm,
1127 vp, vp->v_name));
1128 error = EINVAL;
1129 goto done;
1130 }
1131
1132 /* get the list of mappings the caller wants us to establish */
1133 mappings_count = uap->count; /* number of mappings */
1134 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
1135 if (mappings_count == 0) {
1136 SHARED_REGION_TRACE_INFO(
1137 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1138 "no mappings\n",
1139 current_thread(), p->p_pid, p->p_comm,
1140 vp, vp->v_name));
1141 error = 0; /* no mappings: we're done ! */
1142 goto done;
1143 } else if (mappings_count <= SFM_MAX_STACK) {
1144 mappings = &stack_mappings[0];
1145 } else {
1146 SHARED_REGION_TRACE_ERROR(
1147 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1148 "too many mappings (%d)\n",
1149 current_thread(), p->p_pid, p->p_comm,
1150 vp, vp->v_name, mappings_count));
1151 error = EINVAL;
1152 goto done;
1153 }
1154
1155 user_mappings = uap->mappings; /* the mappings, in user space */
1156 error = copyin(user_mappings,
1157 mappings,
1158 mappings_size);
1159 if (error) {
1160 SHARED_REGION_TRACE_ERROR(
1161 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1162 "copyin(0x%llx, %d) failed (error=%d)\n",
1163 current_thread(), p->p_pid, p->p_comm,
1164 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
1165 goto done;
1166 }
1167
1168 /* get the process's shared region (setup in vm_map_exec()) */
1169 shared_region = vm_shared_region_get(current_task());
1170 if (shared_region == NULL) {
1171 SHARED_REGION_TRACE_ERROR(
1172 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1173 "no shared region\n",
1174 current_thread(), p->p_pid, p->p_comm,
1175 vp, vp->v_name));
1176 goto done;
1177 }
1178
1179 /* map the file into that shared region's submap */
1180 kr = vm_shared_region_map_file(shared_region,
1181 mappings_count,
1182 mappings,
1183 file_control,
1184 file_size,
1185 (void *) p->p_fd->fd_rdir);
1186 if (kr != KERN_SUCCESS) {
1187 SHARED_REGION_TRACE_ERROR(
1188 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1189 "vm_shared_region_map_file() failed kr=0x%x\n",
1190 current_thread(), p->p_pid, p->p_comm,
1191 vp, vp->v_name, kr));
1192 switch (kr) {
1193 case KERN_INVALID_ADDRESS:
1194 error = EFAULT;
1195 break;
1196 case KERN_PROTECTION_FAILURE:
1197 error = EPERM;
1198 break;
1199 case KERN_NO_SPACE:
1200 error = ENOMEM;
1201 break;
1202 case KERN_FAILURE:
1203 case KERN_INVALID_ARGUMENT:
1204 default:
1205 error = EINVAL;
1206 break;
1207 }
1208 goto done;
1209 }
1210
1211 error = 0;
1212
1213 /* update the vnode's access time */
1214 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
1215 VATTR_INIT(&va);
1216 nanotime(&va.va_access_time);
1217 VATTR_SET_ACTIVE(&va, va_access_time);
1218 vnode_setattr(vp, &va, vfs_context_current());
1219 }
1220
1221 if (p->p_flag & P_NOSHLIB) {
1222 /* signal that this process is now using split libraries */
1223 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
1224 }
1225
1226 done:
1227 if (vp != NULL) {
1228 /*
1229 * release the vnode...
1230 * ubc_map() still holds it for us in the non-error case
1231 */
1232 (void) vnode_put(vp);
1233 vp = NULL;
1234 }
1235 if (fp != NULL) {
1236 /* release the file descriptor */
1237 fp_drop(p, fd, fp, 0);
1238 fp = NULL;
1239 }
1240
1241 if (shared_region != NULL) {
1242 vm_shared_region_deallocate(shared_region);
1243 }
1244
1245 SHARED_REGION_TRACE_DEBUG(
1246 ("shared_region: %p [%d(%s)] <- map\n",
1247 current_thread(), p->p_pid, p->p_comm));
1248
1249 return error;
1250 }
1251
1252
/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed.  It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
	   &vm_page_free_target, 0, "Pageout daemon free target");

/* read-only snapshot of the kernel's memory pressure indicator (vm.memory_pressure) */
extern unsigned int vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD,
	   &vm_memory_pressure, 0, "Memory pressure indicator");
1265
1266 static int
1267 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
1268 {
1269 #pragma unused(oidp, arg1, arg2)
1270 unsigned int page_free_wanted;
1271
1272 page_free_wanted = mach_vm_ctl_page_free_wanted();
1273 return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
1274 }
1275 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
1276 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
1277 0, 0, vm_ctl_page_free_wanted, "I", "");
1278
/* counts of purgeable pages currently resident: pageable vs. wired */
extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

/*
 * Read-only counters from vm_page_stats_reusable, tracking the
 * "reusable"/"reuse" page operations (success/failure totals and how
 * often the all-pages vs. partial-range variants of each call ran).
 */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD,
	   &vm_page_stats_reusable.can_reuse_failure, "");
1311
1312
1313 int
1314 vm_pressure_monitor(
1315 __unused struct proc *p,
1316 struct vm_pressure_monitor_args *uap,
1317 int *retval)
1318 {
1319 kern_return_t kr;
1320 uint32_t pages_reclaimed;
1321 uint32_t pages_wanted;
1322
1323 kr = mach_vm_pressure_monitor(
1324 (boolean_t) uap->wait_for_pressure,
1325 uap->nsecs_monitored,
1326 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
1327 &pages_wanted);
1328
1329 switch (kr) {
1330 case KERN_SUCCESS:
1331 break;
1332 case KERN_ABORTED:
1333 return EINTR;
1334 default:
1335 return EINVAL;
1336 }
1337
1338 if (uap->pages_reclaimed) {
1339 if (copyout((void *)&pages_reclaimed,
1340 uap->pages_reclaimed,
1341 sizeof (pages_reclaimed)) != 0) {
1342 return EFAULT;
1343 }
1344 }
1345
1346 *retval = (int) pages_wanted;
1347 return 0;
1348 }