/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map(struct proc *, int, unsigned int,
    struct shared_file_mapping_np *, memory_object_control_t *,
    struct shared_file_mapping_np *);
int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t,
    mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
int shared_region_copyin_mappings(struct proc *, user_addr_t, unsigned int,
    struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");


/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr,
	    prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. "
	    "While not abnormal for debuggers, this increases system memory footprint until the target exits.\n",
	    pcommstr, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int		prot)
{
	return (vm_map_check_protection(
			current_map(),
			vm_map_trunc_page(addr), vm_map_round_page(addr+len),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t kret;

	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
	    vm_map_round_page(addr+len),
	    VM_PROT_READ | VM_PROT_WRITE, FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t	addr,
	user_size_t	len,
	__unused int	dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t kret;

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
	    vm_map_round_page(addr+len), FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return (-1);
	return (byte);
}

int
fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return (-1);
	return (byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return (-1);
		return (longword);
	} else {
		return ((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return (-1ULL);
		return (ulongword);
	} else {
		return ((uint64_t)fuiword(addr));
	}
}

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return (ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args			User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_FAILURE		Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return (err);
}
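
/*
 * Illustrative only: a minimal user-space sketch of driving the trap above.
 * This block is not part of the kernel build (guarded by #if 0); the trap
 * prototype shown is assumed to be the one libsystem_kernel exports to user
 * space.
 */
#if 0	/* user-space example, not compiled here */
#include <mach/mach.h>
#include <stdio.h>

extern kern_return_t pid_for_task(mach_port_name_t t, int *pid);

int
main(void)
{
	int pid = -1;

	/* Ask the kernel which BSD pid backs our own task port. */
	if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
		printf("my pid is %d\n", pid);
	return 0;
}
#endif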

/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 * Routine:	task_for_pid_posix_check
 * Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port. This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 * Routine:	task_for_pid
 * Purpose:
 *	Get the task port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 *	Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
			sright,
			get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}
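
/*
 * Illustrative only: a hedged user-space sketch of calling task_for_pid().
 * Not part of the kernel build (#if 0). Unprivileged callers that fail the
 * posix check or tfp policy above will typically get KERN_FAILURE back.
 */
#if 0	/* user-space example, not compiled here */
#include <mach/mach.h>
#include <stdio.h>

extern kern_return_t task_for_pid(mach_port_name_t target_tport, int pid,
    mach_port_name_t *t);

static mach_port_name_t
get_task_port(int pid)
{
	mach_port_name_t task = MACH_PORT_NULL;
	kern_return_t kr;

	kr = task_for_pid(mach_task_self(), pid, &task);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "task_for_pid(%d) failed: kr=%d\n", pid, kr);
		return MACH_PORT_NULL;
	}
	return task;	/* caller owns a send right; deallocate when done */
}
#endif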

/*
 * Routine:	task_name_for_pid
 * Purpose:
 *	Get the task name port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}
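
/*
 * Illustrative only: the weaker-privilege sibling of task_for_pid() from
 * user space. A task *name* port only identifies the task; it cannot be
 * used to control it, which is why the checks above are looser. Not part
 * of the kernel build (#if 0); prototype assumed from the trap arguments.
 */
#if 0	/* user-space example, not compiled here */
#include <mach/mach.h>
#include <stdio.h>

extern kern_return_t task_name_for_pid(mach_port_name_t target_tport, int pid,
    mach_port_name_t *tn);

static void
show_task_name_port(int pid)
{
	mach_port_name_t name_port = MACH_PORT_NULL;

	if (task_name_for_pid(mach_task_self(), pid, &name_port) == KERN_SUCCESS) {
		printf("got task name port 0x%x for pid %d\n", name_port, pid);
		mach_port_deallocate(mach_task_self(), name_port);
	}
}
#endif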

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(pid);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(pid);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}

#if CONFIG_EMBEDDED
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int	error = 0;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;

#ifndef CONFIG_FREEZE
#pragma unused(pid)
#else

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * The only accepted pid value here is currently -1, since we just kick
	 * off the freeze thread here - individual pids aren't required. However,
	 * this call is intended to change in the future to initiate freezing of
	 * individual processes. In anticipation, we'll obtain the process handle
	 * for potentially valid values and call task_for_pid_posix_check(); this
	 * way, everything is validated correctly and set up for further
	 * refactoring. See <rdar://problem/7839708> for more details.
	 */
	if (pid >= 0) {
		targetproc = proc_find(pid);
		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

	if (pid == -1) {
		memorystatus_on_inactivity(pid);
	} else {
		error = EPERM;
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int		error = 0;
	proc_t		targetproc = PROC_NULL;
	struct filedesc	*fdp;
	struct fileproc	*fp;
	int		pid = args->pid;
	int		level = args->level;
	int		i;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	proc_fdlock(targetproc);
	fdp = targetproc->p_fd;

	for (i = 0; i < fdp->fd_nfiles; i++) {
		struct socket *sockp;

		fp = fdp->fd_ofiles[i];
		if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
		    fp->f_fglob->fg_type != DTYPE_SOCKET) {
			continue;
		}

		sockp = (struct socket *)fp->f_fglob->fg_data;

		/* Call networking stack with socket and level */
		(void) socket_defunct(targetproc, sockp, level);
	}

	proc_fdunlock(targetproc);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
#endif /* CONFIG_EMBEDDED */

static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return (error);

	if (!is_suser())
		return (EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return (error);

}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
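
/*
 * Illustrative only: a user-space sketch that reads the kern.tfp.policy knob
 * registered above via sysctlbyname(3). Writing a new value uses the same
 * call with newp/newlen set, and sysctl_settfp_policy() restricts writes to
 * the superuser. Not part of the kernel build (#if 0).
 */
#if 0	/* user-space example, not compiled here */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int policy = 0;
	size_t len = sizeof(policy);

	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
		printf("kern.tfp.policy = %d\n", policy);
	return 0;
}
#endif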

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
		    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
			    (user_addr_t) uap->start_address,
			    sizeof(start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
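
/*
 * Illustrative only: a sketch of the dyld-side probe described in the block
 * comment above. The stub name __shared_region_check_np is assumed from the
 * dyld sources; it is a private interface, not a supported API. Not part of
 * the kernel build (#if 0).
 */
#if 0	/* user-space (dyld-side) example, not compiled here */
#include <stdint.h>
#include <stdio.h>

extern int __shared_region_check_np(uint64_t *start_address);

static void
probe_shared_region(void)
{
	uint64_t base = 0;

	/* 0 => a region exists; dyld then inspects what's mapped at base */
	if (__shared_region_check_np(&base) == 0)
		printf("shared region first mapping at 0x%llx\n",
		    (unsigned long long)base);
	else
		printf("no shared region; dyld would map the shared cache\n");
}
#endif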


int
shared_region_copyin_mappings(
	struct proc			*p,
	user_addr_t			user_mappings,
	unsigned int			mappings_count,
	struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof(mappings[0]));
	error = copyin(user_mappings,
	    mappings,
	    mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map(
	struct proc			*p,
	int				fd,
	uint32_t			mappings_count,
	struct shared_file_mapping_np	*mappings,
	memory_object_control_t		*sr_file_control,
	struct shared_file_mapping_np	*mapping_to_slide)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	vm_prot_t			maxprot = VM_PROT_ALL;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (!(fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
	    fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	if (sr_file_control != NULL) {
		*sr_file_control = file_control;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
	    mappings_count,
	    mappings,
	    file_control,
	    file_size,
	    (void *) p->p_fd->fd_rdir,
	    mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

int
_shared_region_slide(uint32_t slide,
	mach_vm_offset_t	entry_start_address,
	mach_vm_size_t		entry_size,
	mach_vm_offset_t	slide_start,
	mach_vm_size_t		slide_size,
	memory_object_control_t	sr_file_control)
{
	void	*slide_info_entry = NULL;
	int	error;

	if ((error = vm_shared_region_slide_init(slide_size, entry_start_address,
	    entry_size, slide, sr_file_control))) {
		printf("slide_info initialization failed with kr=%d\n", error);
		goto done;
	}

	slide_info_entry = vm_shared_region_get_slide_info_entry();
	if (slide_info_entry == NULL) {
		error = EFAULT;
	} else {
		error = copyin((user_addr_t)slide_start,
		    slide_info_entry,
		    (vm_size_t)slide_size);
	}
	if (error) {
		goto done;
	}

	if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
		error = EFAULT;
		printf("Sanity Check failed for slide_info\n");
	} else {
#if DEBUG
		printf("Successfully initialized slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
		    (void*)(uintptr_t)entry_start_address,
		    (unsigned long)entry_size,
		    (unsigned long)slide_size);
#endif
	}
done:
	return error;
}

int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int				*retvalp)
{
	struct shared_file_mapping_np	mapping_to_slide;
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;

	memory_object_control_t		sr_file_control;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted?? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}

	kr = _shared_region_map(p, uap->fd, mappings_count, mappings,
	    &sr_file_control, &mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (slide) {
		kr = _shared_region_slide(slide,
		    mapping_to_slide.sfm_file_offset,
		    mapping_to_slide.sfm_size,
		    uap->slide_start,
		    uap->slide_size,
		    sr_file_control);
		if (kr != KERN_SUCCESS) {
			vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
			return kr;
		}
	}
done:
	return kr;
}

/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed. It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");
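
/*
 * Illustrative only: computing the "headroom" the comment above describes,
 * from user space via sysctlbyname(3). vm.vm_page_free_target and
 * vm.page_free_count are the OIDs registered in this file. Not part of the
 * kernel build (#if 0).
 */
#if 0	/* user-space example, not compiled here */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned int free_target = 0, free_count = 0;
	size_t len = sizeof(unsigned int);

	if (sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0) != 0)
		return 1;
	len = sizeof(unsigned int);
	if (sysctlbyname("vm.page_free_count", &free_count, &len, NULL, 0) != 0)
		return 1;

	/* pages free beyond the pageout daemon's target are safe to consume */
	printf("headroom: %u pages\n",
	    free_count > free_target ? free_count - free_target : 0);
	return 0;
}
#endif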

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, "");	/* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");	/* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * The thread is marked as a low priority I/O type, and the I/O we
	 * issued while in this cleaning operation collided with normal I/O
	 * operations... we'll delay in order to mitigate the impact of this
	 * task on the normal operation of the system.
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(TRUE);
	}

}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
		    uap->pages_reclaimed,
		    sizeof(pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

int
kas_info(struct proc *p,
	struct kas_info_args *uap,
	int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		if (IS_64BIT_PROCESS(p)) {
			user64_size_t size64 = (user64_size_t)size;
			error = copyout(&size64, sizep, sizeof(size64));
		} else {
			user32_size_t size32 = (user32_size_t)size;
			error = copyout(&size32, sizep, sizeof(size32));
		}
		if (error) {
			return error;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
	}
	break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}