/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int		prot)
{
	vm_map_t	map;

	map = current_map();
	return (vm_map_check_protection(
			map,
			vm_map_trunc_page(addr,
					  vm_map_page_mask(map)),
			vm_map_round_page(addr + len,
					  vm_map_page_mask(map)),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();
	kret = vm_map_wire(map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(map)),
			   vm_map_round_page(addr + len,
					     vm_map_page_mask(map)),
			   VM_PROT_READ | VM_PROT_WRITE,
			   FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t	addr,
	user_size_t	len,
	__unused int	dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		     vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(map,
			     vm_map_trunc_page(addr,
					       vm_map_page_mask(map)),
			     vm_map_round_page(addr + len,
					       vm_map_page_mask(map)),
			     FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

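/*
 * Illustrative sketch (not part of the original source): the classic
 * vslock()/vsunlock() pairing around direct I/O to a user buffer, in the
 * style of physio().  Everything here other than vslock()/vsunlock()
 * themselves (the helper name, error flow) is a hypothetical example.
 *
 *	if (vslock(uaddr, len) != 0)		// wire the user pages down
 *		return (ENOMEM);
 *	error = do_device_io(uaddr, len);	// device reads/writes user memory
 *	vsunlock(uaddr, len, 1);		// unwire; "1" = pages were dirtied
 */
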
int
subyte(
	user_addr_t	addr,
	int		byte)
{
	char	character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t	addr,
	int		byte)
{
	char	character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
fubyte(user_addr_t addr)
{
	unsigned char	byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return (-1);
	return (byte);
}

int
fuibyte(user_addr_t addr)
{
	unsigned char	byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return (-1);
	return (byte);
}

int
suword(
	user_addr_t	addr,
	long		word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuword(user_addr_t addr)
{
	long	word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t	addr,
	long		word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuiword(user_addr_t addr)
{
	long	word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t	longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return (-1);
		return (longword);
	} else {
		return ((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t	ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return (-1ULL);
		return (ulongword);
	} else {
		return ((uint64_t)fuiword(addr));
	}
}

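/*
 * Illustrative sketch (not part of the original source): how a syscall
 * handler might use fulong()/sulong() to read and update a user-space,
 * pointer-sized slot without caring whether the calling process is
 * 32-bit or 64-bit.  The variable names are hypothetical.
 *
 *	int64_t val = fulong(uaddr);			// -1 on fault
 *	if (val >= 0 && sulong(uaddr, val + 1) != 0)
 *		return (EFAULT);
 *
 * Note the classic fuword()-family caveat: a legitimate stored value of
 * -1 is indistinguishable from a copyin() failure, so copyin()/copyout()
 * are preferred whenever that distinction matters.
 */
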
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return (ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach
 * port name.
 *
 * Parameters:	args			User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_FAILURE		Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return (err);
}

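/*
 * Illustrative sketch (not part of the original source): calling the
 * pid_for_task() trap from user space via its libsystem wrapper, which
 * is declared in <mach/mach_traps.h>.  Treat the surrounding snippet as
 * an assumption-laden example, not part of this file's interface.
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	int pid;
 *	if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
 *		printf("my pid is %d\n", pid);
 */
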
/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}

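/*
 * Illustrative sketch (not part of the original source): the user-space
 * call that lands in the trap above.  task_for_pid() is declared in
 * <mach/mach_traps.h>; on a default system the caller must be root or
 * satisfy task_for_pid_posix_check() plus any task-access upcall and
 * MACF policy, so expect KERN_FAILURE otherwise.
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	if (kr != KERN_SUCCESS)
 *		fprintf(stderr, "task_for_pid: %s\n", mach_error_string(kr));
 */
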
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else
				error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}


static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return (error);

	if (!kauth_cred_issuser(kauth_cred_get()))
		return (EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return (error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

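/*
 * Illustrative sketch (not part of the original source): reading the
 * task_for_pid policy from user space through the sysctl registered
 * above.  sysctlbyname() is the standard libc interface; the returned
 * value corresponds to the KERN_TFP_POLICY_* constants.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int policy;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
 *		printf("kern.tfp.policy = %d\n", policy);
 */
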
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}

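/*
 * Illustrative sketch (not part of the original source): the dyld-side
 * protocol that the comment above describes, in pseudo-C.  The helper
 * names are hypothetical; only the error semantics come from this file.
 *
 *	uint64_t base;
 *	if (shared_region_check_np(&base) == 0) {
 *		if (cache_at(base) == desired_cache)
 *			use_shared_region();		// everything's ready
 *		else
 *			mmap_cache_privately();		// mismatch: fall back to mmap()
 *	} else {
 *		map_cache_into_shared_region();		// empty/absent: shared_region_map_np()
 *	}
 */
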

int
shared_region_copyin_mappings(
	struct proc			*p,
	user_addr_t			user_mappings,
	unsigned int			mappings_count,
	struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
	struct proc			*p,
	int				fd,
	uint32_t			mappings_count,
	struct shared_file_mapping_np	*mappings,
	uint32_t			slide,
	user_addr_t			slide_start,
	user_addr_t			slide_size)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
#if CONFIG_MACF
	vm_prot_t			maxprot = VM_PROT_ALL;
#endif
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, FILEGLOB_DTYPE(fp->f_fglob)));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}


	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       slide,
				       slide_start,
				       slide_size);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

int
shared_region_map_and_slide_np(
	struct proc					*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
					  slide,
					  uap->slide_start, uap->slide_size);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

done:
	return kr;
}

/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed.  It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int	vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int	vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated, plus busy and no-lock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low-priority I/O type and the I/O
	 * we issued while in this cleaning operation collided with normal
	 * I/O operations, we'll delay in order to mitigate the impact of
	 * this task on the normal operation of the system.
	 */
	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

int
kas_info(struct proc *p,
	 struct kas_info_args *uap,
	 int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		if (IS_64BIT_PROCESS(p)) {
			user64_size_t size64 = (user64_size_t)size;
			error = copyout(&size64, sizep, sizeof(size64));
		} else {
			user32_size_t size32 = (user32_size_t)size;
			error = copyout(&size32, sizep, sizeof(size32));
		}
		if (error) {
			return error;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
	}
		break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}
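
/*
 * Illustrative sketch (not part of the original source): querying the
 * kernel text slide from a root process via the kas_info() wrapper
 * declared in <sys/kas_info.h>.  Treat the wrapper's exact signature as
 * an assumption based on that header.
 *
 *	#include <sys/kas_info.h>
 *	#include <stdio.h>
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
 *		printf("kernel slide: 0x%llx\n", (unsigned long long)slide);
 */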