/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map(struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");


/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
    "none",
    "read-only",
    "write-only",
    "read-write",
    "execute-only",
    "read-execute",
    "write-execute",
    "read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
    printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
        current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
    struct timeval tv;
    const char *pcommstr;

    if (shared_region_unnest_logging == 0)
        return;

    if (shared_region_unnest_logging == 1) {
        microtime(&tv);
        if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
            if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
                return;
        }
        else {
            last_unnest_log_time = tv.tv_sec;
            shared_region_unnest_log_count = 0;
        }
    }

    pcommstr = current_proc()->p_comm;

    printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
    user_addr_t addr,
    user_size_t len,
    int prot)
{
    vm_map_t map;

    map = current_map();
    return (vm_map_check_protection(
            map,
            vm_map_trunc_page(addr,
                              vm_map_page_mask(map)),
            vm_map_round_page(addr+len,
                              vm_map_page_mask(map)),
            prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}
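
/*
 * Usage sketch (illustrative, not part of the original source): callers
 * typically use useracc() to validate a user range before choosing a copy
 * strategy.
 *
 *	if (useracc(uaddr, len, B_READ)) {
 *		// range was readable at the time of the check; a copyin()
 *		// can still race with a concurrent munmap()/mprotect()
 *	}
 */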

int
vslock(
    user_addr_t addr,
    user_size_t len)
{
    kern_return_t kret;
    vm_map_t map;

    map = current_map();
    kret = vm_map_wire(map,
                       vm_map_trunc_page(addr,
                                         vm_map_page_mask(map)),
                       vm_map_round_page(addr+len,
                                         vm_map_page_mask(map)),
                       VM_PROT_READ | VM_PROT_WRITE,
                       FALSE);

    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}

int
vsunlock(
    user_addr_t addr,
    user_size_t len,
    __unused int dirtied)
{
#if FIXME  /* [ */
    pmap_t pmap;
    vm_page_t pg;
    vm_map_offset_t vaddr;
    ppnum_t paddr;
#endif  /* FIXME ] */
    kern_return_t kret;
    vm_map_t map;

    map = current_map();

#if FIXME  /* [ */
    if (dirtied) {
        pmap = get_task_pmap(current_task());
        for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
             vaddr < vm_map_round_page(addr+len, PAGE_MASK);
             vaddr += PAGE_SIZE) {
            paddr = pmap_extract(pmap, vaddr);
            pg = PHYS_TO_VM_PAGE(paddr);
            vm_page_set_modified(pg);
        }
    }
#endif  /* FIXME ] */
#ifdef lint
    dirtied++;
#endif /* lint */
    kret = vm_map_unwire(map,
                         vm_map_trunc_page(addr,
                                           vm_map_page_mask(map)),
                         vm_map_round_page(addr+len,
                                           vm_map_page_mask(map)),
                         FALSE);
    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}
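
/*
 * Pairing sketch (illustrative): vslock()/vsunlock() bracket a window in
 * which a user buffer must stay resident, e.g. around direct I/O into
 * user pages.
 *
 *	if ((error = vslock(uaddr, len)) != 0)
 *		return (error);
 *	// ... I/O directly against the wired user pages ...
 *	vsunlock(uaddr, len, 1);	// 1: the pages were dirtied
 */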

int
subyte(
    user_addr_t addr,
    int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
    user_addr_t addr,
    int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *) &byte, sizeof(char)))
        return(-1);
    return(byte);
}

int fuibyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *) &(byte), sizeof(char)))
        return(-1);
    return(byte);
}

int
suword(
    user_addr_t addr,
    long word)
{
    return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *) &word, sizeof(int)))
        return(-1);
    return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
    user_addr_t addr,
    long word)
{
    return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *) &word, sizeof(int)))
        return(-1);
    return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{

    if (IS_64BIT_PROCESS(current_proc())) {
        return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
    } else {
        return(suiword(addr, (long)word));
    }
}

int64_t
fulong(user_addr_t addr)
{
    int64_t longword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
            return(-1);
        return(longword);
    } else {
        return((int64_t)fuiword(addr));
    }
}

int
suulong(user_addr_t addr, uint64_t uword)
{

    if (IS_64BIT_PROCESS(current_proc())) {
        return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
    } else {
        return(suiword(addr, (uint32_t)uword));
    }
}

uint64_t
fuulong(user_addr_t addr)
{
    uint64_t ulongword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
            return(-1ULL);
        return(ulongword);
    } else {
        return((uint64_t)fuiword(addr));
    }
}
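
/*
 * Usage sketch (illustrative): fulong()/sulong() hide the width of a
 * user-space "long" from code that must serve both 32-bit and 64-bit
 * tasks.  Note that -1 doubles as the error marker and as a legal value.
 *
 *	int64_t v = fulong(uaddr);		// reads 4 or 8 bytes
 *	if (sulong(uaddr, v + 1) != 0) {
 *		// store failed
 *	}
 */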

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
    return(ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *		KERN_FAILURE	Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
    struct pid_for_task_args *args)
{
    mach_port_name_t t = args->t;
    user_addr_t pid_addr = args->pid;
    proc_t p;
    task_t t1;
    int pid = -1;
    kern_return_t err = KERN_SUCCESS;

    AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
    AUDIT_ARG(mach_port1, t);

    t1 = port_name_to_task(t);

    if (t1 == TASK_NULL) {
        err = KERN_FAILURE;
        goto pftout;
    } else {
        p = get_bsdtask_info(t1);
        if (p) {
            pid = proc_pid(p);
            err = KERN_SUCCESS;
        } else {
            err = KERN_FAILURE;
        }
    }
    task_deallocate(t1);
pftout:
    AUDIT_ARG(pid, pid);
    (void) copyout((char *) &pid, pid_addr, sizeof(int));
    AUDIT_MACH_SYSCALL_EXIT(err);
    return(err);
}
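
/*
 * Caller's view (illustrative): from user space this trap is reached via
 * the wrapper
 *
 *	int pid = -1;
 *	kern_return_t kr = pid_for_task(task_port, &pid);
 *	if (kr == KERN_SUCCESS)
 *		// pid names the BSD process behind task_port
 */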

/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port. This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
    kauth_cred_t targetcred, mycred;
    uid_t myuid;
    int allowed;

    /* No task_for_pid on bad targets */
    if (target->p_stat == SZOMB) {
        return FALSE;
    }

    mycred = kauth_cred_get();
    myuid = kauth_cred_getuid(mycred);

    /* If we're running as root, the check passes */
    if (kauth_cred_issuser(mycred))
        return TRUE;

    /* We're allowed to get our own task port */
    if (target == current_proc())
        return TRUE;

    /*
     * Under DENY, only root can get another proc's task port,
     * so no more checks are needed.
     */
    if (tfp_policy == KERN_TFP_POLICY_DENY) {
        return FALSE;
    }

    targetcred = kauth_cred_proc_ref(target);
    allowed = TRUE;

    /* Do target's ruid, euid, and saved uid match my euid? */
    if ((kauth_cred_getuid(targetcred) != myuid) ||
        (kauth_cred_getruid(targetcred) != myuid) ||
        (kauth_cred_getsvuid(targetcred) != myuid)) {
        allowed = FALSE;
        goto out;
    }

    /* Are target's groups a subset of my groups? */
    if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
        allowed == 0) {
        allowed = FALSE;
        goto out;
    }

    /* Has target switched credentials? */
    if (target->p_flag & P_SUGID) {
        allowed = FALSE;
        goto out;
    }

out:
    kauth_cred_unref(&targetcred);
    return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
    struct task_for_pid_args *args)
{
    mach_port_name_t target_tport = args->target_tport;
    int pid = args->pid;
    user_addr_t task_addr = args->t;
    proc_t p = PROC_NULL;
    task_t t1 = TASK_NULL;
    mach_port_name_t tret = MACH_PORT_NULL;
    ipc_port_t tfpport;
    void * sright;
    int error = 0;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    /* Always check if pid == 0 */
    if (pid == 0) {
        (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return(KERN_FAILURE);
    }

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return(KERN_FAILURE);
    }


    p = proc_find(pid);
    if (p == PROC_NULL) {
        error = KERN_FAILURE;
        goto tfpout;
    }

#if CONFIG_AUDIT
    AUDIT_ARG(process, p);
#endif

    if (!(task_for_pid_posix_check(p))) {
        error = KERN_FAILURE;
        goto tfpout;
    }

    if (p->task != TASK_NULL) {
        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            p != current_proc() &&
            (task_get_task_access_port(p->task, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = KERN_PROTECTION_FAILURE;
                goto tfpout;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = KERN_ABORTED;
                else
                    error = KERN_FAILURE;
                goto tfpout;
            }
        }
#if CONFIG_MACF
        error = mac_proc_check_get_task(kauth_cred_get(), p);
        if (error) {
            error = KERN_FAILURE;
            goto tfpout;
        }
#endif

        /* Grant task port access */
        task_reference(p->task);
        extmod_statistics_incr_task_for_pid(p->task);

        sright = (void *) convert_task_to_port(p->task);
        tret = ipc_port_copyout_send(
                sright,
                get_task_ipcspace(current_task()));
    }
    error = KERN_SUCCESS;

tfpout:
    task_deallocate(t1);
    AUDIT_ARG(mach_port2, tret);
    (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return(error);
}
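
/*
 * Caller's view (illustrative): the user-space entry point is
 *
 *	mach_port_t port = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &port);
 *
 * which succeeds only if task_for_pid_posix_check() above (plus any task
 * access server and MAC policy) approves the caller.
 */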

/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
    struct task_name_for_pid_args *args)
{
    mach_port_name_t target_tport = args->target_tport;
    int pid = args->pid;
    user_addr_t task_addr = args->t;
    proc_t p = PROC_NULL;
    task_t t1;
    mach_port_name_t tret;
    void * sright;
    int error = 0, refheld = 0;
    kauth_cred_t target_cred;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return(KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p != PROC_NULL) {
        AUDIT_ARG(process, p);
        target_cred = kauth_cred_proc_ref(p);
        refheld = 1;

        if ((p->p_stat != SZOMB)
            && ((current_proc() == p)
                || kauth_cred_issuser(kauth_cred_get())
                || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
                    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

            if (p->task != TASK_NULL) {
                task_reference(p->task);
#if CONFIG_MACF
                error = mac_proc_check_get_task_name(kauth_cred_get(), p);
                if (error) {
                    task_deallocate(p->task);
                    goto noperm;
                }
#endif
                sright = (void *)convert_task_name_to_port(p->task);
                tret = ipc_port_copyout_send(sright,
                           get_task_ipcspace(current_task()));
            } else
                tret = MACH_PORT_NULL;

            AUDIT_ARG(mach_port2, tret);
            (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
            task_deallocate(t1);
            error = KERN_SUCCESS;
            goto tnfpout;
        }
    }

#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
    tret = MACH_PORT_NULL;
    (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
    error = KERN_FAILURE;
tnfpout:
    if (refheld != 0)
        kauth_cred_unref(&target_cred);
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return(error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
    task_t target = NULL;
    proc_t targetproc = PROC_NULL;
    int pid = args->pid;
    int error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (targetproc == PROC_NULL) {
        error = ESRCH;
        goto out;
    }

    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }

    task_reference(target);
    error = task_pidsuspend(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            error = EPERM;
        }
    }
#if CONFIG_MEMORYSTATUS
    else {
        memorystatus_on_suspend(targetproc);
    }
#endif

    task_deallocate(target);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
    task_t target = NULL;
    proc_t targetproc = PROC_NULL;
    int pid = args->pid;
    int error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (targetproc == PROC_NULL) {
        error = ESRCH;
        goto out;
    }

    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }

    task_reference(target);

#if CONFIG_MEMORYSTATUS
    memorystatus_on_resume(targetproc);
#endif

    error = task_pidresume(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            if (error == KERN_MEMORY_ERROR) {
                psignal(targetproc, SIGKILL);
                error = EIO;
            } else
                error = EPERM;
        }
    }

    task_deallocate(target);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);

    *ret = error;
    return error;
}
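
/*
 * Usage sketch (illustrative; these are private syscalls and the wrapper
 * signatures are assumed): pid_suspend()/pid_resume() bracket a full stop
 * of another process, subject to the same checks as task_for_pid().
 *
 *	if (pid_suspend(pid) == 0) {
 *		// target is stopped and memorystatus has been notified
 *		pid_resume(pid);
 *	}
 */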


static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
    int error = 0;
    int new_value;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || req->newptr == USER_ADDR_NULL)
        return(error);

    if (!kauth_cred_issuser(kauth_cred_get()))
        return(EPERM);

    if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
        goto out;
    }
    if ((new_value == KERN_TFP_POLICY_DENY)
        || (new_value == KERN_TFP_POLICY_DEFAULT))
        tfp_policy = new_value;
    else
        error = EINVAL;
out:
    return(error);

}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
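
/*
 * Administration sketch (illustrative): the policy can be read, or changed
 * by root, from user space:
 *
 *	sysctl kern.tfp.policy		# read the current policy
 *	sysctl -w kern.tfp.policy=0	# KERN_TFP_POLICY_DENY (assuming 0)
 *	sysctl -w kern.tfp.policy=2	# KERN_TFP_POLICY_DEFAULT (assuming 2)
 *
 * The accepted values are exactly the KERN_TFP_POLICY_* constants checked
 * in sysctl_settfp_policy() above; anything else returns EINVAL.
 */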

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
    __unused struct proc *p,
    struct shared_region_check_np_args *uap,
    __unused int *retvalp)
{
    vm_shared_region_t shared_region;
    mach_vm_offset_t start_address = 0;
    int error;
    kern_return_t kr;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address));

    /* retrieve the current task's shared region */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region != NULL) {
        /* retrieve address of its first mapping... */
        kr = vm_shared_region_start_address(shared_region,
                                            &start_address);
        if (kr != KERN_SUCCESS) {
            error = ENOMEM;
        } else {
            /* ... and give it to the caller */
            error = copyout(&start_address,
                            (user_addr_t) uap->start_address,
                            sizeof (start_address));
            if (error) {
                SHARED_REGION_TRACE_ERROR(
                    ("shared_region: %p [%d(%s)] "
                     "check_np(0x%llx) "
                     "copyout(0x%llx) error %d\n",
                     current_thread(), p->p_pid, p->p_comm,
                     (uint64_t)uap->start_address, (uint64_t)start_address,
                     error));
            }
        }
        vm_shared_region_deallocate(shared_region);
    } else {
        /* no shared region ! */
        error = EINVAL;
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address, (uint64_t)start_address, error));

    return error;
}
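
/*
 * dyld-side sketch (illustrative of the flow described above, not dyld's
 * actual code; the private trap wrapper is assumed):
 *
 *	uint64_t base;
 *	if (shared_region_check_np(&base) == 0) {
 *		// inspect what is mapped at "base" and compare it against
 *		// the desired shared cache
 *	} else {
 *		// EINVAL: no shared region; fall back to mmap()ing the cache
 *	}
 */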


int
shared_region_copyin_mappings(
    struct proc *p,
    user_addr_t user_mappings,
    unsigned int mappings_count,
    struct shared_file_mapping_np *mappings)
{
    int error = 0;
    vm_size_t mappings_size = 0;

    /* get the list of mappings the caller wants us to establish */
    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
    error = copyin(user_mappings,
                   mappings,
                   mappings_size);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "copyin(0x%llx, %d) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             (uint64_t)user_mappings, mappings_count, error));
    }
    return error;
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map(
    struct proc *p,
    int fd,
    uint32_t mappings_count,
    struct shared_file_mapping_np *mappings,
    memory_object_control_t *sr_file_control,
    struct shared_file_mapping_np *mapping_to_slide)
{
    int error;
    kern_return_t kr;
    struct fileproc *fp;
    struct vnode *vp, *root_vp;
    struct vnode_attr va;
    off_t fs;
    memory_object_size_t file_size;
#if CONFIG_MACF
    vm_prot_t maxprot = VM_PROT_ALL;
#endif
    memory_object_control_t file_control;
    struct vm_shared_region *shared_region;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> map\n",
         current_thread(), p->p_pid, p->p_comm));

    shared_region = NULL;
    fp = NULL;
    vp = NULL;

    /* get file structure from file descriptor */
    error = fp_lookup(p, fd, &fp, 0);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d lookup failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }

    /* make sure we're attempting to map a vnode */
    if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not a vnode (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             fd, FILEGLOB_DTYPE(fp->f_fglob)));
        error = EINVAL;
        goto done;
    }

    /* we need at least read permission on the file */
    if (! (fp->f_fglob->fg_flag & FREAD)) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not readable\n",
             current_thread(), p->p_pid, p->p_comm, fd));
        error = EPERM;
        goto done;
    }

    /* get vnode from file structure */
    error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d getwithref failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }
    vp = (struct vnode *) fp->f_fglob->fg_data;

    /* make sure the vnode is a regular file */
    if (vp->v_type != VREG) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not a file (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp->v_type));
        error = EINVAL;
        goto done;
    }

#if CONFIG_MACF
    error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
            fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
    if (error) {
        goto done;
    }
#endif /* MAC */

#if CONFIG_PROTECT
    /* check for content protection access */
    {
        error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
        if (error) {
            goto done;
        }
    }
#endif /* CONFIG_PROTECT */

    /* make sure vnode is on the process's root volume */
    root_vp = p->p_fd->fd_rdir;
    if (root_vp == NULL) {
        root_vp = rootvnode;
    } else {
        /*
         * Chroot-ed processes can't use the shared_region.
         */
        error = EINVAL;
        goto done;
    }

    if (vp->v_mount != root_vp->v_mount) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not on process's root volume\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EPERM;
        goto done;
    }

    /* make sure vnode is owned by "root" */
    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_uid);
    error = vnode_getattr(vp, &va, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_getattr(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    if (va.va_uid != 0) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "owned by uid=%d instead of 0\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, va.va_uid));
        error = EPERM;
        goto done;
    }

    /* get vnode size */
    error = vnode_size(vp, &fs, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_size(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    file_size = fs;

    /* get the file's memory object handle */
    file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
    if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no memory object\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EINVAL;
        goto done;
    }

    if (sr_file_control != NULL) {
        *sr_file_control = file_control;
    }


    /* get the process's shared region (setup in vm_map_exec()) */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region == NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no shared region\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        goto done;
    }

    /* map the file into that shared region's submap */
    kr = vm_shared_region_map_file(shared_region,
                                   mappings_count,
                                   mappings,
                                   file_control,
                                   file_size,
                                   (void *) p->p_fd->fd_rdir,
                                   mapping_to_slide);
    if (kr != KERN_SUCCESS) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vm_shared_region_map_file() failed kr=0x%x\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, kr));
        switch (kr) {
        case KERN_INVALID_ADDRESS:
            error = EFAULT;
            break;
        case KERN_PROTECTION_FAILURE:
            error = EPERM;
            break;
        case KERN_NO_SPACE:
            error = ENOMEM;
            break;
        case KERN_FAILURE:
        case KERN_INVALID_ARGUMENT:
        default:
            error = EINVAL;
            break;
        }
        goto done;
    }

    error = 0;

    vnode_lock_spin(vp);

    vp->v_flag |= VSHARED_DYLD;

    vnode_unlock(vp);

    /* update the vnode's access time */
    if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
        VATTR_INIT(&va);
        nanotime(&va.va_access_time);
        VATTR_SET_ACTIVE(&va, va_access_time);
        vnode_setattr(vp, &va, vfs_context_current());
    }

    if (p->p_flag & P_NOSHLIB) {
        /* signal that this process is now using split libraries */
        OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
    }

done:
    if (vp != NULL) {
        /*
         * release the vnode...
         * ubc_map() still holds it for us in the non-error case
         */
        (void) vnode_put(vp);
        vp = NULL;
    }
    if (fp != NULL) {
        /* release the file descriptor */
        fp_drop(p, fd, fp, 0);
        fp = NULL;
    }

    if (shared_region != NULL) {
        vm_shared_region_deallocate(shared_region);
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] <- map\n",
         current_thread(), p->p_pid, p->p_comm));

    return error;
}

int
shared_region_map_and_slide_np(
    struct proc *p,
    struct shared_region_map_and_slide_np_args *uap,
    __unused int *retvalp)
{
    struct shared_file_mapping_np mapping_to_slide;
    struct shared_file_mapping_np *mappings;
    unsigned int mappings_count = uap->count;

    memory_object_control_t sr_file_control;
    kern_return_t kr = KERN_SUCCESS;
    uint32_t slide = uap->slide;

#define SFM_MAX_STACK	8
    struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];

    /* Is the process chrooted? */
    if (p->p_fd->fd_rdir != NULL) {
        kr = EINVAL;
        goto done;
    }

    if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
        if (kr == KERN_INVALID_ARGUMENT) {
            /*
             * This will happen if we request sliding again
             * with the same slide value that was used earlier
             * for the very first sliding.
             */
            kr = KERN_SUCCESS;
        }
        goto done;
    }

    if (mappings_count == 0) {
        SHARED_REGION_TRACE_INFO(
            ("shared_region: %p [%d(%s)] map(): "
             "no mappings\n",
             current_thread(), p->p_pid, p->p_comm));
        kr = 0;	/* no mappings: we're done ! */
        goto done;
    } else if (mappings_count <= SFM_MAX_STACK) {
        mappings = &stack_mappings[0];
    } else {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "too many mappings (%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             mappings_count));
        kr = KERN_FAILURE;
        goto done;
    }

    if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
        goto done;
    }


    kr = _shared_region_map(p, uap->fd, mappings_count, mappings, &sr_file_control, &mapping_to_slide);
    if (kr != KERN_SUCCESS) {
        return kr;
    }

    if (slide) {
        kr = vm_shared_region_slide(slide,
                mapping_to_slide.sfm_file_offset,
                mapping_to_slide.sfm_size,
                uap->slide_start,
                uap->slide_size,
                sr_file_control);
        if (kr != KERN_SUCCESS) {
            vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
            return kr;
        }
    }
done:
    return kr;
}
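
/*
 * Caller's sketch (illustrative): dyld fills in an array of
 * shared_file_mapping_np entries describing how the shared cache file
 * should appear in the shared region, e.g.
 *
 *	struct shared_file_mapping_np m = {
 *		.sfm_address	 = sr_base + segment_vmaddr,
 *		.sfm_size	 = segment_vmsize,
 *		.sfm_file_offset = segment_fileoff,
 *		.sfm_max_prot	 = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sfm_init_prot	 = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 *
 * and passes the array, its count, the cache fd, and the slide parameters
 * to this system call.
 */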

/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
   allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
   reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int page_free_wanted;

    page_free_wanted = mach_vm_ctl_page_free_wanted();
    return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void vm_pageout_io_throttle(void) {
    struct uthread *uthread = get_bsdthread_info(current_thread());

    /*
     * If the thread is marked as a low priority I/O type
     * and the I/O we issued while in this cleaning operation
     * collided with normal I/O operations, we'll
     * delay in order to mitigate the impact of this
     * task on the normal operation of the system.
     */

    if (uthread->uu_lowpri_window) {
        throttle_lowpri_io(1);
    }

}

int
vm_pressure_monitor(
    __unused struct proc *p,
    struct vm_pressure_monitor_args *uap,
    int *retval)
{
    kern_return_t kr;
    uint32_t pages_reclaimed;
    uint32_t pages_wanted;

    kr = mach_vm_pressure_monitor(
        (boolean_t) uap->wait_for_pressure,
        uap->nsecs_monitored,
        (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
        &pages_wanted);

    switch (kr) {
    case KERN_SUCCESS:
        break;
    case KERN_ABORTED:
        return EINTR;
    default:
        return EINVAL;
    }

    if (uap->pages_reclaimed) {
        if (copyout((void *)&pages_reclaimed,
                    uap->pages_reclaimed,
                    sizeof (pages_reclaimed)) != 0) {
            return EFAULT;
        }
    }

    *retval = (int) pages_wanted;
    return 0;
}
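
/*
 * Caller's sketch (illustrative; the user-space wrapper signature is
 * assumed):
 *
 *	uint32_t reclaimed = 0;
 *	int wanted = vm_pressure_monitor(1, nsecs, &reclaimed);
 *
 * A non-zero wait_for_pressure blocks until the pageout daemon signals
 * pressure; the result is the number of pages the daemon still wants,
 * and EINTR means the wait was interrupted.
 */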

int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifdef SECURE_KERNEL
    (void)p;
    (void)uap;
    return ENOTSUP;
#else /* !SECURE_KERNEL */
    int selector = uap->selector;
    user_addr_t valuep = uap->value;
    user_addr_t sizep = uap->size;
    user_size_t size;
    int error;

    if (!kauth_cred_issuser(kauth_cred_get())) {
        return EPERM;
    }

#if CONFIG_MACF
    error = mac_system_check_kas_info(kauth_cred_get(), selector);
    if (error) {
        return error;
    }
#endif

    if (IS_64BIT_PROCESS(p)) {
        user64_size_t size64;
        error = copyin(sizep, &size64, sizeof(size64));
        size = (user_size_t)size64;
    } else {
        user32_size_t size32;
        error = copyin(sizep, &size32, sizeof(size32));
        size = (user_size_t)size32;
    }
    if (error) {
        return error;
    }

    switch (selector) {
    case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
    {
        uint64_t slide = vm_kernel_slide;

        if (sizeof(slide) != size) {
            return EINVAL;
        }

        if (IS_64BIT_PROCESS(p)) {
            user64_size_t size64 = (user64_size_t)size;
            error = copyout(&size64, sizep, sizeof(size64));
        } else {
            user32_size_t size32 = (user32_size_t)size;
            error = copyout(&size32, sizep, sizeof(size32));
        }
        if (error) {
            return error;
        }

        error = copyout(&slide, valuep, sizeof(slide));
        if (error) {
            return error;
        }
    }
        break;
    default:
        return EINVAL;
    }

    return 0;
#endif /* !SECURE_KERNEL */
}
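
/*
 * Caller's sketch (illustrative): reading the kernel text slide as root
 * via the wrapper declared in <sys/kas_info.h>:
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0) {
 *		// slide now holds vm_kernel_slide
 *	}
 */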