/*
 * bsd/vm/vm_unix.c (from xnu-1699.22.81)
 */
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <kern/extmod_statistics.h>
48 #include <mach/mach_traps.h>
49 #include <mach/port.h>
50 #include <mach/task.h>
51 #include <mach/task_access.h>
52 #include <mach/task_special_ports.h>
53 #include <mach/time_value.h>
54 #include <mach/vm_map.h>
55 #include <mach/vm_param.h>
56 #include <mach/vm_prot.h>
57
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/trace.h>
70 #include <sys/kernel.h>
71 #include <sys/ubc_internal.h>
72 #include <sys/user.h>
73 #include <sys/syslog.h>
74 #include <sys/stat.h>
75 #include <sys/sysproto.h>
76 #include <sys/mman.h>
77 #include <sys/sysctl.h>
78 #include <sys/cprotect.h>
79 #include <sys/kpi_socket.h>
80
81 #include <security/audit/audit.h>
82 #include <security/mac.h>
83 #include <bsm/audit_kevents.h>
84
85 #include <kern/kalloc.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_kern.h>
88 #include <vm/vm_pageout.h>
89
90 #include <machine/spl.h>
91
92 #include <mach/shared_region.h>
93 #include <vm/vm_shared_region.h>
94
95 #include <vm/vm_protos.h>
96
97 #if CONFIG_FREEZE
98 #include <sys/kern_memorystatus.h>
99 #endif
100
101
102 int _shared_region_map( struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*);
103 int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t, mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
104 int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
105
106 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
107
108
109 /*
110 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
111 */
112
113 #ifndef SECURE_KERNEL
114 extern int allow_stack_exec, allow_data_exec;
115
116 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
117 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
118 #endif /* !SECURE_KERNEL */
119
/*
 * Human-readable names for VM protection bit combinations, indexed by
 * (prot & VM_PROT_ALL): bit 0 = read, bit 1 = write, bit 2 = execute.
 * Used by log_stack_execution_failure() below.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
130
131 void
132 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
133 {
134 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
135 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
136 }
137
138 int shared_region_unnest_logging = 1;
139
140 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
141 &shared_region_unnest_logging, 0, "");
142
143 int vm_shared_region_unnest_log_interval = 10;
144 int shared_region_unnest_log_count_threshold = 5;
145
146 /* These log rate throttling state variables aren't thread safe, but
147 * are sufficient unto the task.
148 */
149 static int64_t last_unnest_log_time = 0;
150 static int shared_region_unnest_log_count = 0;
151
152 void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
153 struct timeval tv;
154 const char *pcommstr;
155
156 if (shared_region_unnest_logging == 0)
157 return;
158
159 if (shared_region_unnest_logging == 1) {
160 microtime(&tv);
161 if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
162 if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
163 return;
164 }
165 else {
166 last_unnest_log_time = tv.tv_sec;
167 shared_region_unnest_log_count = 0;
168 }
169 }
170
171 pcommstr = current_proc()->p_comm;
172
173 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
174 }
175
176 int
177 useracc(
178 user_addr_t addr,
179 user_size_t len,
180 int prot)
181 {
182 return (vm_map_check_protection(
183 current_map(),
184 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
185 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
186 }
187
188 int
189 vslock(
190 user_addr_t addr,
191 user_size_t len)
192 {
193 kern_return_t kret;
194 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
195 vm_map_round_page(addr+len),
196 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
197
198 switch (kret) {
199 case KERN_SUCCESS:
200 return (0);
201 case KERN_INVALID_ADDRESS:
202 case KERN_NO_SPACE:
203 return (ENOMEM);
204 case KERN_PROTECTION_FAILURE:
205 return (EACCES);
206 default:
207 return (EINVAL);
208 }
209 }
210
/*
 * Unwire the user pages spanning [addr, addr+len), undoing a previous
 * vslock().  Returns 0 on success or a BSD errno.
 *
 * The "dirtied" parameter is currently ignored: the code that would mark
 * the pages modified is compiled out under "#if FIXME".
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	/* Dead code: would mark each page in the range as modified. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	/* Translate the Mach result into a BSD errno. */
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
254
255 int
256 subyte(
257 user_addr_t addr,
258 int byte)
259 {
260 char character;
261
262 character = (char)byte;
263 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
264 }
265
266 int
267 suibyte(
268 user_addr_t addr,
269 int byte)
270 {
271 char character;
272
273 character = (char)byte;
274 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
275 }
276
277 int fubyte(user_addr_t addr)
278 {
279 unsigned char byte;
280
281 if (copyin(addr, (void *) &byte, sizeof(char)))
282 return(-1);
283 return(byte);
284 }
285
286 int fuibyte(user_addr_t addr)
287 {
288 unsigned char byte;
289
290 if (copyin(addr, (void *) &(byte), sizeof(char)))
291 return(-1);
292 return(byte);
293 }
294
295 int
296 suword(
297 user_addr_t addr,
298 long word)
299 {
300 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
301 }
302
303 long fuword(user_addr_t addr)
304 {
305 long word = 0;
306
307 if (copyin(addr, (void *) &word, sizeof(int)))
308 return(-1);
309 return(word);
310 }
311
312 /* suiword and fuiword are the same as suword and fuword, respectively */
313
314 int
315 suiword(
316 user_addr_t addr,
317 long word)
318 {
319 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
320 }
321
322 long fuiword(user_addr_t addr)
323 {
324 long word = 0;
325
326 if (copyin(addr, (void *) &word, sizeof(int)))
327 return(-1);
328 return(word);
329 }
330
331 /*
332 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
333 * fetching and setting of process-sized size_t and pointer values.
334 */
335 int
336 sulong(user_addr_t addr, int64_t word)
337 {
338
339 if (IS_64BIT_PROCESS(current_proc())) {
340 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
341 } else {
342 return(suiword(addr, (long)word));
343 }
344 }
345
346 int64_t
347 fulong(user_addr_t addr)
348 {
349 int64_t longword;
350
351 if (IS_64BIT_PROCESS(current_proc())) {
352 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
353 return(-1);
354 return(longword);
355 } else {
356 return((int64_t)fuiword(addr));
357 }
358 }
359
360 int
361 suulong(user_addr_t addr, uint64_t uword)
362 {
363
364 if (IS_64BIT_PROCESS(current_proc())) {
365 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
366 } else {
367 return(suiword(addr, (uint32_t)uword));
368 }
369 }
370
371 uint64_t
372 fuulong(user_addr_t addr)
373 {
374 uint64_t ulongword;
375
376 if (IS_64BIT_PROCESS(current_proc())) {
377 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
378 return(-1ULL);
379 return(ulongword);
380 } else {
381 return((uint64_t)fuiword(addr));
382 }
383 }
384
385 int
386 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
387 {
388 return(ENOTSUP);
389 }
390
391 /*
392 * pid_for_task
393 *
394 * Find the BSD process ID for the Mach task associated with the given Mach port
395 * name
396 *
397 * Parameters: args User argument descriptor (see below)
398 *
399 * Indirect parameters: args->t Mach port name
400 * args->pid Process ID (returned value; see below)
401 *
 * Returns:	KERN_SUCCESS	Success
403 * KERN_FAILURE Not success
404 *
405 * Implicit returns: args->pid Process ID
406 *
407 */
/*
 * Mach trap: translate a task port name in the caller's IPC space into
 * the BSD process ID of the task it names.
 *
 * On any failure, pid remains -1 and is still copied out to args->pid;
 * the copyout result is deliberately ignored (the caller gets the
 * kern_return_t as the authoritative status).
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Takes a task reference on success; dropped below. */
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		/* A task without BSD bookkeeping has no pid to report. */
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
443
444 /*
445 *
446 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
447 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
448 *
449 */
450 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
451
452 /*
453 * Routine: task_for_pid_posix_check
454 * Purpose:
455 * Verify that the current process should be allowed to
456 * get the target process's task port. This is only
457 * permitted if:
458 * - The current process is root
459 * OR all of the following are true:
460 * - The target process's real, effective, and saved uids
461 * are the same as the current proc's euid,
462 * - The target process's group set is a subset of the
463 * calling process's group set, and
464 * - The target process hasn't switched credentials.
465 *
466 * Returns: TRUE: permitted
467 * FALSE: denied
468 */
/*
 * Routine:	task_for_pid_posix_check
 * Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port. This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* Take a credential reference; released at "out" on every path. */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups?
	 * (A non-zero return from kauth_cred_gid_subset means the
	 * subset computation itself failed; treat that as a denial.) */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
528
529 /*
530 * Routine: task_for_pid
531 * Purpose:
532 * Get the task port for another "process", named by its
533 * process ID on the same host as "target_task".
534 *
535 * Only permitted to privileged processes, or processes
536 * with the same user ID.
537 *
538 * Note: if pid == 0, an error is return no matter who is calling.
539 *
540 * XXX This should be a BSD system call, not a Mach trap!!!
541 */
/*
 * Routine:	task_for_pid
 * Purpose:
 *	Get the task port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 *	Note: if pid == 0, an error is return no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
 	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		/* t1 is TASK_NULL here, so a null name is copied out. */
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	/* Validate the caller's own task port; reference dropped at tfpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
#if CONFIG_AUDIT
	if (p != PROC_NULL)
		AUDIT_ARG(process, p);
#endif

	/* Handles p == PROC_NULL by returning FALSE. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright, 
				get_task_ipcspace(current_task()));
	}
	/* NOTE: if p->task was TASK_NULL, this still reports success with
	 * tret == MACH_PORT_NULL. */
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* tret is MACH_PORT_NULL on all failure paths. */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
637
638 /*
639 * Routine: task_name_for_pid
640 * Purpose:
641 * Get the task name port for another "process", named by its
642 * process ID on the same host as "target_task".
643 *
644 * Only permitted to privileged processes, or processes
645 * with the same user ID.
646 *
647 * XXX This should be a BSD system call, not a Mach trap!!!
648 */
649
/*
 * Get the (read-only) task *name* port for the process named by pid.
 * Weaker than task_for_pid: permitted when the caller is the target, is
 * the superuser, or shares the target's effective and real uids — and,
 * under MACF, passes mac_proc_check_get_task_name().
 *
 * On failure, MACH_PORT_NULL is copied out and KERN_FAILURE returned.
 */
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller's own task port; reference dropped on exit. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/* Permission: not a zombie, and self, superuser, or
		 * matching euid+ruid. */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get()) 
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					/* Drop the reference taken above before
					 * falling into the failure path. */
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright, 
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Common failure path: hand back a null port name. */
    	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
725
/*
 * Suspend the task of the process named by args->pid.
 * Subject to the same posix/task-access-port checks as task_for_pid.
 *
 * NOTE(review): despite the kern_return_t return type, the value stored
 * in *ret and returned is a BSD errno (0, EPERM, EACCES, EINTR, EINVAL).
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int 	pid = args->pid;
	int 	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/* Never allow suspending pid 0. */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	/* proc_find may return PROC_NULL; the posix check rejects that. */
	targetproc = proc_find(pid);
	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	/* Hold the task across the suspend call. */
	task_reference(target);
	error = task_suspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
	task_deallocate(target);

#if CONFIG_FREEZE
	/* Let the hibernation/freezer subsystem know this pid stopped. */
	kern_hibernation_on_pid_suspend(pid);
#endif

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
804
805 kern_return_t
806 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
807 {
808 task_t target = NULL;
809 proc_t targetproc = PROC_NULL;
810 int pid = args->pid;
811 int error = 0;
812
813 #if CONFIG_MACF
814 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
815 if (error) {
816 error = EPERM;
817 goto out;
818 }
819 #endif
820
821 if (pid == 0) {
822 error = EPERM;
823 goto out;
824 }
825
826 targetproc = proc_find(pid);
827 if (!task_for_pid_posix_check(targetproc)) {
828 error = EPERM;
829 goto out;
830 }
831
832 target = targetproc->task;
833 #ifndef CONFIG_EMBEDDED
834 if (target != TASK_NULL) {
835 mach_port_t tfpport;
836
837 /* If we aren't root and target's task access port is set... */
838 if (!kauth_cred_issuser(kauth_cred_get()) &&
839 targetproc != current_proc() &&
840 (task_get_task_access_port(target, &tfpport) == 0) &&
841 (tfpport != IPC_PORT_NULL)) {
842
843 if (tfpport == IPC_PORT_DEAD) {
844 error = EACCES;
845 goto out;
846 }
847
848 /* Call up to the task access server */
849 error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);
850
851 if (error != MACH_MSG_SUCCESS) {
852 if (error == MACH_RCV_INTERRUPTED)
853 error = EINTR;
854 else
855 error = EPERM;
856 goto out;
857 }
858 }
859 }
860 #endif
861
862 task_reference(target);
863
864 #if CONFIG_FREEZE
865 kern_hibernation_on_pid_resume(pid, target);
866 #endif
867
868 error = task_resume(target);
869 if (error) {
870 if (error == KERN_INVALID_ARGUMENT) {
871 error = EINVAL;
872 } else {
873 error = EPERM;
874 }
875 }
876 task_deallocate(target);
877
878 out:
879 if (targetproc != PROC_NULL)
880 proc_rele(targetproc);
881 *ret = error;
882 return error;
883
884 return 0;
885 }
886
887 #if CONFIG_EMBEDDED
/*
 * Kick off hibernation for the process named by args->pid.
 * Currently only pid == -1 (hibernate "everything") is accepted; any
 * other pid is validated via task_for_pid_posix_check() and then
 * rejected with EPERM (see the comment below).
 * Compiled out to a no-op (beyond MACF) unless CONFIG_FREEZE.
 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int 	error = 0;
	proc_t	targetproc = PROC_NULL;
	int 	pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * The only accepted pid value here is currently -1, since we just kick off the hibernation thread
	 * here - individual ids aren't required. However, it's intended that that this call is to change
	 * in the future to initiate hibernation of individual processes. In anticipation, we'll obtain the
	 * process handle for potentially valid values and call task_for_pid_posix_check(); this way, everything
	 * is validated correctly and set for further refactoring. See <rdar://problem/7839708> for more details.
	 */
	if (pid >= 0) {
		targetproc = proc_find(pid);
		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

	if (pid == -1) {
		kern_hibernation_on_pid_hibernate(pid);
	} else {
		error = EPERM;
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
937
/*
 * Defunct (shut down) the sockets of the process named by args->pid at
 * the given level (SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC or _ALL).
 * Subject to MACF and task_for_pid_posix_check().
 * Returns a BSD errno in *ret and as the return value.
 */
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int 				error = 0;
	proc_t				targetproc = PROC_NULL;
	struct filedesc		*fdp;
	struct fileproc		*fp;
	int 				pid = args->pid;
	int					level = args->level;
	int					i;

	/* Only the two defined disconnect levels are valid. */
	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
		level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
	{
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/* proc_find may return PROC_NULL; the posix check rejects that. */
	targetproc = proc_find(pid);
	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	/* Walk the target's open files under its fd lock. */
	proc_fdlock(targetproc);
	fdp = targetproc->p_fd;

	for (i = 0; i < fdp->fd_nfiles; i++) {
		struct socket *sockp;

		/* Skip empty slots, reserved slots, and non-sockets. */
		fp = fdp->fd_ofiles[i];
		if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
			fp->f_fglob->fg_type != DTYPE_SOCKET)
		{
			continue;
		}

		sockp = (struct socket *)fp->f_fglob->fg_data;

		/* Call networking stack with socket and level */
		(void) socket_defunct(targetproc, sockp, level);
	}

	proc_fdunlock(targetproc);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
997 #endif /* CONFIG_EMBEDDED */
998
999 static int
1000 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1001 __unused int arg2, struct sysctl_req *req)
1002 {
1003 int error = 0;
1004 int new_value;
1005
1006 error = SYSCTL_OUT(req, arg1, sizeof(int));
1007 if (error || req->newptr == USER_ADDR_NULL)
1008 return(error);
1009
1010 if (!is_suser())
1011 return(EPERM);
1012
1013 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1014 goto out;
1015 }
1016 if ((new_value == KERN_TFP_POLICY_DENY)
1017 || (new_value == KERN_TFP_POLICY_DEFAULT))
1018 tfp_policy = new_value;
1019 else
1020 error = EINVAL;
1021 out:
1022 return(error);
1023
1024 }
1025
1026 #if defined(SECURE_KERNEL)
1027 static int kern_secure_kernel = 1;
1028 #else
1029 static int kern_secure_kernel = 0;
1030 #endif
1031
1032 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
1033
1034 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
1035 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1036 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
1037
1038 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
1039 &shared_region_trace_level, 0, "");
1040 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
1041 &shared_region_version, 0, "");
1042 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
1043 &shared_region_persistence, 0, "");
1044
1045 /*
1046 * shared_region_check_np:
1047 *
1048 * This system call is intended for dyld.
1049 *
1050 * dyld calls this when any process starts to see if the process's shared
1051 * region is already set up and ready to use.
1052 * This call returns the base address of the first mapping in the
1053 * process's shared region's first mapping.
1054 * dyld will then check what's mapped at that address.
1055 *
1056 * If the shared region is empty, dyld will then attempt to map the shared
1057 * cache file in the shared region via the shared_region_map_np() system call.
1058 *
1059 * If something's already mapped in the shared region, dyld will check if it
1060 * matches the shared cache it would like to use for that process.
1061 * If it matches, evrything's ready and the process can proceed and use the
1062 * shared region.
1063 * If it doesn't match, dyld will unmap the shared region and map the shared
1064 * cache into the process's address space via mmap().
1065 *
1066 * ERROR VALUES
1067 * EINVAL no shared region
1068 * ENOMEM shared region is empty
1069 * EFAULT bad address for "start_address"
1070 */
1071 int
1072 shared_region_check_np(
1073 __unused struct proc *p,
1074 struct shared_region_check_np_args *uap,
1075 __unused int *retvalp)
1076 {
1077 vm_shared_region_t shared_region;
1078 mach_vm_offset_t start_address;
1079 int error;
1080 kern_return_t kr;
1081
1082 SHARED_REGION_TRACE_DEBUG(
1083 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
1084 current_thread(), p->p_pid, p->p_comm,
1085 (uint64_t)uap->start_address));
1086
1087 /* retrieve the current tasks's shared region */
1088 shared_region = vm_shared_region_get(current_task());
1089 if (shared_region != NULL) {
1090 /* retrieve address of its first mapping... */
1091 kr = vm_shared_region_start_address(shared_region,
1092 &start_address);
1093 if (kr != KERN_SUCCESS) {
1094 error = ENOMEM;
1095 } else {
1096 /* ... and give it to the caller */
1097 error = copyout(&start_address,
1098 (user_addr_t) uap->start_address,
1099 sizeof (start_address));
1100 if (error) {
1101 SHARED_REGION_TRACE_ERROR(
1102 ("shared_region: %p [%d(%s)] "
1103 "check_np(0x%llx) "
1104 "copyout(0x%llx) error %d\n",
1105 current_thread(), p->p_pid, p->p_comm,
1106 (uint64_t)uap->start_address, (uint64_t)start_address,
1107 error));
1108 }
1109 }
1110 vm_shared_region_deallocate(shared_region);
1111 } else {
1112 /* no shared region ! */
1113 error = EINVAL;
1114 }
1115
1116 SHARED_REGION_TRACE_DEBUG(
1117 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
1118 current_thread(), p->p_pid, p->p_comm,
1119 (uint64_t)uap->start_address, (uint64_t)start_address, error));
1120
1121 return error;
1122 }
1123
1124
/*
 * Copy an array of shared_file_mapping_np entries in from user space
 * into the caller-supplied "mappings" buffer.  Returns the copyin errno
 * (0 on success); failures are traced.
 *
 * NOTE(review): mappings_count * sizeof(mappings[0]) is computed without
 * an overflow check — presumably callers bound mappings_count before
 * calling (and size the buffer accordingly); verify at each call site.
 */
int
shared_region_copyin_mappings(
		struct proc			*p,
		user_addr_t			user_mappings,
		unsigned int			mappings_count,
		struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}
1149 /*
1150 * shared_region_map_np()
1151 *
1152 * This system call is intended for dyld.
1153 *
1154 * dyld uses this to map a shared cache file into a shared region.
1155 * This is usually done only the first time a shared cache is needed.
1156 * Subsequent processes will just use the populated shared region without
1157 * requiring any further setup.
1158 */
1159 int
1160 _shared_region_map(
1161 struct proc *p,
1162 int fd,
1163 uint32_t mappings_count,
1164 struct shared_file_mapping_np *mappings,
1165 memory_object_control_t *sr_file_control,
1166 struct shared_file_mapping_np *mapping_to_slide)
1167 {
1168 int error;
1169 kern_return_t kr;
1170 struct fileproc *fp;
1171 struct vnode *vp, *root_vp;
1172 struct vnode_attr va;
1173 off_t fs;
1174 memory_object_size_t file_size;
1175 vm_prot_t maxprot = VM_PROT_ALL;
1176 memory_object_control_t file_control;
1177 struct vm_shared_region *shared_region;
1178
1179 SHARED_REGION_TRACE_DEBUG(
1180 ("shared_region: %p [%d(%s)] -> map\n",
1181 current_thread(), p->p_pid, p->p_comm));
1182
1183 shared_region = NULL;
1184 fp = NULL;
1185 vp = NULL;
1186
1187 /* get file structure from file descriptor */
1188 error = fp_lookup(p, fd, &fp, 0);
1189 if (error) {
1190 SHARED_REGION_TRACE_ERROR(
1191 ("shared_region: %p [%d(%s)] map: "
1192 "fd=%d lookup failed (error=%d)\n",
1193 current_thread(), p->p_pid, p->p_comm, fd, error));
1194 goto done;
1195 }
1196
1197 /* make sure we're attempting to map a vnode */
1198 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
1199 SHARED_REGION_TRACE_ERROR(
1200 ("shared_region: %p [%d(%s)] map: "
1201 "fd=%d not a vnode (type=%d)\n",
1202 current_thread(), p->p_pid, p->p_comm,
1203 fd, fp->f_fglob->fg_type));
1204 error = EINVAL;
1205 goto done;
1206 }
1207
1208 /* we need at least read permission on the file */
1209 if (! (fp->f_fglob->fg_flag & FREAD)) {
1210 SHARED_REGION_TRACE_ERROR(
1211 ("shared_region: %p [%d(%s)] map: "
1212 "fd=%d not readable\n",
1213 current_thread(), p->p_pid, p->p_comm, fd));
1214 error = EPERM;
1215 goto done;
1216 }
1217
1218 /* get vnode from file structure */
1219 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
1220 if (error) {
1221 SHARED_REGION_TRACE_ERROR(
1222 ("shared_region: %p [%d(%s)] map: "
1223 "fd=%d getwithref failed (error=%d)\n",
1224 current_thread(), p->p_pid, p->p_comm, fd, error));
1225 goto done;
1226 }
1227 vp = (struct vnode *) fp->f_fglob->fg_data;
1228
1229 /* make sure the vnode is a regular file */
1230 if (vp->v_type != VREG) {
1231 SHARED_REGION_TRACE_ERROR(
1232 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1233 "not a file (type=%d)\n",
1234 current_thread(), p->p_pid, p->p_comm,
1235 vp, vp->v_name, vp->v_type));
1236 error = EINVAL;
1237 goto done;
1238 }
1239
1240 #if CONFIG_MACF
1241 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
1242 fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
1243 if (error) {
1244 goto done;
1245 }
1246 #endif /* MAC */
1247
1248 #if CONFIG_PROTECT
1249 /* check for content protection access */
1250 {
1251 void *cnode;
1252 if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
1253 error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
1254 if (error)
1255 goto done;
1256 }
1257 }
1258 #endif /* CONFIG_PROTECT */
1259
1260 /* make sure vnode is on the process's root volume */
1261 root_vp = p->p_fd->fd_rdir;
1262 if (root_vp == NULL) {
1263 root_vp = rootvnode;
1264 } else {
1265 /*
1266 * Chroot-ed processes can't use the shared_region.
1267 */
1268 error = EINVAL;
1269 goto done;
1270 }
1271
1272 if (vp->v_mount != root_vp->v_mount) {
1273 SHARED_REGION_TRACE_ERROR(
1274 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1275 "not on process's root volume\n",
1276 current_thread(), p->p_pid, p->p_comm,
1277 vp, vp->v_name));
1278 error = EPERM;
1279 goto done;
1280 }
1281
1282 /* make sure vnode is owned by "root" */
1283 VATTR_INIT(&va);
1284 VATTR_WANTED(&va, va_uid);
1285 error = vnode_getattr(vp, &va, vfs_context_current());
1286 if (error) {
1287 SHARED_REGION_TRACE_ERROR(
1288 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1289 "vnode_getattr(%p) failed (error=%d)\n",
1290 current_thread(), p->p_pid, p->p_comm,
1291 vp, vp->v_name, vp, error));
1292 goto done;
1293 }
1294 if (va.va_uid != 0) {
1295 SHARED_REGION_TRACE_ERROR(
1296 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1297 "owned by uid=%d instead of 0\n",
1298 current_thread(), p->p_pid, p->p_comm,
1299 vp, vp->v_name, va.va_uid));
1300 error = EPERM;
1301 goto done;
1302 }
1303
1304 /* get vnode size */
1305 error = vnode_size(vp, &fs, vfs_context_current());
1306 if (error) {
1307 SHARED_REGION_TRACE_ERROR(
1308 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1309 "vnode_size(%p) failed (error=%d)\n",
1310 current_thread(), p->p_pid, p->p_comm,
1311 vp, vp->v_name, vp, error));
1312 goto done;
1313 }
1314 file_size = fs;
1315
1316 /* get the file's memory object handle */
1317 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1318 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1319 SHARED_REGION_TRACE_ERROR(
1320 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1321 "no memory object\n",
1322 current_thread(), p->p_pid, p->p_comm,
1323 vp, vp->v_name));
1324 error = EINVAL;
1325 goto done;
1326 }
1327
1328 if (sr_file_control != NULL) {
1329 *sr_file_control = file_control;
1330 }
1331
1332
1333
1334 /* get the process's shared region (setup in vm_map_exec()) */
1335 shared_region = vm_shared_region_get(current_task());
1336 if (shared_region == NULL) {
1337 SHARED_REGION_TRACE_ERROR(
1338 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1339 "no shared region\n",
1340 current_thread(), p->p_pid, p->p_comm,
1341 vp, vp->v_name));
1342 goto done;
1343 }
1344
1345 /* map the file into that shared region's submap */
1346 kr = vm_shared_region_map_file(shared_region,
1347 mappings_count,
1348 mappings,
1349 file_control,
1350 file_size,
1351 (void *) p->p_fd->fd_rdir,
1352 mapping_to_slide);
1353 if (kr != KERN_SUCCESS) {
1354 SHARED_REGION_TRACE_ERROR(
1355 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1356 "vm_shared_region_map_file() failed kr=0x%x\n",
1357 current_thread(), p->p_pid, p->p_comm,
1358 vp, vp->v_name, kr));
1359 switch (kr) {
1360 case KERN_INVALID_ADDRESS:
1361 error = EFAULT;
1362 break;
1363 case KERN_PROTECTION_FAILURE:
1364 error = EPERM;
1365 break;
1366 case KERN_NO_SPACE:
1367 error = ENOMEM;
1368 break;
1369 case KERN_FAILURE:
1370 case KERN_INVALID_ARGUMENT:
1371 default:
1372 error = EINVAL;
1373 break;
1374 }
1375 goto done;
1376 }
1377
1378 error = 0;
1379
1380 vnode_lock_spin(vp);
1381
1382 vp->v_flag |= VSHARED_DYLD;
1383
1384 vnode_unlock(vp);
1385
1386 /* update the vnode's access time */
1387 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
1388 VATTR_INIT(&va);
1389 nanotime(&va.va_access_time);
1390 VATTR_SET_ACTIVE(&va, va_access_time);
1391 vnode_setattr(vp, &va, vfs_context_current());
1392 }
1393
1394 if (p->p_flag & P_NOSHLIB) {
1395 /* signal that this process is now using split libraries */
1396 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
1397 }
1398
1399 done:
1400 if (vp != NULL) {
1401 /*
1402 * release the vnode...
1403 * ubc_map() still holds it for us in the non-error case
1404 */
1405 (void) vnode_put(vp);
1406 vp = NULL;
1407 }
1408 if (fp != NULL) {
1409 /* release the file descriptor */
1410 fp_drop(p, fd, fp, 0);
1411 fp = NULL;
1412 }
1413
1414 if (shared_region != NULL) {
1415 vm_shared_region_deallocate(shared_region);
1416 }
1417
1418 SHARED_REGION_TRACE_DEBUG(
1419 ("shared_region: %p [%d(%s)] <- map\n",
1420 current_thread(), p->p_pid, p->p_comm));
1421
1422 return error;
1423 }
1424
1425 int
1426 _shared_region_slide(uint32_t slide,
1427 mach_vm_offset_t entry_start_address,
1428 mach_vm_size_t entry_size,
1429 mach_vm_offset_t slide_start,
1430 mach_vm_size_t slide_size,
1431 memory_object_control_t sr_file_control)
1432 {
1433 void *slide_info_entry = NULL;
1434 int error;
1435
1436 if((error = vm_shared_region_slide_init(slide_size, entry_start_address, entry_size, slide, sr_file_control))) {
1437 printf("slide_info initialization failed with kr=%d\n", error);
1438 goto done;
1439 }
1440
1441 slide_info_entry = vm_shared_region_get_slide_info_entry();
1442 if (slide_info_entry == NULL){
1443 error = EFAULT;
1444 } else {
1445 error = copyin(slide_start,
1446 slide_info_entry,
1447 (vm_size_t)slide_size);
1448 }
1449 if (error) {
1450 goto done;
1451 }
1452
1453 if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
1454 error = EFAULT;
1455 printf("Sanity Check failed for slide_info\n");
1456 } else {
1457 #if DEBUG
1458 printf("Succesfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
1459 (void*)(uintptr_t)entry_start_address,
1460 (unsigned long)entry_size,
1461 (unsigned long)slide_size);
1462 #endif
1463 }
1464 done:
1465 return error;
1466 }
1467
/*
 * shared_region_map_and_slide_np()
 *
 * System call entry point used by dyld to map a shared cache file
 * into the shared region and, when "slide" is non-zero, apply the
 * requested sliding to the mapping designated by the mapping layer.
 *
 * NOTE(review): "kr" holds kern_return_t values from the Mach layer
 * but also BSD errno values returned by shared_region_copyin_mappings()
 * and _shared_region_map(); the two ranges overlap only at 0/KERN_SUCCESS,
 * which is what callers test — confirm before changing error handling.
 */
int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	mapping_to_slide;
	struct shared_file_mapping_np	*mappings;
	unsigned int mappings_count = uap->count;

	memory_object_control_t		sr_file_control;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

	/* upper bound on mappings accepted; they live on the kernel stack */
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* validate the requested slide before doing any mapping work */
	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding. We continue through
			 * to the mapping layer. This is so that we can be
			 * absolutely certain that the same mappings have
			 * been requested.
			 */
			kr = KERN_SUCCESS;
		} else {
			goto done;
		}
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	/* copy the mapping list in from user space */
	if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	/* establish the mappings in the shared region */
	kr = _shared_region_map(p, uap->fd, mappings_count, mappings, &sr_file_control, &mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (slide) {
		/*
		 * Apply the slide; on failure, tear down the mappings we
		 * just established so we don't leave a half-slid region.
		 */
		kr = _shared_region_slide(slide,
				mapping_to_slide.sfm_file_offset,
				mapping_to_slide.sfm_size,
				uap->slide_start,
				uap->slide_size,
				sr_file_control);
		if (kr  != KERN_SUCCESS) {
			vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
			return kr;
		}
	}
done:
	return kr;
}
1545
/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
   allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
   reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int	vm_page_free_target;
/* read-only export of the pageout daemon's free-page target (vm.vm_page_free_target) */
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
/* read-only memory pressure indicator (vm.memory_pressure) */
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");
1558
1559 static int
1560 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
1561 {
1562 #pragma unused(oidp, arg1, arg2)
1563 unsigned int page_free_wanted;
1564
1565 page_free_wanted = mach_vm_ctl_page_free_wanted();
1566 return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
1567 }
1568 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
1569 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
1570 0, 0, vm_ctl_page_free_wanted, "I", "");
1571
/* counts of purgeable pages, exported read-only via sysctl */
extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

/*
 * Statistics for "reusable" pages, read-only via sysctl.
 * vm_page_stats_reusable is presumably declared in a VM header included
 * above — no extern here, unlike the counters just above; verify.
 */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
1604
1605
1606 int
1607 vm_pressure_monitor(
1608 __unused struct proc *p,
1609 struct vm_pressure_monitor_args *uap,
1610 int *retval)
1611 {
1612 kern_return_t kr;
1613 uint32_t pages_reclaimed;
1614 uint32_t pages_wanted;
1615
1616 kr = mach_vm_pressure_monitor(
1617 (boolean_t) uap->wait_for_pressure,
1618 uap->nsecs_monitored,
1619 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
1620 &pages_wanted);
1621
1622 switch (kr) {
1623 case KERN_SUCCESS:
1624 break;
1625 case KERN_ABORTED:
1626 return EINTR;
1627 default:
1628 return EINVAL;
1629 }
1630
1631 if (uap->pages_reclaimed) {
1632 if (copyout((void *)&pages_reclaimed,
1633 uap->pages_reclaimed,
1634 sizeof (pages_reclaimed)) != 0) {
1635 return EFAULT;
1636 }
1637 }
1638
1639 *retval = (int) pages_wanted;
1640 return 0;
1641 }