[apple/xnu.git] bsd/vm/vm_unix.c (xnu-1456.1.26)
/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

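/*
 * A minimal userspace sketch (illustrative only, not part of this file) of
 * how the vm.allow_stack_exec / vm.allow_data_exec knobs declared above can
 * be inspected or toggled through sysctlbyname(3).  Changing them requires
 * superuser privileges.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <sys/sysctl.h>

int
main(void)
{
	int allow_data_exec = 0;
	size_t len = sizeof(allow_data_exec);

	/* read the current setting */
	if (sysctlbyname("vm.allow_data_exec", &allow_data_exec, &len, NULL, 0) == 0)
		printf("vm.allow_data_exec = %d\n", allow_data_exec);

	/* attempt to disable the data-execution override (needs privilege) */
	allow_data_exec = 0;
	(void) sysctlbyname("vm.allow_data_exec", NULL, NULL,
	    &allow_data_exec, sizeof(allow_data_exec));
	return 0;
}
#endif
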
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW,
	   &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int		prot)
{
	return (vm_map_check_protection(
			current_map(),
			vm_map_trunc_page(addr), vm_map_round_page(addr+len),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t kret;
	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
			vm_map_round_page(addr+len),
			VM_PROT_READ | VM_PROT_WRITE, FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

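/*
 * A minimal sketch (illustrative only) of the intended vslock()/vsunlock()
 * usage pattern: wire a range of user pages so they cannot be paged out while
 * kernel code works on them, then unwire them.  The helper name and the
 * caller-supplied kernel buffer are hypothetical.
 */
#if 0 /* example only, not compiled as part of this file */
static int
example_copyin_wired(user_addr_t uaddr, void *kbuf, user_size_t len)
{
	int error;

	error = vslock(uaddr, len);		/* wire the user pages */
	if (error)
		return (error);			/* ENOMEM, EACCES or EINVAL */

	error = copyin(uaddr, kbuf, len);	/* range stays resident here */

	(void) vsunlock(uaddr, len, 0);		/* unwire; not dirtied */
	return (error);
}
#endif
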
int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return(-1);
	return(byte);
}

int fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return(-1);
	return(byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return(-1);
		return(longword);
	} else {
		return((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return(-1ULL);
		return(ulongword);
	} else {
		return((uint64_t)fuiword(addr));
	}
}

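/*
 * A minimal sketch (illustrative only) of how sulong()/fulong() above let
 * kernel code handle a "pointer-sized" user value without caring whether the
 * calling process is 32-bit or 64-bit.  The helper shape and argument names
 * are hypothetical.
 */
#if 0 /* example only, not compiled as part of this file */
static int
example_swap_user_long(user_addr_t uaddr, int64_t new_value, int64_t *old_value)
{
	/* fetch the old process-sized value (4 or 8 bytes, as appropriate) */
	*old_value = fulong(uaddr);

	/* store the replacement; sulong() narrows it for 32-bit callers */
	if (sulong(uaddr, new_value) != 0)
		return (EFAULT);
	return (0);
}
#endif
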
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *		KERN_FAILURE	Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}

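/*
 * A minimal userspace sketch (illustrative only) of the pid_for_task() trap
 * documented above: resolve the BSD pid behind a Mach task port.  Here the
 * caller simply asks about its own task.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_traps.h>

int
main(void)
{
	int pid = -1;

	/* mach_task_self() names our own task; the trap fills in the pid */
	if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
		printf("my pid is %d\n", pid);
	return 0;
}
#endif
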
/*
 * tfp_policy = KERN_TFP_POLICY_DENY;	  Deny Mode: none allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT;  Default Mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (targetcred->cr_ruid != myuid) ||
	    (targetcred->cr_svuid != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
#if CONFIG_AUDIT
	if (p != PROC_NULL)
		AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}

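/*
 * A minimal userspace sketch (illustrative only) of calling the
 * task_for_pid() trap implemented above.  The request is subject to the
 * POSIX checks, the kern.tfp.policy setting, the task access server, and
 * MACF policy, so it typically needs privilege or a same-uid, non-setuid
 * target.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <stdlib.h>
#include <mach/mach.h>
#include <mach/mach_traps.h>

int
main(int argc, char *argv[])
{
	mach_port_name_t task = MACH_PORT_NULL;
	kern_return_t kr;
	int pid;

	if (argc < 2)
		return 1;
	pid = atoi(argv[1]);

	/* ask the kernel for a send right to the target's task port */
	kr = task_for_pid(mach_task_self(), pid, &task);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "task_for_pid(%d) failed: %s\n",
		    pid, mach_error_string(kr));
		return 1;
	}
	printf("got task port 0x%x for pid %d\n", task, pid);
	mach_port_deallocate(mach_task_self(), task);
	return 0;
}
#endif
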
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
					       get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}

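/*
 * A minimal userspace sketch (illustrative only) of the task_name_for_pid()
 * trap above.  The task name port it returns only identifies the task and
 * supports basic inspection; it does not grant the control that a full task
 * port from task_for_pid() does.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_traps.h>

static void
example_print_task_name_port(int pid)
{
	mach_port_name_t name_port = MACH_PORT_NULL;

	if (task_name_for_pid(mach_task_self(), pid, &name_port) == KERN_SUCCESS) {
		printf("task name port for pid %d: 0x%x\n", pid, name_port);
		mach_port_deallocate(mach_task_self(), name_port);
	}
}
#endif
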
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return(error);

	if (!is_suser())
		return(EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return(error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

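/*
 * A minimal userspace sketch (illustrative only) of reading the
 * kern.tfp.policy knob exported above, which selects between
 * KERN_TFP_POLICY_DENY and KERN_TFP_POLICY_DEFAULT for task_for_pid().
 * Writing it goes through sysctl_settfp_policy() and so requires superuser
 * privileges.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <sys/sysctl.h>

int
main(void)
{
	int policy = 0;
	size_t len = sizeof(policy);

	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
		printf("kern.tfp.policy = %d\n", policy);
	return 0;
}
#endif
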
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
	   &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}

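/*
 * A minimal sketch (illustrative only) of the dyld-side usage described in
 * the comment above shared_region_check_np().  The __shared_region_check_np()
 * syscall stub and the expected cache base address are assumptions made for
 * the example; real dyld compares the returned address against the base of
 * the shared cache it intends to use.
 */
#if 0 /* userspace (dyld-style) example, not compiled into the kernel */
#include <stdint.h>

extern int __shared_region_check_np(uint64_t *start_address); /* assumed stub */

static int
example_shared_region_ready(uint64_t expected_cache_base)
{
	uint64_t start_address = 0;

	if (__shared_region_check_np(&start_address) != 0)
		return 0;	/* no shared region, or it is still empty */

	/* something is mapped; usable only if it is the cache we expect */
	return (start_address == expected_cache_base);
}
#endif
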
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

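/*
 * A minimal sketch (illustrative only) of how a dyld-like caller would hand
 * shared_region_map_np() a mapping list.  The __shared_region_map_np() stub
 * name and the specific size/offset values are assumptions made for the
 * example; struct shared_file_mapping_np comes from <mach/shared_region.h>.
 */
#if 0 /* userspace (dyld-style) example, not compiled into the kernel */
#include <stdint.h>
#include <fcntl.h>
#include <mach/shared_region.h>
#include <mach/vm_prot.h>

/* assumed syscall stub */
extern int __shared_region_map_np(int fd, uint32_t count,
    const struct shared_file_mapping_np mappings[]);

static int
example_map_cache(const char *cache_path)
{
	struct shared_file_mapping_np mapping;
	int fd;

	fd = open(cache_path, O_RDONLY);
	if (fd < 0)
		return -1;

	/* one read/execute mapping of the start of the cache file */
	mapping.sfm_address	= SHARED_REGION_BASE;	/* where in the region */
	mapping.sfm_size	= 16 * 1024 * 1024;	/* assumed size */
	mapping.sfm_file_offset	= 0;
	mapping.sfm_max_prot	= VM_PROT_READ | VM_PROT_EXECUTE;
	mapping.sfm_init_prot	= VM_PROT_READ | VM_PROT_EXECUTE;

	return __shared_region_map_np(fd, 1, &mapping);
}
#endif
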

/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed.  It allows the app to calculate how much memory is free outside the free target.
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
	   &vm_page_free_target, 0, "Pageout daemon free target");

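/*
 * A minimal userspace sketch (illustrative only) of the use case described in
 * the comment above: read vm.vm_page_free_target so an application can size
 * its buffers relative to the pageout daemon's free target.  The page-size
 * lookup via sysconf(3) is an assumption made for the example.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <unistd.h>
#include <sys/sysctl.h>

int
main(void)
{
	unsigned int free_target = 0;
	size_t len = sizeof(free_target);

	if (sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0) == 0)
		printf("pageout free target: %u pages (%ld bytes)\n",
		    free_target, (long)free_target * sysconf(_SC_PAGESIZE));
	return 0;
}
#endif
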
extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD,
	    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD,
	    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD,
	    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD,
	    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD,
	    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD,
	    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD,
	    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD,
	    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD,
	    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD,
	    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD,
	    &vm_page_stats_reusable.can_reuse_failure, "");


int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}
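
/*
 * A minimal userspace sketch (illustrative only) of driving the
 * vm_pressure_monitor() system call above.  The prototype below is an
 * assumption (the call is not declared in a public header); its arguments
 * mirror struct vm_pressure_monitor_args, and the wrapper returns the
 * pages-wanted count from *retval or -1 with errno set on failure.
 */
#if 0 /* userspace example, not compiled into the kernel */
#include <stdio.h>
#include <stdint.h>

/* assumed syscall wrapper */
extern int vm_pressure_monitor(int wait_for_pressure, int nsecs_monitored,
    uint32_t *pages_reclaimed);

int
main(void)
{
	uint32_t pages_reclaimed = 0;
	int pages_wanted;

	/* block until the pageout daemon signals pressure, then report */
	pages_wanted = vm_pressure_monitor(1 /* wait_for_pressure */,
	    1 /* nsecs_monitored */, &pages_reclaimed);
	if (pages_wanted < 0) {
		perror("vm_pressure_monitor");
		return 1;
	}
	printf("pageout wants %d more pages; %u reclaimed recently\n",
	    pages_wanted, pages_reclaimed);
	return 0;
}
#endif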