]> git.saurik.com Git - apple/xnu.git/blame - bsd/vm/vm_unix.c
xnu-1228.12.14.tar.gz
[apple/xnu.git] / bsd / vm / vm_unix.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
1c79356b 34/*
2d21ac55
A
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
1c79356b 39 */
9bccf70c 40
1c79356b
A
41#include <meta_features.h>
42
43#include <kern/task.h>
44#include <kern/thread.h>
45#include <kern/debug.h>
46#include <kern/lock.h>
91447636 47#include <mach/mach_traps.h>
2d21ac55
A
48#include <mach/port.h>
49#include <mach/task.h>
50#include <mach/task_access.h>
51#include <mach/task_special_ports.h>
1c79356b 52#include <mach/time_value.h>
91447636 53#include <mach/vm_map.h>
1c79356b
A
54#include <mach/vm_param.h>
55#include <mach/vm_prot.h>
1c79356b 56
91447636 57#include <sys/file_internal.h>
1c79356b
A
58#include <sys/param.h>
59#include <sys/systm.h>
60#include <sys/dir.h>
61#include <sys/namei.h>
91447636
A
62#include <sys/proc_internal.h>
63#include <sys/kauth.h>
1c79356b
A
64#include <sys/vm.h>
65#include <sys/file.h>
91447636 66#include <sys/vnode_internal.h>
1c79356b
A
67#include <sys/mount.h>
68#include <sys/trace.h>
69#include <sys/kernel.h>
91447636
A
70#include <sys/ubc_internal.h>
71#include <sys/user.h>
0c530ab8 72#include <sys/syslog.h>
9bccf70c 73#include <sys/stat.h>
91447636
A
74#include <sys/sysproto.h>
75#include <sys/mman.h>
0c530ab8 76#include <sys/sysctl.h>
1c79356b 77
e5568f75
A
78#include <bsm/audit_kernel.h>
79#include <bsm/audit_kevents.h>
80
1c79356b 81#include <kern/kalloc.h>
1c79356b
A
82#include <vm/vm_map.h>
83#include <vm/vm_kern.h>
84
85#include <machine/spl.h>
9bccf70c 86
2d21ac55
A
87#include <mach/shared_region.h>
88#include <vm/vm_shared_region.h>
9bccf70c 89
91447636 90#include <vm/vm_protos.h>
9bccf70c 91
2d21ac55
A
92/*
93 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
94 */
95
4a3eedf9 96#ifndef SECURE_KERNEL
2d21ac55
A
97extern int allow_stack_exec, allow_data_exec;
98
99SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
100SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
4a3eedf9 101#endif /* !SECURE_KERNEL */
2d21ac55
A
102
103#if CONFIG_NO_PRINTF_STRINGS
104void
105log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
106{
107}
108#else
109static const char *prot_values[] = {
110 "none",
111 "read-only",
112 "write-only",
113 "read-write",
114 "execute-only",
115 "read-execute",
116 "write-execute",
117 "read-write-execute"
118};
119
0c530ab8 120void
2d21ac55 121log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
0c530ab8 122{
2d21ac55
A
123 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
124 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
0c530ab8 125}
2d21ac55 126#endif
0c530ab8 127
1c79356b 128
91447636
A
129int
130useracc(
131 user_addr_t addr,
132 user_size_t len,
133 int prot)
1c79356b
A
134{
135 return (vm_map_check_protection(
136 current_map(),
91447636 137 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
1c79356b
A
138 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
139}
140
91447636
A
141int
142vslock(
143 user_addr_t addr,
144 user_size_t len)
1c79356b 145{
91447636
A
146 kern_return_t kret;
147 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
148 vm_map_round_page(addr+len),
1c79356b 149 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
0b4e3aa0
A
150
151 switch (kret) {
152 case KERN_SUCCESS:
153 return (0);
154 case KERN_INVALID_ADDRESS:
155 case KERN_NO_SPACE:
156 return (ENOMEM);
157 case KERN_PROTECTION_FAILURE:
158 return (EACCES);
159 default:
160 return (EINVAL);
161 }
1c79356b
A
162}
163
91447636
A
164int
165vsunlock(
166 user_addr_t addr,
167 user_size_t len,
168 __unused int dirtied)
1c79356b 169{
1c79356b 170#if FIXME /* [ */
91447636 171 pmap_t pmap;
1c79356b 172 vm_page_t pg;
91447636
A
173 vm_map_offset_t vaddr;
174 ppnum_t paddr;
1c79356b 175#endif /* FIXME ] */
0b4e3aa0 176 kern_return_t kret;
1c79356b
A
177
178#if FIXME /* [ */
179 if (dirtied) {
180 pmap = get_task_pmap(current_task());
91447636
A
181 for (vaddr = vm_map_trunc_page(addr);
182 vaddr < vm_map_round_page(addr+len);
1c79356b
A
183 vaddr += PAGE_SIZE) {
184 paddr = pmap_extract(pmap, vaddr);
185 pg = PHYS_TO_VM_PAGE(paddr);
186 vm_page_set_modified(pg);
187 }
188 }
189#endif /* FIXME ] */
190#ifdef lint
191 dirtied++;
192#endif /* lint */
91447636
A
193 kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
194 vm_map_round_page(addr+len), FALSE);
0b4e3aa0
A
195 switch (kret) {
196 case KERN_SUCCESS:
197 return (0);
198 case KERN_INVALID_ADDRESS:
199 case KERN_NO_SPACE:
200 return (ENOMEM);
201 case KERN_PROTECTION_FAILURE:
202 return (EACCES);
203 default:
204 return (EINVAL);
205 }
1c79356b
A
206}
207
91447636
A
208int
209subyte(
210 user_addr_t addr,
211 int byte)
1c79356b
A
212{
213 char character;
214
215 character = (char)byte;
216 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
217}
218
91447636
A
219int
220suibyte(
221 user_addr_t addr,
222 int byte)
1c79356b
A
223{
224 char character;
225
226 character = (char)byte;
91447636 227 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
1c79356b
A
228}
229
91447636 230int fubyte(user_addr_t addr)
1c79356b
A
231{
232 unsigned char byte;
233
234 if (copyin(addr, (void *) &byte, sizeof(char)))
235 return(-1);
236 return(byte);
237}
238
91447636 239int fuibyte(user_addr_t addr)
1c79356b
A
240{
241 unsigned char byte;
242
243 if (copyin(addr, (void *) &(byte), sizeof(char)))
244 return(-1);
245 return(byte);
246}
247
91447636
A
248int
249suword(
250 user_addr_t addr,
251 long word)
1c79356b
A
252{
253 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
254}
255
91447636 256long fuword(user_addr_t addr)
1c79356b
A
257{
258 long word;
259
260 if (copyin(addr, (void *) &word, sizeof(int)))
261 return(-1);
262 return(word);
263}
264
265/* suiword and fuiword are the same as suword and fuword, respectively */
266
91447636
A
267int
268suiword(
269 user_addr_t addr,
270 long word)
1c79356b
A
271{
272 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
273}
274
91447636 275long fuiword(user_addr_t addr)
1c79356b
A
276{
277 long word;
278
279 if (copyin(addr, (void *) &word, sizeof(int)))
280 return(-1);
281 return(word);
282}
91447636
A
283
284/*
285 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
286 * fetching and setting of process-sized size_t and pointer values.
287 */
288int
289sulong(user_addr_t addr, int64_t word)
290{
291
292 if (IS_64BIT_PROCESS(current_proc())) {
293 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
294 } else {
295 return(suiword(addr, (long)word));
296 }
297}
298
299int64_t
300fulong(user_addr_t addr)
301{
302 int64_t longword;
303
304 if (IS_64BIT_PROCESS(current_proc())) {
305 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
306 return(-1);
307 return(longword);
308 } else {
309 return((int64_t)fuiword(addr));
310 }
311}
1c79356b
A
312
313int
91447636
A
314suulong(user_addr_t addr, uint64_t uword)
315{
316
317 if (IS_64BIT_PROCESS(current_proc())) {
318 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
319 } else {
320 return(suiword(addr, (u_long)uword));
321 }
322}
323
324uint64_t
325fuulong(user_addr_t addr)
1c79356b 326{
91447636
A
327 uint64_t ulongword;
328
329 if (IS_64BIT_PROCESS(current_proc())) {
330 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
331 return(-1ULL);
332 return(ulongword);
333 } else {
334 return((uint64_t)fuiword(addr));
335 }
336}
337
338int
2d21ac55 339swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
91447636
A
340{
341 return(ENOTSUP);
1c79356b
A
342}
343
1c79356b
A
344
345kern_return_t
91447636
A
346pid_for_task(
347 struct pid_for_task_args *args)
1c79356b 348{
91447636
A
349 mach_port_name_t t = args->t;
350 user_addr_t pid_addr = args->pid;
2d21ac55 351 proc_t p;
1c79356b 352 task_t t1;
1c79356b 353 int pid = -1;
0b4e3aa0 354 kern_return_t err = KERN_SUCCESS;
1c79356b 355
e5568f75
A
356 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
357 AUDIT_ARG(mach_port1, t);
358
1c79356b
A
359 t1 = port_name_to_task(t);
360
361 if (t1 == TASK_NULL) {
362 err = KERN_FAILURE;
0b4e3aa0 363 goto pftout;
1c79356b
A
364 } else {
365 p = get_bsdtask_info(t1);
366 if (p) {
91447636 367 pid = proc_pid(p);
1c79356b
A
368 err = KERN_SUCCESS;
369 } else {
370 err = KERN_FAILURE;
371 }
372 }
373 task_deallocate(t1);
1c79356b 374pftout:
e5568f75 375 AUDIT_ARG(pid, pid);
91447636 376 (void) copyout((char *) &pid, pid_addr, sizeof(int));
e5568f75 377 AUDIT_MACH_SYSCALL_EXIT(err);
1c79356b
A
378 return(err);
379}
380
2d21ac55
A
381/*
382 *
383 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
384 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
385 *
386 */
387static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
388
389/*
390 * Routine: task_for_pid_posix_check
391 * Purpose:
392 * Verify that the current process should be allowed to
393 * get the target process's task port. This is only
394 * permitted if:
395 * - The current process is root
396 * OR all of the following are true:
397 * - The target process's real, effective, and saved uids
398 * are the same as the current proc's euid,
399 * - The target process's group set is a subset of the
400 * calling process's group set, and
401 * - The target process hasn't switched credentials.
402 *
403 * Returns: TRUE: permitted
404 * FALSE: denied
405 */
406static int
407task_for_pid_posix_check(proc_t target)
408{
409 kauth_cred_t targetcred, mycred;
410 uid_t myuid;
411 int allowed;
412
413 /* No task_for_pid on bad targets */
414 if (target == PROC_NULL || target->p_stat == SZOMB) {
415 return FALSE;
416 }
417
418 mycred = kauth_cred_get();
419 myuid = kauth_cred_getuid(mycred);
420
421 /* If we're running as root, the check passes */
422 if (kauth_cred_issuser(mycred))
423 return TRUE;
424
425 /* We're allowed to get our own task port */
426 if (target == current_proc())
427 return TRUE;
428
429 /*
430 * Under DENY, only root can get another proc's task port,
431 * so no more checks are needed.
432 */
433 if (tfp_policy == KERN_TFP_POLICY_DENY) {
434 return FALSE;
435 }
436
437 targetcred = kauth_cred_proc_ref(target);
438 allowed = TRUE;
439
440 /* Do target's ruid, euid, and saved uid match my euid? */
441 if ((kauth_cred_getuid(targetcred) != myuid) ||
442 (targetcred->cr_ruid != myuid) ||
443 (targetcred->cr_svuid != myuid)) {
444 allowed = FALSE;
445 goto out;
446 }
447
448 /* Are target's groups a subset of my groups? */
449 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
450 allowed == 0) {
451 allowed = FALSE;
452 goto out;
453 }
454
455 /* Has target switched credentials? */
456 if (target->p_flag & P_SUGID) {
457 allowed = FALSE;
458 goto out;
459 }
460
461out:
462 kauth_cred_unref(&targetcred);
463 return allowed;
464}
465
1c79356b
A
466/*
467 * Routine: task_for_pid
468 * Purpose:
469 * Get the task port for another "process", named by its
470 * process ID on the same host as "target_task".
471 *
472 * Only permitted to privileged processes, or processes
473 * with the same user ID.
91447636
A
474 *
475 * XXX This should be a BSD system call, not a Mach trap!!!
1c79356b
A
476 */
477kern_return_t
91447636
A
478task_for_pid(
479 struct task_for_pid_args *args)
1c79356b 480{
91447636
A
481 mach_port_name_t target_tport = args->target_tport;
482 int pid = args->pid;
483 user_addr_t task_addr = args->t;
484 struct uthread *uthread;
2d21ac55
A
485 proc_t p = PROC_NULL;
486 task_t t1 = TASK_NULL;
487 mach_port_name_t tret = MACH_PORT_NULL;
488 ipc_port_t tfpport;
1c79356b
A
489 void * sright;
490 int error = 0;
1c79356b 491
e5568f75
A
492 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
493 AUDIT_ARG(pid, pid);
494 AUDIT_ARG(mach_port1, target_tport);
495
2d21ac55
A
496#if defined(SECURE_KERNEL)
497 if (0 == pid) {
498 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
499 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
500 return(KERN_FAILURE);
501 }
502#endif
503
1c79356b
A
504 t1 = port_name_to_task(target_tport);
505 if (t1 == TASK_NULL) {
2d21ac55 506 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
e5568f75 507 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
0b4e3aa0 508 return(KERN_FAILURE);
1c79356b
A
509 }
510
91447636
A
511
512 /*
513 * Delayed binding of thread credential to process credential, if we
514 * are not running with an explicitly set thread credential.
515 */
516 uthread = get_bsdthread_info(current_thread());
2d21ac55 517 kauth_cred_uthread_update(uthread, current_proc());
91447636 518
2d21ac55 519 p = proc_find(pid);
e5568f75 520 AUDIT_ARG(process, p);
91447636 521
2d21ac55
A
522 if (!(task_for_pid_posix_check(p))) {
523 error = KERN_FAILURE;
524 goto tfpout;
525 }
0c530ab8 526
2d21ac55
A
527 if (p->task != TASK_NULL) {
528 /* If we aren't root and target's task access port is set... */
529 if (!kauth_cred_issuser(kauth_cred_get()) &&
cf7d32b8 530 p != current_proc() &&
2d21ac55
A
531 (task_get_task_access_port(p->task, &tfpport) == 0) &&
532 (tfpport != IPC_PORT_NULL)) {
0c530ab8 533
2d21ac55
A
534 if (tfpport == IPC_PORT_DEAD) {
535 error = KERN_PROTECTION_FAILURE;
536 goto tfpout;
537 }
0c530ab8 538
2d21ac55
A
539 /* Call up to the task access server */
540 error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);
0c530ab8 541
2d21ac55
A
542 if (error != MACH_MSG_SUCCESS) {
543 if (error == MACH_RCV_INTERRUPTED)
544 error = KERN_ABORTED;
545 else
546 error = KERN_FAILURE;
547 goto tfpout;
548 }
549 }
550#if CONFIG_MACF
551 error = mac_proc_check_get_task(kauth_cred_get(), p);
552 if (error) {
553 error = KERN_FAILURE;
1c79356b 554 goto tfpout;
2d21ac55
A
555 }
556#endif
557
558 /* Grant task port access */
559 task_reference(p->task);
560 sright = (void *) convert_task_to_port(p->task);
561 tret = ipc_port_copyout_send(
562 sright,
563 get_task_ipcspace(current_task()));
564 }
565 error = KERN_SUCCESS;
0c530ab8 566
1c79356b 567tfpout:
2d21ac55
A
568 task_deallocate(t1);
569 AUDIT_ARG(mach_port2, tret);
570 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
571 if (p != PROC_NULL)
572 proc_rele(p);
e5568f75 573 AUDIT_MACH_SYSCALL_EXIT(error);
1c79356b
A
574 return(error);
575}
576
0c530ab8
A
577/*
578 * Routine: task_name_for_pid
579 * Purpose:
580 * Get the task name port for another "process", named by its
581 * process ID on the same host as "target_task".
582 *
583 * Only permitted to privileged processes, or processes
584 * with the same user ID.
585 *
586 * XXX This should be a BSD system call, not a Mach trap!!!
587 */
588
589kern_return_t
590task_name_for_pid(
591 struct task_name_for_pid_args *args)
592{
593 mach_port_name_t target_tport = args->target_tport;
594 int pid = args->pid;
595 user_addr_t task_addr = args->t;
596 struct uthread *uthread;
2d21ac55 597 proc_t p = PROC_NULL;
0c530ab8
A
598 task_t t1;
599 mach_port_name_t tret;
600 void * sright;
2d21ac55
A
601 int error = 0, refheld = 0;
602 kauth_cred_t target_cred;
0c530ab8
A
603
604 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
605 AUDIT_ARG(pid, pid);
606 AUDIT_ARG(mach_port1, target_tport);
607
608 t1 = port_name_to_task(target_tport);
609 if (t1 == TASK_NULL) {
2d21ac55 610 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
0c530ab8
A
611 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
612 return(KERN_FAILURE);
613 }
614
0c530ab8
A
615
616 /*
617 * Delayed binding of thread credential to process credential, if we
618 * are not running with an explicitly set thread credential.
619 */
0c530ab8 620 uthread = get_bsdthread_info(current_thread());
2d21ac55 621 kauth_cred_uthread_update(uthread, current_proc());
0c530ab8 622
2d21ac55 623 p = proc_find(pid);
0c530ab8 624 AUDIT_ARG(process, p);
2d21ac55
A
625 if (p != PROC_NULL) {
626 target_cred = kauth_cred_proc_ref(p);
627 refheld = 1;
628
629 if ((p->p_stat != SZOMB)
630 && ((current_proc() == p)
631 || kauth_cred_issuser(kauth_cred_get())
632 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
633 ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {
634
635 if (p->task != TASK_NULL) {
636 task_reference(p->task);
637#if CONFIG_MACF
638 error = mac_proc_check_get_task_name(kauth_cred_get(), p);
639 if (error) {
640 task_deallocate(p->task);
641 goto noperm;
642 }
643#endif
644 sright = (void *)convert_task_name_to_port(p->task);
645 tret = ipc_port_copyout_send(sright,
0c530ab8 646 get_task_ipcspace(current_task()));
2d21ac55
A
647 } else
648 tret = MACH_PORT_NULL;
649
650 AUDIT_ARG(mach_port2, tret);
651 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
652 task_deallocate(t1);
653 error = KERN_SUCCESS;
654 goto tnfpout;
655 }
0c530ab8
A
656 }
657
2d21ac55
A
658#if CONFIG_MACF
659noperm:
660#endif
661 task_deallocate(t1);
0c530ab8
A
662 tret = MACH_PORT_NULL;
663 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
664 error = KERN_FAILURE;
665tnfpout:
2d21ac55
A
666 if (refheld != 0)
667 kauth_cred_unref(&target_cred);
668 if (p != PROC_NULL)
669 proc_rele(p);
0c530ab8
A
670 AUDIT_MACH_SYSCALL_EXIT(error);
671 return(error);
672}
673
674static int
675sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
676 __unused int arg2, struct sysctl_req *req)
677{
678 int error = 0;
679 int new_value;
680
681 error = SYSCTL_OUT(req, arg1, sizeof(int));
682 if (error || req->newptr == USER_ADDR_NULL)
683 return(error);
684
685 if (!is_suser())
686 return(EPERM);
687
688 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
689 goto out;
690 }
691 if ((new_value == KERN_TFP_POLICY_DENY)
2d21ac55 692 || (new_value == KERN_TFP_POLICY_DEFAULT))
0c530ab8
A
693 tfp_policy = new_value;
694 else
695 error = EINVAL;
696out:
697 return(error);
698
699}
700
2d21ac55
A
701#if defined(SECURE_KERNEL)
702static int kern_secure_kernel = 1;
703#else
704static int kern_secure_kernel = 0;
705#endif
0c530ab8 706
2d21ac55 707SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");
0c530ab8 708
2d21ac55 709SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
0c530ab8
A
710SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
711 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
0c530ab8 712
2d21ac55
A
713SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
714 &shared_region_trace_level, 0, "");
715SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
716 &shared_region_version, 0, "");
717SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
718 &shared_region_persistence, 0, "");
1c79356b 719
91447636 720/*
2d21ac55 721 * shared_region_check_np:
91447636 722 *
2d21ac55
A
723 * This system call is intended for dyld.
724 *
725 * dyld calls this when any process starts to see if the process's shared
726 * region is already set up and ready to use.
727 * This call returns the base address of the first mapping in the
728 * process's shared region's first mapping.
729 * dyld will then check what's mapped at that address.
730 *
731 * If the shared region is empty, dyld will then attempt to map the shared
732 * cache file in the shared region via the shared_region_map_np() system call.
733 *
734 * If something's already mapped in the shared region, dyld will check if it
735 * matches the shared cache it would like to use for that process.
736 * If it matches, evrything's ready and the process can proceed and use the
737 * shared region.
738 * If it doesn't match, dyld will unmap the shared region and map the shared
739 * cache into the process's address space via mmap().
740 *
741 * ERROR VALUES
742 * EINVAL no shared region
743 * ENOMEM shared region is empty
744 * EFAULT bad address for "start_address"
91447636
A
745 */
746int
2d21ac55
A
747shared_region_check_np(
748 __unused struct proc *p,
749 struct shared_region_check_np_args *uap,
750 __unused int *retvalp)
91447636 751{
2d21ac55
A
752 vm_shared_region_t shared_region;
753 mach_vm_offset_t start_address;
754 int error;
755 kern_return_t kr;
756
757 SHARED_REGION_TRACE_DEBUG(
758 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
759 current_thread(), p->p_pid, p->p_comm,
760 (uint64_t)uap->start_address));
761
762 /* retrieve the current tasks's shared region */
763 shared_region = vm_shared_region_get(current_task());
764 if (shared_region != NULL) {
765 /* retrieve address of its first mapping... */
766 kr = vm_shared_region_start_address(shared_region,
767 &start_address);
91447636
A
768 if (kr != KERN_SUCCESS) {
769 error = ENOMEM;
2d21ac55
A
770 } else {
771 /* ... and give it to the caller */
772 error = copyout(&start_address,
773 (user_addr_t) uap->start_address,
774 sizeof (start_address));
775 if (error) {
776 SHARED_REGION_TRACE_ERROR(
777 ("shared_region: %p [%d(%s)] "
778 "check_np(0x%llx) "
779 "copyout(0x%llx) error %d\n",
780 current_thread(), p->p_pid, p->p_comm,
781 (uint64_t)uap->start_address, (uint64_t)start_address,
782 error));
783 }
91447636 784 }
2d21ac55 785 vm_shared_region_deallocate(shared_region);
91447636 786 } else {
2d21ac55 787 /* no shared region ! */
91447636 788 error = EINVAL;
91447636 789 }
0c530ab8 790
2d21ac55
A
791 SHARED_REGION_TRACE_DEBUG(
792 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
0c530ab8 793 current_thread(), p->p_pid, p->p_comm,
2d21ac55 794 (uint64_t)uap->start_address, (uint64_t)start_address, error));
0c530ab8 795
91447636
A
796 return error;
797}
798
91447636 799/*
2d21ac55 800 * shared_region_map_np()
91447636 801 *
2d21ac55 802 * This system call is intended for dyld.
91447636 803 *
2d21ac55
A
804 * dyld uses this to map a shared cache file into a shared region.
805 * This is usually done only the first time a shared cache is needed.
806 * Subsequent processes will just use the populated shared region without
807 * requiring any further setup.
91447636
A
808 */
809int
2d21ac55 810shared_region_map_np(
91447636 811 struct proc *p,
2d21ac55 812 struct shared_region_map_np_args *uap,
91447636
A
813 __unused int *retvalp)
814{
2d21ac55
A
815 int error;
816 kern_return_t kr;
817 int fd;
818 struct fileproc *fp;
819 struct vnode *vp, *root_vp;
820 struct vnode_attr va;
821 off_t fs;
822 memory_object_size_t file_size;
823 user_addr_t user_mappings;
824 struct shared_file_mapping_np *mappings;
4a3eedf9 825#define SFM_MAX_STACK 8
2d21ac55
A
826 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
827 unsigned int mappings_count;
828 vm_size_t mappings_size;
829 memory_object_control_t file_control;
830 struct vm_shared_region *shared_region;
831
832 SHARED_REGION_TRACE_DEBUG(
833 ("shared_region: %p [%d(%s)] -> map\n",
834 current_thread(), p->p_pid, p->p_comm));
835
836 shared_region = NULL;
837 mappings_count = 0;
8ad349bb 838 mappings_size = 0;
91447636 839 mappings = NULL;
91447636
A
840 fp = NULL;
841 vp = NULL;
842
2d21ac55 843 /* get file descriptor for shared region cache file */
91447636
A
844 fd = uap->fd;
845
846 /* get file structure from file descriptor */
847 error = fp_lookup(p, fd, &fp, 0);
848 if (error) {
2d21ac55
A
849 SHARED_REGION_TRACE_ERROR(
850 ("shared_region: %p [%d(%s)] map: "
0c530ab8
A
851 "fd=%d lookup failed (error=%d)\n",
852 current_thread(), p->p_pid, p->p_comm, fd, error));
91447636
A
853 goto done;
854 }
855
856 /* make sure we're attempting to map a vnode */
857 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
2d21ac55
A
858 SHARED_REGION_TRACE_ERROR(
859 ("shared_region: %p [%d(%s)] map: "
0c530ab8
A
860 "fd=%d not a vnode (type=%d)\n",
861 current_thread(), p->p_pid, p->p_comm,
862 fd, fp->f_fglob->fg_type));
91447636
A
863 error = EINVAL;
864 goto done;
865 }
866
867 /* we need at least read permission on the file */
868 if (! (fp->f_fglob->fg_flag & FREAD)) {
2d21ac55
A
869 SHARED_REGION_TRACE_ERROR(
870 ("shared_region: %p [%d(%s)] map: "
0c530ab8
A
871 "fd=%d not readable\n",
872 current_thread(), p->p_pid, p->p_comm, fd));
91447636
A
873 error = EPERM;
874 goto done;
875 }
876
877 /* get vnode from file structure */
2d21ac55 878 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
91447636 879 if (error) {
2d21ac55
A
880 SHARED_REGION_TRACE_ERROR(
881 ("shared_region: %p [%d(%s)] map: "
0c530ab8
A
882 "fd=%d getwithref failed (error=%d)\n",
883 current_thread(), p->p_pid, p->p_comm, fd, error));
91447636
A
884 goto done;
885 }
886 vp = (struct vnode *) fp->f_fglob->fg_data;
887
888 /* make sure the vnode is a regular file */
889 if (vp->v_type != VREG) {
2d21ac55
A
890 SHARED_REGION_TRACE_ERROR(
891 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
0c530ab8
A
892 "not a file (type=%d)\n",
893 current_thread(), p->p_pid, p->p_comm,
894 vp, vp->v_name, vp->v_type));
91447636
A
895 error = EINVAL;
896 goto done;
897 }
898
2d21ac55
A
899 /* make sure vnode is on the process's root volume */
900 root_vp = p->p_fd->fd_rdir;
901 if (root_vp == NULL) {
902 root_vp = rootvnode;
91447636 903 }
2d21ac55
A
904 if (vp->v_mount != root_vp->v_mount) {
905 SHARED_REGION_TRACE_ERROR(
906 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
907 "not on process's root volume\n",
0c530ab8
A
908 current_thread(), p->p_pid, p->p_comm,
909 vp, vp->v_name));
2d21ac55 910 error = EPERM;
91447636 911 goto done;
91447636
A
912 }
913
2d21ac55
A
914 /* make sure vnode is owned by "root" */
915 VATTR_INIT(&va);
916 VATTR_WANTED(&va, va_uid);
917 error = vnode_getattr(vp, &va, vfs_context_current());
918 if (error) {
919 SHARED_REGION_TRACE_ERROR(
920 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
921 "vnode_getattr(%p) failed (error=%d)\n",
0c530ab8 922 current_thread(), p->p_pid, p->p_comm,
2d21ac55 923 vp, vp->v_name, vp, error));
91447636
A
924 goto done;
925 }
2d21ac55
A
926 if (va.va_uid != 0) {
927 SHARED_REGION_TRACE_ERROR(
928 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
929 "owned by uid=%d instead of 0\n",
930 current_thread(), p->p_pid, p->p_comm,
931 vp, vp->v_name, va.va_uid));
932 error = EPERM;
933 goto done;
91447636 934 }
2d21ac55
A
935
936 /* get vnode size */
937 error = vnode_size(vp, &fs, vfs_context_current());
938 if (error) {
939 SHARED_REGION_TRACE_ERROR(
940 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
941 "vnode_size(%p) failed (error=%d)\n",
942 current_thread(), p->p_pid, p->p_comm,
943 vp, vp->v_name, vp, error));
944 goto done;
91447636 945 }
2d21ac55 946 file_size = fs;
91447636
A
947
948 /* get the file's memory object handle */
91447636
A
949 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
950 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
2d21ac55
A
951 SHARED_REGION_TRACE_ERROR(
952 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
953 "no memory object\n",
0c530ab8
A
954 current_thread(), p->p_pid, p->p_comm,
955 vp, vp->v_name));
91447636
A
956 error = EINVAL;
957 goto done;
958 }
2d21ac55
A
959
960 /* get the list of mappings the caller wants us to establish */
961 mappings_count = uap->count; /* number of mappings */
962 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
963 if (mappings_count == 0) {
964 SHARED_REGION_TRACE_INFO(
965 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
966 "no mappings\n",
967 current_thread(), p->p_pid, p->p_comm,
968 vp, vp->v_name));
969 error = 0; /* no mappings: we're done ! */
970 goto done;
971 } else if (mappings_count <= SFM_MAX_STACK) {
972 mappings = &stack_mappings[0];
91447636 973 } else {
2d21ac55
A
974 SHARED_REGION_TRACE_ERROR(
975 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
976 "too many mappings (%d)\n",
977 current_thread(), p->p_pid, p->p_comm,
978 vp, vp->v_name, mappings_count));
979 error = EINVAL;
980 goto done;
91447636 981 }
91447636 982
2d21ac55
A
983 user_mappings = uap->mappings; /* the mappings, in user space */
984 error = copyin(user_mappings,
985 mappings,
986 mappings_size);
987 if (error) {
988 SHARED_REGION_TRACE_ERROR(
989 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
990 "copyin(0x%llx, %d) failed (error=%d)\n",
0c530ab8 991 current_thread(), p->p_pid, p->p_comm,
2d21ac55 992 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
91447636
A
993 goto done;
994 }
995
2d21ac55
A
996 /* get the process's shared region (setup in vm_map_exec()) */
997 shared_region = vm_shared_region_get(current_task());
998 if (shared_region == NULL) {
999 SHARED_REGION_TRACE_ERROR(
1000 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1001 "no shared region\n",
1002 current_thread(), p->p_pid, p->p_comm,
1003 vp, vp->v_name));
1004 goto done;
1005 }
91447636 1006
2d21ac55
A
1007 /* map the file into that shared region's submap */
1008 kr = vm_shared_region_map_file(shared_region,
1009 mappings_count,
1010 mappings,
1011 file_control,
1012 file_size,
1013 (void *) p->p_fd->fd_rdir);
1014 if (kr != KERN_SUCCESS) {
1015 SHARED_REGION_TRACE_ERROR(
1016 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1017 "vm_shared_region_map_file() failed kr=0x%x\n",
0c530ab8
A
1018 current_thread(), p->p_pid, p->p_comm,
1019 vp, vp->v_name, kr));
1020 switch (kr) {
1021 case KERN_INVALID_ADDRESS:
1022 error = EFAULT;
2d21ac55 1023 break;
0c530ab8
A
1024 case KERN_PROTECTION_FAILURE:
1025 error = EPERM;
2d21ac55 1026 break;
0c530ab8
A
1027 case KERN_NO_SPACE:
1028 error = ENOMEM;
2d21ac55 1029 break;
0c530ab8
A
1030 case KERN_FAILURE:
1031 case KERN_INVALID_ARGUMENT:
1032 default:
1033 error = EINVAL;
2d21ac55 1034 break;
0c530ab8 1035 }
2d21ac55 1036 goto done;
91447636
A
1037 }
1038
2d21ac55
A
1039 error = 0;
1040
1041 /* update the vnode's access time */
1042 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
1043 VATTR_INIT(&va);
1044 nanotime(&va.va_access_time);
1045 VATTR_SET_ACTIVE(&va, va_access_time);
1046 vnode_setattr(vp, &va, vfs_context_current());
91447636
A
1047 }
1048
2d21ac55
A
1049 if (p->p_flag & P_NOSHLIB) {
1050 /* signal that this process is now using split libraries */
1051 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
91447636
A
1052 }
1053
1054done:
1055 if (vp != NULL) {
1056 /*
1057 * release the vnode...
1058 * ubc_map() still holds it for us in the non-error case
1059 */
1060 (void) vnode_put(vp);
1061 vp = NULL;
1062 }
1063 if (fp != NULL) {
1064 /* release the file descriptor */
1065 fp_drop(p, fd, fp, 0);
1066 fp = NULL;
1067 }
9bccf70c 1068
2d21ac55
A
1069 if (shared_region != NULL) {
1070 vm_shared_region_deallocate(shared_region);
9bccf70c
A
1071 }
1072
2d21ac55
A
1073 SHARED_REGION_TRACE_DEBUG(
1074 ("shared_region: %p [%d(%s)] <- map\n",
1075 current_thread(), p->p_pid, p->p_comm));
9bccf70c 1076
2d21ac55 1077 return error;
9bccf70c
A
1078}
1079
9bccf70c 1080
2d21ac55 1081/* sysctl overflow room */
9bccf70c 1082
2d21ac55
A
1083/* vm_page_free_target is provided as a makeshift solution for applications that want to
1084 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
1085 reclaimed. It allows the app to calculate how much memory is free outside the free target. */
1086extern unsigned int vm_page_free_target;
1087SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
1088 &vm_page_free_target, 0, "Pageout daemon free target");
9bccf70c 1089