2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
41 #include <meta_features.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
72 #include <sys/syslog.h>
74 #include <sys/sysproto.h>
76 #include <sys/sysctl.h>
78 #include <security/audit/audit.h>
79 #include <bsm/audit_kevents.h>
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_pageout.h>
86 #include <machine/spl.h>
88 #include <mach/shared_region.h>
89 #include <vm/vm_shared_region.h>
91 #include <vm/vm_protos.h>
94 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
98 extern int allow_stack_exec
, allow_data_exec
;
100 SYSCTL_INT(_vm
, OID_AUTO
, allow_stack_exec
, CTLFLAG_RW
, &allow_stack_exec
, 0, "");
101 SYSCTL_INT(_vm
, OID_AUTO
, allow_data_exec
, CTLFLAG_RW
, &allow_data_exec
, 0, "");
102 #endif /* !SECURE_KERNEL */
104 static const char *prot_values
[] = {
116 log_stack_execution_failure(addr64_t vaddr
, vm_prot_t prot
)
118 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
119 current_proc()->p_comm
, current_proc()->p_pid
, vaddr
, prot_values
[prot
& VM_PROT_ALL
]);
122 int shared_region_unnest_logging
= 1;
124 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_unnest_logging
, CTLFLAG_RW
,
125 &shared_region_unnest_logging
, 0, "");
127 int vm_shared_region_unnest_log_interval
= 10;
128 int shared_region_unnest_log_count_threshold
= 5;
130 /* These log rate throttling state variables aren't thread safe, but
131 * are sufficient unto the task.
133 static int64_t last_unnest_log_time
= 0;
134 static int shared_region_unnest_log_count
= 0;
136 void log_unnest_badness(vm_map_t m
, vm_map_offset_t s
, vm_map_offset_t e
) {
138 const char *pcommstr
;
140 if (shared_region_unnest_logging
== 0)
143 if (shared_region_unnest_logging
== 1) {
145 if ((tv
.tv_sec
- last_unnest_log_time
) < vm_shared_region_unnest_log_interval
) {
146 if (shared_region_unnest_log_count
++ > shared_region_unnest_log_count_threshold
)
150 last_unnest_log_time
= tv
.tv_sec
;
151 shared_region_unnest_log_count
= 0;
155 pcommstr
= current_proc()->p_comm
;
157 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm
, get_task_map(current_proc()->task
), m
, (uint64_t)s
, (uint64_t)e
);
166 return (vm_map_check_protection(
168 vm_map_trunc_page(addr
), vm_map_round_page(addr
+len
),
169 prot
== B_READ
? VM_PROT_READ
: VM_PROT_WRITE
));
178 kret
= vm_map_wire(current_map(), vm_map_trunc_page(addr
),
179 vm_map_round_page(addr
+len
),
180 VM_PROT_READ
| VM_PROT_WRITE
,FALSE
);
185 case KERN_INVALID_ADDRESS
:
188 case KERN_PROTECTION_FAILURE
:
199 __unused
int dirtied
)
204 vm_map_offset_t vaddr
;
211 pmap
= get_task_pmap(current_task());
212 for (vaddr
= vm_map_trunc_page(addr
);
213 vaddr
< vm_map_round_page(addr
+len
);
214 vaddr
+= PAGE_SIZE
) {
215 paddr
= pmap_extract(pmap
, vaddr
);
216 pg
= PHYS_TO_VM_PAGE(paddr
);
217 vm_page_set_modified(pg
);
224 kret
= vm_map_unwire(current_map(), vm_map_trunc_page(addr
),
225 vm_map_round_page(addr
+len
), FALSE
);
229 case KERN_INVALID_ADDRESS
:
232 case KERN_PROTECTION_FAILURE
:
246 character
= (char)byte
;
247 return (copyout((void *)&(character
), addr
, sizeof(char)) == 0 ? 0 : -1);
257 character
= (char)byte
;
258 return (copyout((void *)&(character
), addr
, sizeof(char)) == 0 ? 0 : -1);
261 int fubyte(user_addr_t addr
)
265 if (copyin(addr
, (void *) &byte
, sizeof(char)))
270 int fuibyte(user_addr_t addr
)
274 if (copyin(addr
, (void *) &(byte
), sizeof(char)))
284 return (copyout((void *) &word
, addr
, sizeof(int)) == 0 ? 0 : -1);
287 long fuword(user_addr_t addr
)
291 if (copyin(addr
, (void *) &word
, sizeof(int)))
296 /* suiword and fuiword are the same as suword and fuword, respectively */
303 return (copyout((void *) &word
, addr
, sizeof(int)) == 0 ? 0 : -1);
306 long fuiword(user_addr_t addr
)
310 if (copyin(addr
, (void *) &word
, sizeof(int)))
316 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
317 * fetching and setting of process-sized size_t and pointer values.
320 sulong(user_addr_t addr
, int64_t word
)
323 if (IS_64BIT_PROCESS(current_proc())) {
324 return(copyout((void *)&word
, addr
, sizeof(word
)) == 0 ? 0 : -1);
326 return(suiword(addr
, (long)word
));
331 fulong(user_addr_t addr
)
335 if (IS_64BIT_PROCESS(current_proc())) {
336 if (copyin(addr
, (void *)&longword
, sizeof(longword
)) != 0)
340 return((int64_t)fuiword(addr
));
345 suulong(user_addr_t addr
, uint64_t uword
)
348 if (IS_64BIT_PROCESS(current_proc())) {
349 return(copyout((void *)&uword
, addr
, sizeof(uword
)) == 0 ? 0 : -1);
351 return(suiword(addr
, (uint32_t)uword
));
356 fuulong(user_addr_t addr
)
360 if (IS_64BIT_PROCESS(current_proc())) {
361 if (copyin(addr
, (void *)&ulongword
, sizeof(ulongword
)) != 0)
365 return((uint64_t)fuiword(addr
));
370 swapon(__unused proc_t procp
, __unused
struct swapon_args
*uap
, __unused
int *retval
)
378 * Find the BSD process ID for the Mach task associated with the given Mach port
381 * Parameters: args User argument descriptor (see below)
383 * Indirect parameters: args->t Mach port name
384 * args->pid Process ID (returned value; see below)
386 * Returns: KERN_SUCCESS Success
387 * KERN_FAILURE Not success
389 * Implicit returns: args->pid Process ID
394 struct pid_for_task_args
*args
)
396 mach_port_name_t t
= args
->t
;
397 user_addr_t pid_addr
= args
->pid
;
401 kern_return_t err
= KERN_SUCCESS
;
403 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK
);
404 AUDIT_ARG(mach_port1
, t
);
406 t1
= port_name_to_task(t
);
408 if (t1
== TASK_NULL
) {
412 p
= get_bsdtask_info(t1
);
423 (void) copyout((char *) &pid
, pid_addr
, sizeof(int));
424 AUDIT_MACH_SYSCALL_EXIT(err
);
430 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
431 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
434 static int tfp_policy
= KERN_TFP_POLICY_DEFAULT
;
437 * Routine: task_for_pid_posix_check
439 * Verify that the current process should be allowed to
440 * get the target process's task port. This is only
442 * - The current process is root
443 * OR all of the following are true:
444 * - The target process's real, effective, and saved uids
445 * are the same as the current proc's euid,
446 * - The target process's group set is a subset of the
447 * calling process's group set, and
448 * - The target process hasn't switched credentials.
450 * Returns: TRUE: permitted
454 task_for_pid_posix_check(proc_t target
)
456 kauth_cred_t targetcred
, mycred
;
460 /* No task_for_pid on bad targets */
461 if (target
== PROC_NULL
|| target
->p_stat
== SZOMB
) {
465 mycred
= kauth_cred_get();
466 myuid
= kauth_cred_getuid(mycred
);
468 /* If we're running as root, the check passes */
469 if (kauth_cred_issuser(mycred
))
472 /* We're allowed to get our own task port */
473 if (target
== current_proc())
477 * Under DENY, only root can get another proc's task port,
478 * so no more checks are needed.
480 if (tfp_policy
== KERN_TFP_POLICY_DENY
) {
484 targetcred
= kauth_cred_proc_ref(target
);
487 /* Do target's ruid, euid, and saved uid match my euid? */
488 if ((kauth_cred_getuid(targetcred
) != myuid
) ||
489 (targetcred
->cr_ruid
!= myuid
) ||
490 (targetcred
->cr_svuid
!= myuid
)) {
495 /* Are target's groups a subset of my groups? */
496 if (kauth_cred_gid_subset(targetcred
, mycred
, &allowed
) ||
502 /* Has target switched credentials? */
503 if (target
->p_flag
& P_SUGID
) {
509 kauth_cred_unref(&targetcred
);
514 * Routine: task_for_pid
516 * Get the task port for another "process", named by its
517 * process ID on the same host as "target_task".
519 * Only permitted to privileged processes, or processes
520 * with the same user ID.
522 * Note: if pid == 0, an error is return no matter who is calling.
524 * XXX This should be a BSD system call, not a Mach trap!!!
528 struct task_for_pid_args
*args
)
530 mach_port_name_t target_tport
= args
->target_tport
;
532 user_addr_t task_addr
= args
->t
;
533 proc_t p
= PROC_NULL
;
534 task_t t1
= TASK_NULL
;
535 mach_port_name_t tret
= MACH_PORT_NULL
;
540 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID
);
542 AUDIT_ARG(mach_port1
, target_tport
);
544 /* Always check if pid == 0 */
546 (void ) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
547 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
548 return(KERN_FAILURE
);
551 t1
= port_name_to_task(target_tport
);
552 if (t1
== TASK_NULL
) {
553 (void) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
554 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
555 return(KERN_FAILURE
);
562 AUDIT_ARG(process
, p
);
565 if (!(task_for_pid_posix_check(p
))) {
566 error
= KERN_FAILURE
;
570 if (p
->task
!= TASK_NULL
) {
571 /* If we aren't root and target's task access port is set... */
572 if (!kauth_cred_issuser(kauth_cred_get()) &&
573 p
!= current_proc() &&
574 (task_get_task_access_port(p
->task
, &tfpport
) == 0) &&
575 (tfpport
!= IPC_PORT_NULL
)) {
577 if (tfpport
== IPC_PORT_DEAD
) {
578 error
= KERN_PROTECTION_FAILURE
;
582 /* Call up to the task access server */
583 error
= check_task_access(tfpport
, proc_selfpid(), kauth_getgid(), pid
);
585 if (error
!= MACH_MSG_SUCCESS
) {
586 if (error
== MACH_RCV_INTERRUPTED
)
587 error
= KERN_ABORTED
;
589 error
= KERN_FAILURE
;
594 error
= mac_proc_check_get_task(kauth_cred_get(), p
);
596 error
= KERN_FAILURE
;
601 /* Grant task port access */
602 task_reference(p
->task
);
603 sright
= (void *) convert_task_to_port(p
->task
);
604 tret
= ipc_port_copyout_send(
606 get_task_ipcspace(current_task()));
608 error
= KERN_SUCCESS
;
612 AUDIT_ARG(mach_port2
, tret
);
613 (void) copyout((char *) &tret
, task_addr
, sizeof(mach_port_name_t
));
616 AUDIT_MACH_SYSCALL_EXIT(error
);
621 * Routine: task_name_for_pid
623 * Get the task name port for another "process", named by its
624 * process ID on the same host as "target_task".
626 * Only permitted to privileged processes, or processes
627 * with the same user ID.
629 * XXX This should be a BSD system call, not a Mach trap!!!
634 struct task_name_for_pid_args
*args
)
636 mach_port_name_t target_tport
= args
->target_tport
;
638 user_addr_t task_addr
= args
->t
;
639 proc_t p
= PROC_NULL
;
641 mach_port_name_t tret
;
643 int error
= 0, refheld
= 0;
644 kauth_cred_t target_cred
;
646 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID
);
648 AUDIT_ARG(mach_port1
, target_tport
);
650 t1
= port_name_to_task(target_tport
);
651 if (t1
== TASK_NULL
) {
652 (void) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
653 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
654 return(KERN_FAILURE
);
658 if (p
!= PROC_NULL
) {
659 AUDIT_ARG(process
, p
);
660 target_cred
= kauth_cred_proc_ref(p
);
663 if ((p
->p_stat
!= SZOMB
)
664 && ((current_proc() == p
)
665 || kauth_cred_issuser(kauth_cred_get())
666 || ((kauth_cred_getuid(target_cred
) == kauth_cred_getuid(kauth_cred_get())) &&
667 ((target_cred
->cr_ruid
== kauth_cred_get()->cr_ruid
))))) {
669 if (p
->task
!= TASK_NULL
) {
670 task_reference(p
->task
);
672 error
= mac_proc_check_get_task_name(kauth_cred_get(), p
);
674 task_deallocate(p
->task
);
678 sright
= (void *)convert_task_name_to_port(p
->task
);
679 tret
= ipc_port_copyout_send(sright
,
680 get_task_ipcspace(current_task()));
682 tret
= MACH_PORT_NULL
;
684 AUDIT_ARG(mach_port2
, tret
);
685 (void) copyout((char *)&tret
, task_addr
, sizeof(mach_port_name_t
));
687 error
= KERN_SUCCESS
;
696 tret
= MACH_PORT_NULL
;
697 (void) copyout((char *) &tret
, task_addr
, sizeof(mach_port_name_t
));
698 error
= KERN_FAILURE
;
701 kauth_cred_unref(&target_cred
);
704 AUDIT_MACH_SYSCALL_EXIT(error
);
709 sysctl_settfp_policy(__unused
struct sysctl_oid
*oidp
, void *arg1
,
710 __unused
int arg2
, struct sysctl_req
*req
)
715 error
= SYSCTL_OUT(req
, arg1
, sizeof(int));
716 if (error
|| req
->newptr
== USER_ADDR_NULL
)
722 if ((error
= SYSCTL_IN(req
, &new_value
, sizeof(int)))) {
725 if ((new_value
== KERN_TFP_POLICY_DENY
)
726 || (new_value
== KERN_TFP_POLICY_DEFAULT
))
727 tfp_policy
= new_value
;
735 #if defined(SECURE_KERNEL)
736 static int kern_secure_kernel
= 1;
738 static int kern_secure_kernel
= 0;
741 SYSCTL_INT(_kern
, OID_AUTO
, secure_kernel
, CTLFLAG_RD
, &kern_secure_kernel
, 0, "");
743 SYSCTL_NODE(_kern
, KERN_TFP
, tfp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "tfp");
744 SYSCTL_PROC(_kern_tfp
, KERN_TFP_POLICY
, policy
, CTLTYPE_INT
| CTLFLAG_RW
,
745 &tfp_policy
, sizeof(uint32_t), &sysctl_settfp_policy
,"I","policy");
747 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_trace_level
, CTLFLAG_RW
,
748 &shared_region_trace_level
, 0, "");
749 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_version
, CTLFLAG_RD
,
750 &shared_region_version
, 0, "");
751 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_persistence
, CTLFLAG_RW
,
752 &shared_region_persistence
, 0, "");
755 * shared_region_check_np:
757 * This system call is intended for dyld.
759 * dyld calls this when any process starts to see if the process's shared
760 * region is already set up and ready to use.
761 * This call returns the base address of the first mapping in the
762 * process's shared region's first mapping.
763 * dyld will then check what's mapped at that address.
765 * If the shared region is empty, dyld will then attempt to map the shared
766 * cache file in the shared region via the shared_region_map_np() system call.
768 * If something's already mapped in the shared region, dyld will check if it
769 * matches the shared cache it would like to use for that process.
770 * If it matches, everything's ready and the process can proceed and use the
772 * If it doesn't match, dyld will unmap the shared region and map the shared
773 * cache into the process's address space via mmap().
776 * EINVAL no shared region
777 * ENOMEM shared region is empty
778 * EFAULT bad address for "start_address"
781 shared_region_check_np(
782 __unused
struct proc
*p
,
783 struct shared_region_check_np_args
*uap
,
784 __unused
int *retvalp
)
786 vm_shared_region_t shared_region
;
787 mach_vm_offset_t start_address
;
791 SHARED_REGION_TRACE_DEBUG(
792 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
793 current_thread(), p
->p_pid
, p
->p_comm
,
794 (uint64_t)uap
->start_address
));
796 /* retrieve the current tasks's shared region */
797 shared_region
= vm_shared_region_get(current_task());
798 if (shared_region
!= NULL
) {
799 /* retrieve address of its first mapping... */
800 kr
= vm_shared_region_start_address(shared_region
,
802 if (kr
!= KERN_SUCCESS
) {
805 /* ... and give it to the caller */
806 error
= copyout(&start_address
,
807 (user_addr_t
) uap
->start_address
,
808 sizeof (start_address
));
810 SHARED_REGION_TRACE_ERROR(
811 ("shared_region: %p [%d(%s)] "
813 "copyout(0x%llx) error %d\n",
814 current_thread(), p
->p_pid
, p
->p_comm
,
815 (uint64_t)uap
->start_address
, (uint64_t)start_address
,
819 vm_shared_region_deallocate(shared_region
);
821 /* no shared region ! */
825 SHARED_REGION_TRACE_DEBUG(
826 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
827 current_thread(), p
->p_pid
, p
->p_comm
,
828 (uint64_t)uap
->start_address
, (uint64_t)start_address
, error
));
834 * shared_region_map_np()
836 * This system call is intended for dyld.
838 * dyld uses this to map a shared cache file into a shared region.
839 * This is usually done only the first time a shared cache is needed.
840 * Subsequent processes will just use the populated shared region without
841 * requiring any further setup.
844 shared_region_map_np(
846 struct shared_region_map_np_args
*uap
,
847 __unused
int *retvalp
)
853 struct vnode
*vp
, *root_vp
;
854 struct vnode_attr va
;
856 memory_object_size_t file_size
;
857 user_addr_t user_mappings
;
858 struct shared_file_mapping_np
*mappings
;
859 #define SFM_MAX_STACK 8
860 struct shared_file_mapping_np stack_mappings
[SFM_MAX_STACK
];
861 unsigned int mappings_count
;
862 vm_size_t mappings_size
;
863 memory_object_control_t file_control
;
864 struct vm_shared_region
*shared_region
;
866 SHARED_REGION_TRACE_DEBUG(
867 ("shared_region: %p [%d(%s)] -> map\n",
868 current_thread(), p
->p_pid
, p
->p_comm
));
870 shared_region
= NULL
;
877 /* get file descriptor for shared region cache file */
880 /* get file structure from file descriptor */
881 error
= fp_lookup(p
, fd
, &fp
, 0);
883 SHARED_REGION_TRACE_ERROR(
884 ("shared_region: %p [%d(%s)] map: "
885 "fd=%d lookup failed (error=%d)\n",
886 current_thread(), p
->p_pid
, p
->p_comm
, fd
, error
));
890 /* make sure we're attempting to map a vnode */
891 if (fp
->f_fglob
->fg_type
!= DTYPE_VNODE
) {
892 SHARED_REGION_TRACE_ERROR(
893 ("shared_region: %p [%d(%s)] map: "
894 "fd=%d not a vnode (type=%d)\n",
895 current_thread(), p
->p_pid
, p
->p_comm
,
896 fd
, fp
->f_fglob
->fg_type
));
901 /* we need at least read permission on the file */
902 if (! (fp
->f_fglob
->fg_flag
& FREAD
)) {
903 SHARED_REGION_TRACE_ERROR(
904 ("shared_region: %p [%d(%s)] map: "
905 "fd=%d not readable\n",
906 current_thread(), p
->p_pid
, p
->p_comm
, fd
));
911 /* get vnode from file structure */
912 error
= vnode_getwithref((vnode_t
) fp
->f_fglob
->fg_data
);
914 SHARED_REGION_TRACE_ERROR(
915 ("shared_region: %p [%d(%s)] map: "
916 "fd=%d getwithref failed (error=%d)\n",
917 current_thread(), p
->p_pid
, p
->p_comm
, fd
, error
));
920 vp
= (struct vnode
*) fp
->f_fglob
->fg_data
;
922 /* make sure the vnode is a regular file */
923 if (vp
->v_type
!= VREG
) {
924 SHARED_REGION_TRACE_ERROR(
925 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
926 "not a file (type=%d)\n",
927 current_thread(), p
->p_pid
, p
->p_comm
,
928 vp
, vp
->v_name
, vp
->v_type
));
933 /* make sure vnode is on the process's root volume */
934 root_vp
= p
->p_fd
->fd_rdir
;
935 if (root_vp
== NULL
) {
938 if (vp
->v_mount
!= root_vp
->v_mount
) {
939 SHARED_REGION_TRACE_ERROR(
940 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
941 "not on process's root volume\n",
942 current_thread(), p
->p_pid
, p
->p_comm
,
948 /* make sure vnode is owned by "root" */
950 VATTR_WANTED(&va
, va_uid
);
951 error
= vnode_getattr(vp
, &va
, vfs_context_current());
953 SHARED_REGION_TRACE_ERROR(
954 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
955 "vnode_getattr(%p) failed (error=%d)\n",
956 current_thread(), p
->p_pid
, p
->p_comm
,
957 vp
, vp
->v_name
, vp
, error
));
960 if (va
.va_uid
!= 0) {
961 SHARED_REGION_TRACE_ERROR(
962 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
963 "owned by uid=%d instead of 0\n",
964 current_thread(), p
->p_pid
, p
->p_comm
,
965 vp
, vp
->v_name
, va
.va_uid
));
971 error
= vnode_size(vp
, &fs
, vfs_context_current());
973 SHARED_REGION_TRACE_ERROR(
974 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
975 "vnode_size(%p) failed (error=%d)\n",
976 current_thread(), p
->p_pid
, p
->p_comm
,
977 vp
, vp
->v_name
, vp
, error
));
982 /* get the file's memory object handle */
983 file_control
= ubc_getobject(vp
, UBC_HOLDOBJECT
);
984 if (file_control
== MEMORY_OBJECT_CONTROL_NULL
) {
985 SHARED_REGION_TRACE_ERROR(
986 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
987 "no memory object\n",
988 current_thread(), p
->p_pid
, p
->p_comm
,
994 /* get the list of mappings the caller wants us to establish */
995 mappings_count
= uap
->count
; /* number of mappings */
996 mappings_size
= (vm_size_t
) (mappings_count
* sizeof (mappings
[0]));
997 if (mappings_count
== 0) {
998 SHARED_REGION_TRACE_INFO(
999 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1001 current_thread(), p
->p_pid
, p
->p_comm
,
1003 error
= 0; /* no mappings: we're done ! */
1005 } else if (mappings_count
<= SFM_MAX_STACK
) {
1006 mappings
= &stack_mappings
[0];
1008 SHARED_REGION_TRACE_ERROR(
1009 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1010 "too many mappings (%d)\n",
1011 current_thread(), p
->p_pid
, p
->p_comm
,
1012 vp
, vp
->v_name
, mappings_count
));
1017 user_mappings
= uap
->mappings
; /* the mappings, in user space */
1018 error
= copyin(user_mappings
,
1022 SHARED_REGION_TRACE_ERROR(
1023 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1024 "copyin(0x%llx, %d) failed (error=%d)\n",
1025 current_thread(), p
->p_pid
, p
->p_comm
,
1026 vp
, vp
->v_name
, (uint64_t)user_mappings
, mappings_count
, error
));
1030 /* get the process's shared region (setup in vm_map_exec()) */
1031 shared_region
= vm_shared_region_get(current_task());
1032 if (shared_region
== NULL
) {
1033 SHARED_REGION_TRACE_ERROR(
1034 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1035 "no shared region\n",
1036 current_thread(), p
->p_pid
, p
->p_comm
,
1041 /* map the file into that shared region's submap */
1042 kr
= vm_shared_region_map_file(shared_region
,
1047 (void *) p
->p_fd
->fd_rdir
);
1048 if (kr
!= KERN_SUCCESS
) {
1049 SHARED_REGION_TRACE_ERROR(
1050 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1051 "vm_shared_region_map_file() failed kr=0x%x\n",
1052 current_thread(), p
->p_pid
, p
->p_comm
,
1053 vp
, vp
->v_name
, kr
));
1055 case KERN_INVALID_ADDRESS
:
1058 case KERN_PROTECTION_FAILURE
:
1065 case KERN_INVALID_ARGUMENT
:
1075 /* update the vnode's access time */
1076 if (! (vnode_vfsvisflags(vp
) & MNT_NOATIME
)) {
1078 nanotime(&va
.va_access_time
);
1079 VATTR_SET_ACTIVE(&va
, va_access_time
);
1080 vnode_setattr(vp
, &va
, vfs_context_current());
1083 if (p
->p_flag
& P_NOSHLIB
) {
1084 /* signal that this process is now using split libraries */
1085 OSBitAndAtomic(~((uint32_t)P_NOSHLIB
), &p
->p_flag
);
1091 * release the vnode...
1092 * ubc_map() still holds it for us in the non-error case
1094 (void) vnode_put(vp
);
1098 /* release the file descriptor */
1099 fp_drop(p
, fd
, fp
, 0);
1103 if (shared_region
!= NULL
) {
1104 vm_shared_region_deallocate(shared_region
);
1107 SHARED_REGION_TRACE_DEBUG(
1108 ("shared_region: %p [%d(%s)] <- map\n",
1109 current_thread(), p
->p_pid
, p
->p_comm
));
1115 /* sysctl overflow room */
1117 /* vm_page_free_target is provided as a makeshift solution for applications that want to
1118 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
1119 reclaimed. It allows the app to calculate how much memory is free outside the free target. */
1120 extern unsigned int vm_page_free_target
;
1121 SYSCTL_INT(_vm
, OID_AUTO
, vm_page_free_target
, CTLFLAG_RD
,
1122 &vm_page_free_target
, 0, "Pageout daemon free target");
1124 extern unsigned int vm_memory_pressure
;
1125 SYSCTL_INT(_vm
, OID_AUTO
, memory_pressure
, CTLFLAG_RD
,
1126 &vm_memory_pressure
, 0, "Memory pressure indicator");
1129 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
1131 #pragma unused(oidp, arg1, arg2)
1132 unsigned int page_free_wanted
;
1134 page_free_wanted
= mach_vm_ctl_page_free_wanted();
1135 return SYSCTL_OUT(req
, &page_free_wanted
, sizeof (page_free_wanted
));
1137 SYSCTL_PROC(_vm
, OID_AUTO
, page_free_wanted
,
1138 CTLTYPE_INT
| CTLFLAG_RD
| CTLFLAG_LOCKED
,
1139 0, 0, vm_ctl_page_free_wanted
, "I", "");
1141 extern unsigned int vm_page_purgeable_count
;
1142 SYSCTL_INT(_vm
, OID_AUTO
, page_purgeable_count
, CTLFLAG_RD
,
1143 &vm_page_purgeable_count
, 0, "Purgeable page count");
1145 extern unsigned int vm_page_purgeable_wired_count
;
1146 SYSCTL_INT(_vm
, OID_AUTO
, page_purgeable_wired_count
, CTLFLAG_RD
,
1147 &vm_page_purgeable_wired_count
, 0, "Wired purgeable page count");
1149 SYSCTL_INT(_vm
, OID_AUTO
, page_reusable_count
, CTLFLAG_RD
,
1150 &vm_page_stats_reusable
.reusable_count
, 0, "Reusable page count");
1151 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_success
, CTLFLAG_RD
,
1152 &vm_page_stats_reusable
.reusable_pages_success
, "");
1153 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_failure
, CTLFLAG_RD
,
1154 &vm_page_stats_reusable
.reusable_pages_failure
, "");
1155 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_shared
, CTLFLAG_RD
,
1156 &vm_page_stats_reusable
.reusable_pages_shared
, "");
1157 SYSCTL_QUAD(_vm
, OID_AUTO
, all_reusable_calls
, CTLFLAG_RD
,
1158 &vm_page_stats_reusable
.all_reusable_calls
, "");
1159 SYSCTL_QUAD(_vm
, OID_AUTO
, partial_reusable_calls
, CTLFLAG_RD
,
1160 &vm_page_stats_reusable
.partial_reusable_calls
, "");
1161 SYSCTL_QUAD(_vm
, OID_AUTO
, reuse_success
, CTLFLAG_RD
,
1162 &vm_page_stats_reusable
.reuse_pages_success
, "");
1163 SYSCTL_QUAD(_vm
, OID_AUTO
, reuse_failure
, CTLFLAG_RD
,
1164 &vm_page_stats_reusable
.reuse_pages_failure
, "");
1165 SYSCTL_QUAD(_vm
, OID_AUTO
, all_reuse_calls
, CTLFLAG_RD
,
1166 &vm_page_stats_reusable
.all_reuse_calls
, "");
1167 SYSCTL_QUAD(_vm
, OID_AUTO
, partial_reuse_calls
, CTLFLAG_RD
,
1168 &vm_page_stats_reusable
.partial_reuse_calls
, "");
1169 SYSCTL_QUAD(_vm
, OID_AUTO
, can_reuse_success
, CTLFLAG_RD
,
1170 &vm_page_stats_reusable
.can_reuse_success
, "");
1171 SYSCTL_QUAD(_vm
, OID_AUTO
, can_reuse_failure
, CTLFLAG_RD
,
1172 &vm_page_stats_reusable
.can_reuse_failure
, "");
1176 vm_pressure_monitor(
1177 __unused
struct proc
*p
,
1178 struct vm_pressure_monitor_args
*uap
,
1182 uint32_t pages_reclaimed
;
1183 uint32_t pages_wanted
;
1185 kr
= mach_vm_pressure_monitor(
1186 (boolean_t
) uap
->wait_for_pressure
,
1187 uap
->nsecs_monitored
,
1188 (uap
->pages_reclaimed
) ? &pages_reclaimed
: NULL
,
1200 if (uap
->pages_reclaimed
) {
1201 if (copyout((void *)&pages_reclaimed
,
1202 uap
->pages_reclaimed
,
1203 sizeof (pages_reclaimed
)) != 0) {
1208 *retval
= (int) pages_wanted
;