/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int		prot)
{
	vm_map_t	map;

	map = current_map();
	return (vm_map_check_protection(
			map,
			vm_map_trunc_page(addr,
					  vm_map_page_mask(map)),
			vm_map_round_page(addr + len,
					  vm_map_page_mask(map)),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();
	kret = vm_map_wire(map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(map)),
			   vm_map_round_page(addr + len,
					     vm_map_page_mask(map)),
			   VM_PROT_READ | VM_PROT_WRITE,
			   FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t	addr,
	user_size_t	len,
	__unused int	dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		     vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(map,
			     vm_map_trunc_page(addr,
					       vm_map_page_mask(map)),
			     vm_map_round_page(addr + len,
					       vm_map_page_mask(map)),
			     FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

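/*
 * Illustrative sketch (not part of the original source): the classic
 * vslock()/vsunlock() pairing around direct I/O to a user buffer, in the
 * style of physio().  Everything here other than vslock()/vsunlock()
 * themselves (the helper name, error flow) is a hypothetical example.
 *
 *	if (vslock(uaddr, len) != 0)		// wire the user pages down
 *		return (ENOMEM);
 *	error = do_device_io(uaddr, len);	// device reads/writes user memory
 *	vsunlock(uaddr, len, 1);		// unwire; "1" = pages were dirtied
 */
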
int
subyte(
	user_addr_t	addr,
	int		byte)
{
	char	character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t	addr,
	int		byte)
{
	char	character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
fubyte(user_addr_t addr)
{
	unsigned char	byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return (-1);
	return (byte);
}

int
fuibyte(user_addr_t addr)
{
	unsigned char	byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return (-1);
	return (byte);
}

int
suword(
	user_addr_t	addr,
	long		word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuword(user_addr_t addr)
{
	long	word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t	addr,
	long		word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuiword(user_addr_t addr)
{
	long	word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t	longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return (-1);
		return (longword);
	} else {
		return ((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t	ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return (-1ULL);
		return (ulongword);
	} else {
		return ((uint64_t)fuiword(addr));
	}
}

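/*
 * Illustrative sketch (not part of the original source): how a syscall
 * handler might use fulong()/sulong() to read and update a user-space,
 * pointer-sized slot without caring whether the calling process is
 * 32-bit or 64-bit.  The variable names are hypothetical.
 *
 *	int64_t val = fulong(uaddr);			// -1 on fault
 *	if (val >= 0 && sulong(uaddr, val + 1) != 0)
 *		return (EFAULT);
 *
 * Note the classic fuword()-family caveat: a legitimate stored value of
 * -1 is indistinguishable from a copyin() failure, so copyin()/copyout()
 * are preferred whenever that distinction matters.
 */
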
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return (ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach
 * port name.
 *
 * Parameters:	args			User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_FAILURE		Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return (err);
}

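/*
 * Illustrative sketch (not part of the original source): calling the
 * pid_for_task() trap from user space via its libsystem wrapper, which
 * is declared in <mach/mach_traps.h>.  Treat the surrounding snippet as
 * an assumption-laden example, not part of this file's interface.
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	int pid;
 *	if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
 *		printf("my pid is %d\n", pid);
 */
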
/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}

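/*
 * Illustrative sketch (not part of the original source): the user-space
 * call that lands in the trap above.  task_for_pid() is declared in
 * <mach/mach_traps.h>; on a default system the caller must be root or
 * satisfy task_for_pid_posix_check() plus any task-access upcall and
 * MACF policy, so expect KERN_FAILURE otherwise.
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	if (kr != KERN_SUCCESS)
 *		fprintf(stderr, "task_for_pid: %s\n", mach_error_string(kr));
 */
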
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else
				error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}


static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return (error);

	if (!kauth_cred_issuser(kauth_cred_get()))
		return (EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return (error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

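/*
 * Illustrative sketch (not part of the original source): reading the
 * task_for_pid policy from user space through the sysctl registered
 * above.  sysctlbyname() is the standard libc interface; the returned
 * value corresponds to the KERN_TFP_POLICY_* constants.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int policy;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
 *		printf("kern.tfp.policy = %d\n", policy);
 */
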
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}

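/*
 * Illustrative sketch (not part of the original source): the dyld-side
 * protocol that the comment above describes, in pseudo-C.  The helper
 * names are hypothetical; only the error semantics come from this file.
 *
 *	uint64_t base;
 *	if (shared_region_check_np(&base) == 0) {
 *		if (cache_at(base) == desired_cache)
 *			use_shared_region();		// everything's ready
 *		else
 *			mmap_cache_privately();		// mismatch: fall back to mmap()
 *	} else {
 *		map_cache_into_shared_region();		// empty/absent: shared_region_map_np()
 *	}
 */
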

int
shared_region_copyin_mappings(
	struct proc			*p,
	user_addr_t			user_mappings,
	unsigned int			mappings_count,
	struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
	struct proc			*p,
	int				fd,
	uint32_t			mappings_count,
	struct shared_file_mapping_np	*mappings,
	uint32_t			slide,
	user_addr_t			slide_start,
	user_addr_t			slide_size)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
#if CONFIG_MACF
	vm_prot_t			maxprot = VM_PROT_ALL;
#endif
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, FILEGLOB_DTYPE(fp->f_fglob)));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}


	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       slide,
				       slide_start,
				       slide_size);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

int
shared_region_map_and_slide_np(
	struct proc					*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
					  slide,
					  uap->slide_start, uap->slide_size);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

done:
	return kr;
}

/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed.  It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int	vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int	vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated, plus busy and no-lock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low-priority I/O type and the I/O
	 * we issued while in this cleaning operation collided with normal
	 * I/O operations, we'll delay in order to mitigate the impact of
	 * this task on the normal operation of the system.
	 */
	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

int
kas_info(struct proc *p,
	 struct kas_info_args *uap,
	 int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		if (IS_64BIT_PROCESS(p)) {
			user64_size_t size64 = (user64_size_t)size;
			error = copyout(&size64, sizep, sizeof(size64));
		} else {
			user32_size_t size32 = (user32_size_t)size;
			error = copyout(&size32, sizep, sizeof(size32));
		}
		if (error) {
			return error;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
	}
		break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}
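
/*
 * Illustrative sketch (not part of the original source): querying the
 * kernel text slide from a root process via the kas_info() wrapper
 * declared in <sys/kas_info.h>.  Treat the wrapper's exact signature as
 * an assumption based on that header.
 *
 *	#include <sys/kas_info.h>
 *	#include <stdio.h>
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
 *		printf("kernel slide: 0x%llx\n", (unsigned long long)slide);
 */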