/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>
int _shared_region_map(struct proc *, int, unsigned int,
    struct shared_file_mapping_np *, memory_object_control_t *,
    struct shared_file_mapping_np *);
int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t,
    mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
int shared_region_copyin_mappings(struct proc *, user_addr_t,
    unsigned int, struct shared_file_mapping_np *);
extern int vm_debug_events;

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED,
    &vm_debug_events, 0, "");

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED,
    &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED,
    &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
static const char *prot_values[] = {
    "none",
    "read-only",
    "write-only",
    "read-write",
    "execute-only",
    "read-execute",
    "write-execute",
    "read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
    printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
        current_proc()->p_comm, current_proc()->p_pid, vaddr,
        prot_values[prot & VM_PROT_ALL]);
}
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;
void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
    struct timeval tv;
    const char *pcommstr;

    if (shared_region_unnest_logging == 0)
        return;

    if (shared_region_unnest_logging == 1) {
        microtime(&tv);
        if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
            if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
                return;
        } else {
            last_unnest_log_time = tv.tv_sec;
            shared_region_unnest_log_count = 0;
        }
    }

    pcommstr = current_proc()->p_comm;

    printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}
int
useracc(user_addr_t addr, user_size_t len, int prot)
{
    return (vm_map_check_protection(
        current_map(),
        vm_map_trunc_page(addr), vm_map_round_page(addr + len),
        prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}
int
vslock(user_addr_t addr, user_size_t len)
{
    kern_return_t kret;

    kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
            vm_map_round_page(addr + len),
            VM_PROT_READ | VM_PROT_WRITE, FALSE);

    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}
int
vsunlock(user_addr_t addr, user_size_t len, __unused int dirtied)
{
#if FIXME  /* [ */
    pmap_t          pmap;
    vm_page_t       pg;
    vm_map_offset_t vaddr;
    ppnum_t         paddr;
#endif  /* FIXME ] */
    kern_return_t   kret;

#if FIXME  /* [ */
    if (dirtied) {
        pmap = get_task_pmap(current_task());
        for (vaddr = vm_map_trunc_page(addr);
             vaddr < vm_map_round_page(addr + len);
             vaddr += PAGE_SIZE) {
            paddr = pmap_extract(pmap, vaddr);
            pg = PHYS_TO_VM_PAGE(paddr);
            vm_page_set_modified(pg);
        }
    }
#endif  /* FIXME ] */
    kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
            vm_map_round_page(addr + len), FALSE);
    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}
int
subyte(user_addr_t addr, int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(user_addr_t addr, int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *) &byte, sizeof(char)))
        return (-1);
    return (byte);
}

int fuibyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *) &(byte), sizeof(char)))
        return (-1);
    return (byte);
}

int
suword(user_addr_t addr, long word)
{
    return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *) &word, sizeof(int)))
        return (-1);
    return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(user_addr_t addr, long word)
{
    return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *) &word, sizeof(int)))
        return (-1);
    return (word);
}
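/*
 * Usage sketch (illustrative, not part of the original file): these
 * primitives move single bytes/words across the user/kernel boundary
 * and signal failure in-band with -1:
 *
 *	long v = fuword(uaddr);		// fetch an int-sized word
 *	if (v == -1)
 *		// either the fetch faulted or the word really was -1
 *	if (suword(uaddr, 0) != 0)
 *		// the store faulted
 *
 * Callers that must distinguish a legitimately stored -1 from a fault
 * should use copyin()/copyout() directly, as the functions above do.
 */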
/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
    if (IS_64BIT_PROCESS(current_proc())) {
        return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
    } else {
        return (suiword(addr, (long)word));
    }
}

int64_t
fulong(user_addr_t addr)
{
    int64_t longword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
            return (-1);
        return (longword);
    } else {
        return ((int64_t)fuiword(addr));
    }
}

int
suulong(user_addr_t addr, uint64_t uword)
{
    if (IS_64BIT_PROCESS(current_proc())) {
        return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
    } else {
        return (suiword(addr, (uint32_t)uword));
    }
}

uint64_t
fuulong(user_addr_t addr)
{
    uint64_t ulongword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
            return (-1ULL);
        return (ulongword);
    } else {
        return ((uint64_t)fuiword(addr));
    }
}
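/*
 * Usage sketch (illustrative, not part of the original file): the *long
 * variants pick the transfer size from the calling process's ABI, so a
 * caller can read a user pointer-sized value without special-casing
 * 32- vs 64-bit tasks:
 *
 *	user_addr_t uptr = ...;		// address of a user pointer/size_t
 *	int64_t val = fulong(uptr);	// 8 bytes for a 64-bit task,
 *					// 4 bytes (via fuiword) otherwise
 */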
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
    return (ENOTSUP);
}
/*
 *	pid_for_task
 *
 *	Find the BSD process ID for the Mach task associated with the given Mach port
 *	name
 *
 *	Parameters:	args		User argument descriptor (see below)
 *
 *	Indirect parameters:	args->t		Mach port name
 *				args->pid	Process ID (returned value; see below)
 *
 *	Returns:	KERN_SUCCESS	Success
 *			KERN_FAILURE	Not success
 *
 *	Implicit returns: args->pid		Process ID
 */
kern_return_t
pid_for_task(
    struct pid_for_task_args *args)
{
    mach_port_name_t    t = args->t;
    user_addr_t         pid_addr = args->pid;
    proc_t              p;
    task_t              t1;
    int                 pid = -1;
    kern_return_t       err = KERN_SUCCESS;

    AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
    AUDIT_ARG(mach_port1, t);

    t1 = port_name_to_task(t);

    if (t1 == TASK_NULL) {
        err = KERN_FAILURE;
        goto pftout;
    } else {
        p = get_bsdtask_info(t1);
        if (p) {
            pid = proc_pid(p);
            err = KERN_SUCCESS;
        } else {
            err = KERN_FAILURE;
        }
    }
    task_deallocate(t1);
pftout:
    AUDIT_ARG(pid, pid);
    (void) copyout((char *) &pid, pid_addr, sizeof(int));
    AUDIT_MACH_SYSCALL_EXIT(err);
    return (err);
}
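/*
 * User-space view (sketch, assuming the standard libsystem wrapper for
 * this trap):
 *
 *	#include <mach/mach.h>
 *	int pid;
 *	kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *	if (kr == KERN_SUCCESS)
 *		printf("my pid is %d\n", pid);
 *
 * On failure the trap still copies out a pid of -1 before returning
 * KERN_FAILURE.
 */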
/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port. This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
    kauth_cred_t targetcred, mycred;
    uid_t myuid;
    int allowed;

    /* No task_for_pid on bad targets */
    if (target == PROC_NULL || target->p_stat == SZOMB) {
        return FALSE;
    }

    mycred = kauth_cred_get();
    myuid = kauth_cred_getuid(mycred);

    /* If we're running as root, the check passes */
    if (kauth_cred_issuser(mycred))
        return TRUE;

    /* We're allowed to get our own task port */
    if (target == current_proc())
        return TRUE;

    /*
     * Under DENY, only root can get another proc's task port,
     * so no more checks are needed.
     */
    if (tfp_policy == KERN_TFP_POLICY_DENY) {
        return FALSE;
    }

    targetcred = kauth_cred_proc_ref(target);
    allowed = TRUE;

    /* Do target's ruid, euid, and saved uid match my euid? */
    if ((kauth_cred_getuid(targetcred) != myuid) ||
        (kauth_cred_getruid(targetcred) != myuid) ||
        (kauth_cred_getsvuid(targetcred) != myuid)) {
        allowed = FALSE;
        goto out;
    }

    /* Are target's groups a subset of my groups? */
    if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
        allowed == 0) {
        allowed = FALSE;
        goto out;
    }

    /* Has target switched credentials? */
    if (target->p_flag & P_SUGID) {
        allowed = FALSE;
        goto out;
    }

out:
    kauth_cred_unref(&targetcred);
    return allowed;
}
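/*
 * Example of the policy above (illustrative): a debugger running as an
 * ordinary user may target its own processes, because their real,
 * effective, and saved uids all match the debugger's euid and their
 * groups are a subset of its own. It may not target a setuid program it
 * launched: exec of a setuid image sets P_SUGID, so the final check
 * denies access even though the uids may appear to match.
 */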
/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
    struct task_for_pid_args *args)
{
    mach_port_name_t    target_tport = args->target_tport;
    int                 pid = args->pid;
    user_addr_t         task_addr = args->t;
    proc_t              p = PROC_NULL;
    task_t              t1 = TASK_NULL;
    mach_port_name_t    tret = MACH_PORT_NULL;
    ipc_port_t          tfpport;
    void                *sright;
    int                 error = 0;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    /* Always check if pid == 0 */
    if (pid == 0) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p != PROC_NULL)
        AUDIT_ARG(process, p);

    if (!(task_for_pid_posix_check(p))) {
        error = KERN_FAILURE;
        goto tfpout;
    }

    if (p->task != TASK_NULL) {
        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            p != current_proc() &&
            (task_get_task_access_port(p->task, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = KERN_PROTECTION_FAILURE;
                goto tfpout;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = KERN_ABORTED;
                else
                    error = KERN_FAILURE;
                goto tfpout;
            }
        }
#if CONFIG_MACF
        error = mac_proc_check_get_task(kauth_cred_get(), p);
        if (error) {
            error = KERN_FAILURE;
            goto tfpout;
        }
#endif

        /* Grant task port access */
        task_reference(p->task);
        extmod_statistics_incr_task_for_pid(p->task);

        sright = (void *) convert_task_to_port(p->task);
        tret = ipc_port_copyout_send(
            sright,
            get_task_ipcspace(current_task()));
    }
    error = KERN_SUCCESS;

tfpout:
    task_deallocate(t1);
    AUDIT_ARG(mach_port2, tret);
    (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return (error);
}
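/*
 * User-space view (sketch, assuming the standard libsystem wrapper):
 *
 *	#include <mach/mach.h>
 *	mach_port_t task;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	if (kr == KERN_SUCCESS)
 *		// "task" is a send right to the target's task port
 *
 * Access is subject to the posix check, the task access server upcall,
 * and MAC policy above; unprivileged callers should expect KERN_FAILURE.
 */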
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_name_for_pid(
    struct task_name_for_pid_args *args)
{
    mach_port_name_t    target_tport = args->target_tport;
    int                 pid = args->pid;
    user_addr_t         task_addr = args->t;
    proc_t              p = PROC_NULL;
    task_t              t1;
    mach_port_name_t    tret;
    void                *sright;
    int                 error = 0, refheld = 0;
    kauth_cred_t        target_cred;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p != PROC_NULL) {
        AUDIT_ARG(process, p);
        target_cred = kauth_cred_proc_ref(p);
        refheld = 1;

        if ((p->p_stat != SZOMB)
            && ((current_proc() == p)
                || kauth_cred_issuser(kauth_cred_get())
                || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
                    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

            if (p->task != TASK_NULL) {
                task_reference(p->task);
#if CONFIG_MACF
                error = mac_proc_check_get_task_name(kauth_cred_get(), p);
                if (error) {
                    task_deallocate(p->task);
                    goto noperm;
                }
#endif
                sright = (void *)convert_task_name_to_port(p->task);
                tret = ipc_port_copyout_send(sright,
                    get_task_ipcspace(current_task()));
            } else
                tret = MACH_PORT_NULL;

            AUDIT_ARG(mach_port2, tret);
            (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
            task_deallocate(t1);
            error = KERN_SUCCESS;
            goto tnfpout;
        }
    }

#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
    tret = MACH_PORT_NULL;
    (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
    error = KERN_FAILURE;
tnfpout:
    if (refheld != 0)
        kauth_cred_unref(&target_cred);
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return (error);
}
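/*
 * User-space view (sketch): task_name_for_pid() returns the weaker task
 * *name* port, which identifies the task but confers no control over it:
 *
 *	mach_port_t tname;
 *	kern_return_t kr = task_name_for_pid(mach_task_self(), pid, &tname);
 *	if (kr == KERN_SUCCESS)
 *		// usable with task_info(), but not vm_read() and the like
 */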
int
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
    task_t  target = NULL;
    proc_t  targetproc = PROC_NULL;
    int     pid = args->pid;
    int     error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
#ifndef CONFIG_EMBEDDED
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }
#endif

    task_reference(target);
    error = task_suspend(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            error = EPERM;
        }
    }
    task_deallocate(target);

#if CONFIG_FREEZE
    kern_hibernation_on_pid_suspend(pid);
#endif

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
int
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
    task_t  target = NULL;
    proc_t  targetproc = PROC_NULL;
    int     pid = args->pid;
    int     error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
#ifndef CONFIG_EMBEDDED
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }
#endif

    task_reference(target);

#if CONFIG_FREEZE
    kern_hibernation_on_pid_resume(pid, target);
#endif

    error = task_resume(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            error = EPERM;
        }
    }
    task_deallocate(target);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
#if CONFIG_EMBEDDED
int
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
    int     error = 0;
    proc_t  targetproc = PROC_NULL;
    int     pid = args->pid;

#ifndef CONFIG_FREEZE
    #pragma unused(pid)
#else

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    /*
     * The only accepted pid value here is currently -1, since we just kick off the hibernation thread
     * here - individual ids aren't required. However, it's intended that this call will change
     * in the future to initiate hibernation of individual processes. In anticipation, we'll obtain the
     * process handle for potentially valid values and call task_for_pid_posix_check(); this way, everything
     * is validated correctly and set for further refactoring. See <rdar://problem/7839708> for more details.
     */
    if (pid >= 0) {
        targetproc = proc_find(pid);
        if (!task_for_pid_posix_check(targetproc)) {
            error = EPERM;
            goto out;
        }
    }

    if (pid == -1) {
        kern_hibernation_on_pid_hibernate(pid);
    } else {
        error = EPERM;
    }

out:

#endif /* CONFIG_FREEZE */

    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
    int                 error = 0;
    proc_t              targetproc = PROC_NULL;
    struct filedesc     *fdp;
    struct fileproc     *fp;
    int                 pid = args->pid;
    int                 level = args->level;
    int                 i;

    if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
        level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
        error = EINVAL;
        goto out;
    }

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    targetproc = proc_find(pid);
    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    proc_fdlock(targetproc);
    fdp = targetproc->p_fd;

    for (i = 0; i < fdp->fd_nfiles; i++) {
        struct socket *sockp;

        fp = fdp->fd_ofiles[i];
        if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
            fp->f_fglob->fg_type != DTYPE_SOCKET) {
            continue;
        }

        sockp = (struct socket *)fp->f_fglob->fg_data;

        /* Call networking stack with socket and level */
        (void) socket_defunct(targetproc, sockp, level);
    }

    proc_fdunlock(targetproc);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
#endif /* CONFIG_EMBEDDED */
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
    int error = 0;
    int new_value;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || req->newptr == USER_ADDR_NULL)
        return (error);

    if (!is_suser())
        return (EPERM);

    if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
        goto out;
    }
    if ((new_value == KERN_TFP_POLICY_DENY)
        || (new_value == KERN_TFP_POLICY_DEFAULT))
        tfp_policy = new_value;
    else
        error = EINVAL;
out:
    return (error);
}
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED,
    &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
    __unused struct proc                *p,
    struct shared_region_check_np_args  *uap,
    __unused int                        *retvalp)
{
    vm_shared_region_t  shared_region;
    mach_vm_offset_t    start_address = 0;
    int                 error;
    kern_return_t       kr;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address));

    /* retrieve the current tasks's shared region */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region != NULL) {
        /* retrieve address of its first mapping... */
        kr = vm_shared_region_start_address(shared_region,
                                            &start_address);
        if (kr != KERN_SUCCESS) {
            error = ENOMEM;
        } else {
            /* ... and give it to the caller */
            error = copyout(&start_address,
                            (user_addr_t) uap->start_address,
                            sizeof (start_address));
            if (error) {
                SHARED_REGION_TRACE_ERROR(
                    ("shared_region: %p [%d(%s)] "
                     "check_np(0x%llx) "
                     "copyout(0x%llx) error %d\n",
                     current_thread(), p->p_pid, p->p_comm,
                     (uint64_t)uap->start_address, (uint64_t)start_address,
                     error));
            }
        }
        vm_shared_region_deallocate(shared_region);
    } else {
        /* no shared region ! */
        error = EINVAL;
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address, (uint64_t)start_address, error));

    return error;
}
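/*
 * dyld-side view (sketch; the syscall stub name is shown for illustration
 * and assumed, not defined in this file):
 *
 *	uint64_t base;
 *	if (__shared_region_check_np(&base) == 0)
 *		// inspect the cache header mapped at "base"
 *	else
 *		// no usable shared region: fall back to mapping the
 *		// shared cache privately with mmap()
 */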
int
shared_region_copyin_mappings(
    struct proc                     *p,
    user_addr_t                     user_mappings,
    unsigned int                    mappings_count,
    struct shared_file_mapping_np   *mappings)
{
    int         error = 0;
    vm_size_t   mappings_size = 0;

    /* get the list of mappings the caller wants us to establish */
    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
    error = copyin(user_mappings,
                   mappings,
                   mappings_size);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "copyin(0x%llx, %d) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             (uint64_t)user_mappings, mappings_count, error));
    }
    return error;
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map(
    struct proc                     *p,
    int                             fd,
    uint32_t                        mappings_count,
    struct shared_file_mapping_np   *mappings,
    memory_object_control_t         *sr_file_control,
    struct shared_file_mapping_np   *mapping_to_slide)
{
    int                         error;
    kern_return_t               kr;
    struct fileproc             *fp;
    struct vnode                *vp, *root_vp;
    struct vnode_attr           va;
    off_t                       fs;
    memory_object_size_t        file_size;
    vm_prot_t                   maxprot = VM_PROT_ALL;
    memory_object_control_t     file_control;
    struct vm_shared_region     *shared_region;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> map\n",
         current_thread(), p->p_pid, p->p_comm));

    shared_region = NULL;
    fp = NULL;
    vp = NULL;

    /* get file structure from file descriptor */
    error = fp_lookup(p, fd, &fp, 0);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d lookup failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }

    /* make sure we're attempting to map a vnode */
    if (fp->f_fglob->fg_type != DTYPE_VNODE) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not a vnode (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             fd, fp->f_fglob->fg_type));
        error = EINVAL;
        goto done;
    }

    /* we need at least read permission on the file */
    if (! (fp->f_fglob->fg_flag & FREAD)) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not readable\n",
             current_thread(), p->p_pid, p->p_comm, fd));
        error = EPERM;
        goto done;
    }

    /* get vnode from file structure */
    error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d getwithref failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }
    vp = (struct vnode *) fp->f_fglob->fg_data;

    /* make sure the vnode is a regular file */
    if (vp->v_type != VREG) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not a file (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp->v_type));
        error = EINVAL;
        goto done;
    }

#if CONFIG_MACF
    error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
            fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
    if (error) {
        goto done;
    }
#endif /* MAC */

#if CONFIG_PROTECT
    /* check for content protection access */
    {
        void *cnode;
        if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
            error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
            if (error)
                goto done;
        }
    }
#endif /* CONFIG_PROTECT */

    /* make sure vnode is on the process's root volume */
    root_vp = p->p_fd->fd_rdir;
    if (root_vp == NULL) {
        root_vp = rootvnode;
    } else {
        /*
         * Chroot-ed processes can't use the shared_region.
         */
        error = EINVAL;
        goto done;
    }

    if (vp->v_mount != root_vp->v_mount) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not on process's root volume\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EPERM;
        goto done;
    }

    /* make sure vnode is owned by "root" */
    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_uid);
    error = vnode_getattr(vp, &va, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_getattr(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    if (va.va_uid != 0) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "owned by uid=%d instead of 0\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, va.va_uid));
        error = EPERM;
        goto done;
    }

    /* get vnode size */
    error = vnode_size(vp, &fs, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_size(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    file_size = fs;

    /* get the file's memory object handle */
    file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
    if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no memory object\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EINVAL;
        goto done;
    }

    if (sr_file_control != NULL) {
        *sr_file_control = file_control;
    }

    /* get the process's shared region (setup in vm_map_exec()) */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region == NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no shared region\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EINVAL;
        goto done;
    }

    /* map the file into that shared region's submap */
    kr = vm_shared_region_map_file(shared_region,
                                   mappings_count,
                                   mappings,
                                   file_control,
                                   file_size,
                                   (void *) p->p_fd->fd_rdir,
                                   mapping_to_slide);
    if (kr != KERN_SUCCESS) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vm_shared_region_map_file() failed kr=0x%x\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, kr));
        switch (kr) {
        case KERN_INVALID_ADDRESS:
            error = EFAULT;
            break;
        case KERN_PROTECTION_FAILURE:
            error = EPERM;
            break;
        case KERN_NO_SPACE:
            error = ENOMEM;
            break;
        case KERN_FAILURE:
        case KERN_INVALID_ARGUMENT:
        default:
            error = EINVAL;
            break;
        }
        goto done;
    }

    error = 0;

    vnode_lock_spin(vp);

    vp->v_flag |= VSHARED_DYLD;

    vnode_unlock(vp);

    /* update the vnode's access time */
    if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
        VATTR_INIT(&va);
        nanotime(&va.va_access_time);
        VATTR_SET_ACTIVE(&va, va_access_time);
        vnode_setattr(vp, &va, vfs_context_current());
    }

    if (p->p_flag & P_NOSHLIB) {
        /* signal that this process is now using split libraries */
        OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
    }

done:
    if (vp != NULL) {
        /*
         * release the vnode...
         * ubc_map() still holds it for us in the non-error case
         */
        (void) vnode_put(vp);
        vp = NULL;
    }
    if (fp != NULL) {
        /* release the file descriptor */
        fp_drop(p, fd, fp, 0);
        fp = NULL;
    }

    if (shared_region != NULL) {
        vm_shared_region_deallocate(shared_region);
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] <- map\n",
         current_thread(), p->p_pid, p->p_comm));

    return error;
}
int
_shared_region_slide(uint32_t slide,
    mach_vm_offset_t        entry_start_address,
    mach_vm_size_t          entry_size,
    mach_vm_offset_t        slide_start,
    mach_vm_size_t          slide_size,
    memory_object_control_t sr_file_control)
{
    void    *slide_info_entry = NULL;
    int     error;

    if ((error = vm_shared_region_slide_init(slide_size, entry_start_address,
                    entry_size, slide, sr_file_control))) {
        printf("slide_info initialization failed with kr=%d\n", error);
        goto done;
    }

    slide_info_entry = vm_shared_region_get_slide_info_entry();
    if (slide_info_entry == NULL) {
        error = EFAULT;
    } else {
        error = copyin(slide_start,
                       slide_info_entry,
                       (vm_size_t)slide_size);
    }
    if (error) {
        goto done;
    }

    if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
        error = EFAULT;
        printf("Sanity Check failed for slide_info\n");
    } else {
        printf("Successfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
               (void*)(uintptr_t)entry_start_address,
               (unsigned long)entry_size,
               (unsigned long)slide_size);
    }
done:
    return error;
}
int
shared_region_map_and_slide_np(
    struct proc                                 *p,
    struct shared_region_map_and_slide_np_args  *uap,
    __unused int                                *retvalp)
{
    struct shared_file_mapping_np   mapping_to_slide;
    struct shared_file_mapping_np   *mappings;
    unsigned int                    mappings_count = uap->count;

    memory_object_control_t         sr_file_control;
    kern_return_t                   kr = KERN_SUCCESS;
    uint32_t                        slide = uap->slide;

#define SFM_MAX_STACK   8
    struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];

    if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
        if (kr == KERN_INVALID_ARGUMENT) {
            /*
             * This will happen if we request sliding again
             * with the same slide value that was used earlier
             * for the very first sliding. We continue through
             * to the mapping layer. This is so that we can be
             * absolutely certain that the same mappings have
             * been requested.
             */
            kr = KERN_SUCCESS;
        } else {
            goto done;
        }
    }

    if (mappings_count == 0) {
        SHARED_REGION_TRACE_INFO(
            ("shared_region: %p [%d(%s)] map(): "
             "no mappings\n",
             current_thread(), p->p_pid, p->p_comm));
        kr = 0; /* no mappings: we're done ! */
        goto done;
    } else if (mappings_count <= SFM_MAX_STACK) {
        mappings = &stack_mappings[0];
    } else {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "too many mappings (%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             mappings_count));
        kr = KERN_FAILURE;
        goto done;
    }

    if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
        goto done;
    }

    kr = _shared_region_map(p, uap->fd, mappings_count, mappings,
                            &sr_file_control, &mapping_to_slide);
    if (kr != KERN_SUCCESS) {
        return kr;
    }

    if (slide) {
        kr = _shared_region_slide(slide,
                mapping_to_slide.sfm_file_offset,
                mapping_to_slide.sfm_size,
                uap->slide_start,
                uap->slide_size,
                sr_file_control);
        if (kr != KERN_SUCCESS) {
            vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
            return kr;
        }
    }
done:
    return kr;
}
/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
   allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
   reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");
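/*
 * User-space reading sketch (illustrative, not part of the original
 * file): an application sizing a purgeable cache against the free
 * target can read the counter above with sysctlbyname():
 *
 *	unsigned int target;
 *	size_t len = sizeof(target);
 *	sysctlbyname("vm.vm_page_free_target", &target, &len, NULL, 0);
 */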
extern unsigned int vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int page_free_wanted;

    page_free_wanted = mach_vm_ctl_page_free_wanted();
    return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
int
vm_pressure_monitor(
    __unused struct proc            *p,
    struct vm_pressure_monitor_args *uap,
    int                             *retval)
{
    kern_return_t   kr;
    uint32_t        pages_reclaimed;
    uint32_t        pages_wanted;

    kr = mach_vm_pressure_monitor(
        (boolean_t) uap->wait_for_pressure,
        uap->nsecs_monitored,
        (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
        &pages_wanted);

    switch (kr) {
    case KERN_SUCCESS:
        break;
    case KERN_ABORTED:
        return EINTR;
    default:
        return EINVAL;
    }

    if (uap->pages_reclaimed) {
        if (copyout((void *)&pages_reclaimed,
                    uap->pages_reclaimed,
                    sizeof (pages_reclaimed)) != 0) {
            return EFAULT;
        }
    }

    *retval = (int) pages_wanted;
    return 0;
}