[apple/xnu.git] / osfmk / vm / vm_map.c (xnu-7195.60.75)
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119
120 #include <libkern/section_keywords.h>
121 #if DEVELOPMENT || DEBUG
122 extern int proc_selfcsflags(void);
123 int panic_on_unsigned_execute = 0;
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if MACH_ASSERT
127 int debug4k_filter = 0;
128 char debug4k_proc_name[1024] = "";
129 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
130 int debug4k_panic_on_misaligned_sharing = 0;
131 const char *debug4k_category_name[] = {
132 "error", /* 0 */
133 "life", /* 1 */
134 "load", /* 2 */
135 "fault", /* 3 */
136 "copy", /* 4 */
137 "share", /* 5 */
138 "adjust", /* 6 */
139 "pmap", /* 7 */
140 "mementry", /* 8 */
141 "iokit", /* 9 */
142 "upl", /* 10 */
143 "exc", /* 11 */
144 "vfs" /* 12 */
145 };
146 #endif /* MACH_ASSERT */
147 int debug4k_no_cow_copyin = 0;
148
149
150 #if __arm64__
151 extern const int fourk_binary_compatibility_unsafe;
152 extern const int fourk_binary_compatibility_allow_wx;
153 #endif /* __arm64__ */
154 extern int proc_selfpid(void);
155 extern char *proc_name_address(void *p);
156
157 #if VM_MAP_DEBUG_APPLE_PROTECT
158 int vm_map_debug_apple_protect = 0;
159 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
160 #if VM_MAP_DEBUG_FOURK
161 int vm_map_debug_fourk = 0;
162 #endif /* VM_MAP_DEBUG_FOURK */
163
164 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
165 int vm_map_executable_immutable_verbose = 0;
166
167 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
168
169 extern u_int32_t random(void); /* from <libkern/libkern.h> */
170 /* Internal prototypes
171 */
172
173 static void vm_map_simplify_range(
174 vm_map_t map,
175 vm_map_offset_t start,
176 vm_map_offset_t end); /* forward */
177
178 static boolean_t vm_map_range_check(
179 vm_map_t map,
180 vm_map_offset_t start,
181 vm_map_offset_t end,
182 vm_map_entry_t *entry);
183
184 static vm_map_entry_t _vm_map_entry_create(
185 struct vm_map_header *map_header, boolean_t map_locked);
186
187 static void _vm_map_entry_dispose(
188 struct vm_map_header *map_header,
189 vm_map_entry_t entry);
190
191 static void vm_map_pmap_enter(
192 vm_map_t map,
193 vm_map_offset_t addr,
194 vm_map_offset_t end_addr,
195 vm_object_t object,
196 vm_object_offset_t offset,
197 vm_prot_t protection);
198
199 static void _vm_map_clip_end(
200 struct vm_map_header *map_header,
201 vm_map_entry_t entry,
202 vm_map_offset_t end);
203
204 static void _vm_map_clip_start(
205 struct vm_map_header *map_header,
206 vm_map_entry_t entry,
207 vm_map_offset_t start);
208
209 static void vm_map_entry_delete(
210 vm_map_t map,
211 vm_map_entry_t entry);
212
213 static kern_return_t vm_map_delete(
214 vm_map_t map,
215 vm_map_offset_t start,
216 vm_map_offset_t end,
217 int flags,
218 vm_map_t zap_map);
219
220 static void vm_map_copy_insert(
221 vm_map_t map,
222 vm_map_entry_t after_where,
223 vm_map_copy_t copy);
224
225 static kern_return_t vm_map_copy_overwrite_unaligned(
226 vm_map_t dst_map,
227 vm_map_entry_t entry,
228 vm_map_copy_t copy,
229 vm_map_address_t start,
230 boolean_t discard_on_success);
231
232 static kern_return_t vm_map_copy_overwrite_aligned(
233 vm_map_t dst_map,
234 vm_map_entry_t tmp_entry,
235 vm_map_copy_t copy,
236 vm_map_offset_t start,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_copyin_kernel_buffer(
240 vm_map_t src_map,
241 vm_map_address_t src_addr,
242 vm_map_size_t len,
243 boolean_t src_destroy,
244 vm_map_copy_t *copy_result); /* OUT */
245
246 static kern_return_t vm_map_copyout_kernel_buffer(
247 vm_map_t map,
248 vm_map_address_t *addr, /* IN/OUT */
249 vm_map_copy_t copy,
250 vm_map_size_t copy_size,
251 boolean_t overwrite,
252 boolean_t consume_on_success);
253
254 static void vm_map_fork_share(
255 vm_map_t old_map,
256 vm_map_entry_t old_entry,
257 vm_map_t new_map);
258
259 static boolean_t vm_map_fork_copy(
260 vm_map_t old_map,
261 vm_map_entry_t *old_entry_p,
262 vm_map_t new_map,
263 int vm_map_copyin_flags);
264
265 static kern_return_t vm_map_wire_nested(
266 vm_map_t map,
267 vm_map_offset_t start,
268 vm_map_offset_t end,
269 vm_prot_t caller_prot,
270 vm_tag_t tag,
271 boolean_t user_wire,
272 pmap_t map_pmap,
273 vm_map_offset_t pmap_addr,
274 ppnum_t *physpage_p);
275
276 static kern_return_t vm_map_unwire_nested(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end,
280 boolean_t user_wire,
281 pmap_t map_pmap,
282 vm_map_offset_t pmap_addr);
283
284 static kern_return_t vm_map_overwrite_submap_recurse(
285 vm_map_t dst_map,
286 vm_map_offset_t dst_addr,
287 vm_map_size_t dst_size);
288
289 static kern_return_t vm_map_copy_overwrite_nested(
290 vm_map_t dst_map,
291 vm_map_offset_t dst_addr,
292 vm_map_copy_t copy,
293 boolean_t interruptible,
294 pmap_t pmap,
295 boolean_t discard_on_success);
296
297 static kern_return_t vm_map_remap_extract(
298 vm_map_t map,
299 vm_map_offset_t addr,
300 vm_map_size_t size,
301 vm_prot_t required_protection,
302 boolean_t copy,
303 struct vm_map_header *map_header,
304 vm_prot_t *cur_protection,
305 vm_prot_t *max_protection,
306 vm_inherit_t inheritance,
307 vm_map_kernel_flags_t vmk_flags);
308
309 static kern_return_t vm_map_remap_range_allocate(
310 vm_map_t map,
311 vm_map_address_t *address,
312 vm_map_size_t size,
313 vm_map_offset_t mask,
314 int flags,
315 vm_map_kernel_flags_t vmk_flags,
316 vm_tag_t tag,
317 vm_map_entry_t *map_entry);
318
319 static void vm_map_region_look_for_page(
320 vm_map_t map,
321 vm_map_offset_t va,
322 vm_object_t object,
323 vm_object_offset_t offset,
324 int max_refcnt,
325 unsigned short depth,
326 vm_region_extended_info_t extended,
327 mach_msg_type_number_t count);
328
329 static int vm_map_region_count_obj_refs(
330 vm_map_entry_t entry,
331 vm_object_t object);
332
333
334 static kern_return_t vm_map_willneed(
335 vm_map_t map,
336 vm_map_offset_t start,
337 vm_map_offset_t end);
338
339 static kern_return_t vm_map_reuse_pages(
340 vm_map_t map,
341 vm_map_offset_t start,
342 vm_map_offset_t end);
343
344 static kern_return_t vm_map_reusable_pages(
345 vm_map_t map,
346 vm_map_offset_t start,
347 vm_map_offset_t end);
348
349 static kern_return_t vm_map_can_reuse(
350 vm_map_t map,
351 vm_map_offset_t start,
352 vm_map_offset_t end);
353
354 #if MACH_ASSERT
355 static kern_return_t vm_map_pageout(
356 vm_map_t map,
357 vm_map_offset_t start,
358 vm_map_offset_t end);
359 #endif /* MACH_ASSERT */
360
361 kern_return_t vm_map_corpse_footprint_collect(
362 vm_map_t old_map,
363 vm_map_entry_t old_entry,
364 vm_map_t new_map);
365 void vm_map_corpse_footprint_collect_done(
366 vm_map_t new_map);
367 void vm_map_corpse_footprint_destroy(
368 vm_map_t map);
369 kern_return_t vm_map_corpse_footprint_query_page_info(
370 vm_map_t map,
371 vm_map_offset_t va,
372 int *disposition_p);
373 void vm_map_footprint_query_page_info(
374 vm_map_t map,
375 vm_map_entry_t map_entry,
376 vm_map_offset_t curr_s_offset,
377 int *disposition_p);
378
379 static const struct vm_map_entry vm_map_entry_template = {
380 .behavior = VM_BEHAVIOR_DEFAULT,
381 .inheritance = VM_INHERIT_DEFAULT,
382 };
383
384 pid_t find_largest_process_vm_map_entries(void);
385
386 /*
387 * Macros to copy a vm_map_entry. We must be careful to correctly
388 * manage the wired page count. vm_map_entry_copy() creates a new
389 * map entry to the same memory - the wired count in the new entry
390 * must be set to zero. vm_map_entry_copy_full() creates a new
391 * entry that is identical to the old entry. This preserves the
392 * wire count; it's used for map splitting and zone changing in
393 * vm_map_copyout.
394 */
395
396 static inline void
397 vm_map_entry_copy_pmap_cs_assoc(
398 vm_map_t map __unused,
399 vm_map_entry_t new __unused,
400 vm_map_entry_t old __unused)
401 {
402 /* when pmap_cs is not enabled, assert as a sanity check */
403 assert(new->pmap_cs_associated == FALSE);
404 }
405
406 /*
407 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
408 * But for security reasons on some platforms, we don't want the
409 * new mapping to be "used for jit", so we reset the flag here.
410 */
411 static inline void
412 vm_map_entry_copy_code_signing(
413 vm_map_t map,
414 vm_map_entry_t new,
415 vm_map_entry_t old __unused)
416 {
417 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
418 assert(new->used_for_jit == old->used_for_jit);
419 } else {
420 new->used_for_jit = FALSE;
421 }
422 }
423
424 static inline void
425 vm_map_entry_copy(
426 vm_map_t map,
427 vm_map_entry_t new,
428 vm_map_entry_t old)
429 {
430 boolean_t _vmec_reserved = new->from_reserved_zone;
431 *new = *old;
432 new->is_shared = FALSE;
433 new->needs_wakeup = FALSE;
434 new->in_transition = FALSE;
435 new->wired_count = 0;
436 new->user_wired_count = 0;
437 new->permanent = FALSE;
438 vm_map_entry_copy_code_signing(map, new, old);
439 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
440 new->from_reserved_zone = _vmec_reserved;
441 if (new->iokit_acct) {
442 assertf(!new->use_pmap, "old %p new %p\n", old, new);
443 new->iokit_acct = FALSE;
444 new->use_pmap = TRUE;
445 }
446 new->vme_resilient_codesign = FALSE;
447 new->vme_resilient_media = FALSE;
448 new->vme_atomic = FALSE;
449 new->vme_no_copy_on_read = FALSE;
450 }
451
452 static inline void
453 vm_map_entry_copy_full(
454 vm_map_entry_t new,
455 vm_map_entry_t old)
456 {
457 boolean_t _vmecf_reserved = new->from_reserved_zone;
458 *new = *old;
459 new->from_reserved_zone = _vmecf_reserved;
460 }
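/*
 * Illustrative sketch (hypothetical helper, not part of the original file)
 * contrasting the two copies described above: a new entry aliasing the same
 * memory must not inherit the wired counts, while an entry merely
 * transplanted between headers (as in vm_map_copyout) keeps them.
 */
#if 0 /* example only, not built */
static void
example_entry_copies(vm_map_t map, vm_map_entry_t src,
    vm_map_entry_t alias, vm_map_entry_t transplanted)
{
	vm_map_entry_copy(map, alias, src);          /* wired_count reset to 0 */
	vm_map_entry_copy_full(transplanted, src);   /* wire counts preserved */
}
#endif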
461
462 /*
463 * Unlike lck_rw_lock_shared_to_exclusive(), which returns FALSE/0 on failure,
464 * vm_map_lock_read_to_write() returns zero on success and non-zero on failure.
465 */
466 __attribute__((always_inline))
467 int
468 vm_map_lock_read_to_write(vm_map_t map)
469 {
470 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
471 DTRACE_VM(vm_map_lock_upgrade);
472 return 0;
473 }
474 return 1;
475 }
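/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * on a non-zero return the shared lock has already been dropped by
 * lck_rw_lock_shared_to_exclusive(), so the caller must take the write
 * lock explicitly and revalidate anything it looked up under the read lock.
 */
#if 0 /* example only, not built */
static void
example_upgrade(vm_map_t map)
{
	vm_map_lock_read(map);
	/* ... read-only inspection ... */
	if (vm_map_lock_read_to_write(map)) {
		/* upgrade failed: the read lock is gone */
		vm_map_lock(map);
		/* ... re-lookup any entries cached under the read lock ... */
	}
	/* ... modify the map under the write lock ... */
	vm_map_unlock(map);
}
#endif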
476
477 __attribute__((always_inline))
478 boolean_t
479 vm_map_try_lock(vm_map_t map)
480 {
481 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
482 DTRACE_VM(vm_map_lock_w);
483 return TRUE;
484 }
485 return FALSE;
486 }
487
488 __attribute__((always_inline))
489 boolean_t
490 vm_map_try_lock_read(vm_map_t map)
491 {
492 if (lck_rw_try_lock_shared(&(map)->lock)) {
493 DTRACE_VM(vm_map_lock_r);
494 return TRUE;
495 }
496 return FALSE;
497 }
498
499 /*
500 * Routines to get the page size the caller should
501 * use while inspecting the target address space.
502 * Use the "_safely" variant if the caller is dealing with a user-provided
503 * array whose size depends on the page size, to avoid any overflow or
504 * underflow of a user-allocated buffer.
505 */
506 int
507 vm_self_region_page_shift_safely(
508 vm_map_t target_map)
509 {
510 int effective_page_shift = 0;
511
512 if (PAGE_SIZE == (4096)) {
513 /* x86_64 and 4k watches: always use 4k */
514 return PAGE_SHIFT;
515 }
516 /* did caller provide an explicit page size for this thread to use? */
517 effective_page_shift = thread_self_region_page_shift();
518 if (effective_page_shift) {
519 /* use the explicitly-provided page size */
520 return effective_page_shift;
521 }
522 /* no explicit page size: use the caller's page size... */
523 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
524 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
525 /* page size match: safe to use */
526 return effective_page_shift;
527 }
528 /* page size mismatch */
529 return -1;
530 }
531 int
532 vm_self_region_page_shift(
533 vm_map_t target_map)
534 {
535 int effective_page_shift;
536
537 effective_page_shift = vm_self_region_page_shift_safely(target_map);
538 if (effective_page_shift == -1) {
539 /* no safe value but OK to guess for caller */
540 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
541 VM_MAP_PAGE_SHIFT(target_map));
542 }
543 return effective_page_shift;
544 }
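/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * a routine that sizes a user-supplied buffer from the page size should
 * use the "_safely" variant and refuse on a mismatch instead of guessing.
 */
#if 0 /* example only, not built */
static kern_return_t
example_count_pages(
	vm_map_t        target_map,
	vm_map_size_t   region_size,
	unsigned int    *page_count) /* OUT */
{
	int shift = vm_self_region_page_shift_safely(target_map);

	if (shift == -1) {
		/* page size mismatch: bail out rather than risk a buffer overrun */
		return KERN_INVALID_ARGUMENT;
	}
	*page_count = (unsigned int)(region_size >> shift);
	return KERN_SUCCESS;
}
#endif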
545
546
547 /*
548 * Decide if we want to allow processes to execute from their data or stack areas.
549 * override_nx() returns true if we do. Data/stack execution can be enabled independently
550 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
551 * or allow_stack_exec to enable data execution for that type of data area for that particular
552 * ABI (or both by or'ing the flags together). These are initialized in the architecture
553 * specific pmap files since the default behavior varies according to architecture. The
554 * main reason it varies is because of the need to provide binary compatibility with old
555 * applications that were written before these restrictions came into being. In the old
556 * days, an app could execute anything it could read, but this has slowly been tightened
557 * up over time. The default behavior is:
558 *
559 * 32-bit PPC apps may execute from both stack and data areas
560 * 32-bit Intel apps may execute from data areas but not stack
561 * 64-bit PPC/Intel apps may not execute from either data or stack
562 *
563 * An application on any architecture may override these defaults by explicitly
564 * adding PROT_EXEC permission to the page in question with the mprotect(2)
565 * system call. This code here just determines what happens when an app tries to
566 * execute from a page that lacks execute permission.
567 *
568 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
569 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
570 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
571 * execution from data areas for a particular binary even if the arch normally permits it. As
572 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
573 * to support some complicated use cases, notably browsers with out-of-process plugins that
574 * are not all NX-safe.
575 */
576
577 extern int allow_data_exec, allow_stack_exec;
578
579 int
580 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
581 {
582 int current_abi;
583
584 if (map->pmap == kernel_pmap) {
585 return FALSE;
586 }
587
588 /*
589 * Determine if the app is running in 32 or 64 bit mode.
590 */
591
592 if (vm_map_is_64bit(map)) {
593 current_abi = VM_ABI_64;
594 } else {
595 current_abi = VM_ABI_32;
596 }
597
598 /*
599 * Determine if we should allow the execution based on whether it's a
600 * stack or data area and the current architecture.
601 */
602
603 if (user_tag == VM_MEMORY_STACK) {
604 return allow_stack_exec & current_abi;
605 }
606
607 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
608 }
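/*
 * Worked example (hypothetical policy values, not part of the original file):
 * override_nx() simply ANDs the process's ABI bit against the relevant policy
 * word, so with the settings below a 32-bit process may execute from its data
 * area (assuming map_disallow_data_exec is not set) but not from its stack,
 * and a 64-bit process may execute from neither.
 */
#if 0 /* example only, not built */
static void
example_nx_policy(void)
{
	allow_data_exec  = VM_ABI_32;  /* data execution: 32-bit processes only */
	allow_stack_exec = 0;          /* stack execution: nobody */
	/*
	 * override_nx(map_of_32bit_task, VM_MEMORY_STACK)  -> 0 (deny)
	 * override_nx(map_of_32bit_task, VM_MEMORY_MALLOC) -> non-zero (allow)
	 * override_nx(map_of_64bit_task, VM_MEMORY_MALLOC) -> 0 (deny)
	 */
}
#endif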
609
610
611 /*
612 * Virtual memory maps provide for the mapping, protection,
613 * and sharing of virtual memory objects. In addition,
614 * this module provides for an efficient virtual copy of
615 * memory from one map to another.
616 *
617 * Synchronization is required prior to most operations.
618 *
619 * Maps consist of an ordered doubly-linked list of simple
620 * entries; a single hint is used to speed up lookups.
621 *
622 * Sharing maps have been deleted from this version of Mach.
623 * All shared objects are now mapped directly into the respective
624 * maps. This requires a change in the copy on write strategy;
625 * the asymmetric (delayed) strategy is used for shared temporary
626 * objects instead of the symmetric (shadow) strategy. All maps
627 * are now "top level" maps (either task map, kernel map or submap
628 * of the kernel map).
629 *
630 * Since portions of maps are specified by start/end addresses,
631 * which may not align with existing map entries, all
632 * routines merely "clip" entries to these start/end values.
633 * [That is, an entry is split into two, bordering at a
634 * start or end value.] Note that these clippings may not
635 * always be necessary (as the two resulting entries are then
636 * not changed); however, the clipping is done for convenience.
637 * No attempt is currently made to "glue back together" two
638 * abutting entries.
639 *
640 * The symmetric (shadow) copy strategy implements virtual copy
641 * by copying VM object references from one map to
642 * another, and then marking both regions as copy-on-write.
643 * It is important to note that only one writeable reference
644 * to a VM object region exists in any map when this strategy
645 * is used -- this means that shadow object creation can be
646 * delayed until a write operation occurs. The asymmetric (delayed)
647 * strategy allows multiple maps to have writeable references to
648 * the same region of a vm object, and hence cannot delay creating
649 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
650 * Copying of permanent objects is completely different; see
651 * vm_object_copy_strategically() in vm_object.c.
652 */
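/*
 * Illustrative sketch (hypothetical helper, not part of the original file)
 * of the clipping idiom described above, as used by e.g.
 * vm_map_apple_protected() below: split the entry at "start" and "end" so
 * that the resulting entry covers at most [start, end) before operating on it.
 */
#if 0 /* example only, not built */
static void
example_clip_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	vm_map_entry_t entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);
		/* "entry" now starts at "start" and ends at or before "end" */
	}
	vm_map_unlock(map);
}
#endif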
653
654 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
655 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
656 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
657
658 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
659 SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
660
661 #define VM_MAP_ZONE_NAME "maps"
662 #define VM_MAP_ZFLAGS ( \
663 ZC_NOENCRYPT | \
664 ZC_NOGC | \
665 ZC_NOGZALLOC | \
666 ZC_ALLOW_FOREIGN)
667
668 #define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
669 #define VM_MAP_RESERVED_ZFLAGS ( \
670 ZC_NOENCRYPT | \
671 ZC_ALLOW_FOREIGN | \
672 ZC_NOCALLOUT | \
673 ZC_NOGZALLOC | \
674 ZC_KASAN_NOQUARANTINE | \
675 ZC_NOGC)
676
677 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
678 #define VM_MAP_HOLES_ZFLAGS ( \
679 ZC_NOENCRYPT | \
680 ZC_NOGC | \
681 ZC_NOGZALLOC | \
682 ZC_ALLOW_FOREIGN)
683
684 /*
685 * Asserts that a vm_map_copy object is coming from the
686 * vm_map_copy_zone to ensure that it isn't a fake constructed
687 * anywhere else.
688 */
689 static inline void
690 vm_map_copy_require(struct vm_map_copy *copy)
691 {
692 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
693 }
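/*
 * Usage sketch (hypothetical caller, not part of the original file): a
 * routine handed a vm_map_copy_t from an untrusted source (e.g. out of an
 * IPC message) validates its provenance before using it, so a forged
 * structure that did not come from vm_map_copy_zone panics instead of
 * being consumed.
 */
#if 0 /* example only, not built */
static void
example_consume_copy(vm_map_copy_t copy)
{
	vm_map_copy_require(copy);
	/* ... now safe to look at copy->type, copy->size, ... */
}
#endif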
694
695 /*
696 * Placeholder object for submap operations. This object is dropped
697 * into the range by a call to vm_map_find, and removed when
698 * vm_map_submap creates the submap.
699 */
700
701 vm_object_t vm_submap_object;
702
703 static __startup_data vm_offset_t map_data;
704 static __startup_data vm_size_t map_data_size;
705 static __startup_data vm_offset_t kentry_data;
706 static __startup_data vm_size_t kentry_data_size;
707 static __startup_data vm_offset_t map_holes_data;
708 static __startup_data vm_size_t map_holes_data_size;
709
710 #if XNU_TARGET_OS_OSX
711 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
712 #else /* XNU_TARGET_OS_OSX */
713 #define NO_COALESCE_LIMIT 0
714 #endif /* XNU_TARGET_OS_OSX */
715
716 /* Skip acquiring locks if we're in the midst of a kernel core dump */
717 unsigned int not_in_kdp = 1;
718
719 unsigned int vm_map_set_cache_attr_count = 0;
720
721 kern_return_t
722 vm_map_set_cache_attr(
723 vm_map_t map,
724 vm_map_offset_t va)
725 {
726 vm_map_entry_t map_entry;
727 vm_object_t object;
728 kern_return_t kr = KERN_SUCCESS;
729
730 vm_map_lock_read(map);
731
732 if (!vm_map_lookup_entry(map, va, &map_entry) ||
733 map_entry->is_sub_map) {
734 /*
735 * that memory is not properly mapped
736 */
737 kr = KERN_INVALID_ARGUMENT;
738 goto done;
739 }
740 object = VME_OBJECT(map_entry);
741
742 if (object == VM_OBJECT_NULL) {
743 /*
744 * there should be a VM object here at this point
745 */
746 kr = KERN_INVALID_ARGUMENT;
747 goto done;
748 }
749 vm_object_lock(object);
750 object->set_cache_attr = TRUE;
751 vm_object_unlock(object);
752
753 vm_map_set_cache_attr_count++;
754 done:
755 vm_map_unlock_read(map);
756
757 return kr;
758 }
759
760
761 #if CONFIG_CODE_DECRYPTION
762 /*
763 * vm_map_apple_protected:
764 * This remaps the requested part of the object with an object backed by
765 * the decrypting pager.
766 * crypt_info contains entry points and session data for the crypt module.
767 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
768 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
769 */
770 kern_return_t
771 vm_map_apple_protected(
772 vm_map_t map,
773 vm_map_offset_t start,
774 vm_map_offset_t end,
775 vm_object_offset_t crypto_backing_offset,
776 struct pager_crypt_info *crypt_info,
777 uint32_t cryptid)
778 {
779 boolean_t map_locked;
780 kern_return_t kr;
781 vm_map_entry_t map_entry;
782 struct vm_map_entry tmp_entry;
783 memory_object_t unprotected_mem_obj;
784 vm_object_t protected_object;
785 vm_map_offset_t map_addr;
786 vm_map_offset_t start_aligned, end_aligned;
787 vm_object_offset_t crypto_start, crypto_end;
788 int vm_flags;
789 vm_map_kernel_flags_t vmk_flags;
790
791 vm_flags = 0;
792 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
793
794 map_locked = FALSE;
795 unprotected_mem_obj = MEMORY_OBJECT_NULL;
796
797 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
798 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
799 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
800 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
801
802 #if __arm64__
803 /*
804 * "start" and "end" might be 4K-aligned but not 16K-aligned,
805 * so we might have to loop and establish up to 3 mappings:
806 *
807 * + the first 16K-page, which might overlap with the previous
808 * 4K-aligned mapping,
809 * + the center,
810 * + the last 16K-page, which might overlap with the next
811 * 4K-aligned mapping.
812 * Each of these mapping might be backed by a vnode pager (if
813 * properly page-aligned) or a "fourk_pager", itself backed by a
814 * vnode pager (if 4K-aligned but not page-aligned).
815 */
816 #endif /* __arm64__ */
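/*
 * Worked example (illustrative numbers, assuming a 16K kernel page size):
 * start = 0x5000, end = 0x19000 align out to [0x4000, 0x1C000), so the loop
 * can establish up to three mappings: the first 16K page [0x4000, 0x8000)
 * and the last 16K page [0x18000, 0x1C000), which overlap neighbouring
 * 4K-aligned mappings and may need a "fourk_pager", plus the fully covered
 * middle [0x8000, 0x18000).
 */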
817
818 map_addr = start_aligned;
819 for (map_addr = start_aligned;
820 map_addr < end;
821 map_addr = tmp_entry.vme_end) {
822 vm_map_lock(map);
823 map_locked = TRUE;
824
825 /* lookup the protected VM object */
826 if (!vm_map_lookup_entry(map,
827 map_addr,
828 &map_entry) ||
829 map_entry->is_sub_map ||
830 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
831 /* that memory is not properly mapped */
832 kr = KERN_INVALID_ARGUMENT;
833 goto done;
834 }
835
836 /* ensure mapped memory is mapped as executable,
837 * except for the model decryption flow */
838 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
839 !(map_entry->protection & VM_PROT_EXECUTE)) {
840 kr = KERN_INVALID_ARGUMENT;
841 goto done;
842 }
843
844 /* get the protected object to be decrypted */
845 protected_object = VME_OBJECT(map_entry);
846 if (protected_object == VM_OBJECT_NULL) {
847 /* there should be a VM object here at this point */
848 kr = KERN_INVALID_ARGUMENT;
849 goto done;
850 }
851 /* ensure protected object stays alive while map is unlocked */
852 vm_object_reference(protected_object);
853
854 /* limit the map entry to the area we want to cover */
855 vm_map_clip_start(map, map_entry, start_aligned);
856 vm_map_clip_end(map, map_entry, end_aligned);
857
858 tmp_entry = *map_entry;
859 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
860 vm_map_unlock(map);
861 map_locked = FALSE;
862
863 /*
864 * This map entry might be only partially encrypted
865 * (if not fully "page-aligned").
866 */
867 crypto_start = 0;
868 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
869 if (tmp_entry.vme_start < start) {
870 if (tmp_entry.vme_start != start_aligned) {
871 kr = KERN_INVALID_ADDRESS;
872 }
873 crypto_start += (start - tmp_entry.vme_start);
874 }
875 if (tmp_entry.vme_end > end) {
876 if (tmp_entry.vme_end != end_aligned) {
877 kr = KERN_INVALID_ADDRESS;
878 }
879 crypto_end -= (tmp_entry.vme_end - end);
880 }
881
882 /*
883 * This "extra backing offset" is needed to get the decryption
884 * routine to use the right key. It adjusts for the possibly
885 * relative offset of an interposed "4K" pager...
886 */
887 if (crypto_backing_offset == (vm_object_offset_t) -1) {
888 crypto_backing_offset = VME_OFFSET(&tmp_entry);
889 }
890
891 /*
892 * Lookup (and create if necessary) the protected memory object
893 * matching that VM object.
894 * If successful, this also grabs a reference on the memory object,
895 * to guarantee that it doesn't go away before we get a chance to map
896 * it.
897 */
898 unprotected_mem_obj = apple_protect_pager_setup(
899 protected_object,
900 VME_OFFSET(&tmp_entry),
901 crypto_backing_offset,
902 crypt_info,
903 crypto_start,
904 crypto_end);
905
906 /* release extra ref on protected object */
907 vm_object_deallocate(protected_object);
908
909 if (unprotected_mem_obj == NULL) {
910 kr = KERN_FAILURE;
911 goto done;
912 }
913
914 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
915 /* can overwrite an immutable mapping */
916 vmk_flags.vmkf_overwrite_immutable = TRUE;
917 #if __arm64__
918 if (tmp_entry.used_for_jit &&
919 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
920 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
921 fourk_binary_compatibility_unsafe &&
922 fourk_binary_compatibility_allow_wx) {
923 printf("** FOURK_COMPAT [%d]: "
924 "allowing write+execute at 0x%llx\n",
925 proc_selfpid(), tmp_entry.vme_start);
926 vmk_flags.vmkf_map_jit = TRUE;
927 }
928 #endif /* __arm64__ */
929
930 /* map this memory object in place of the current one */
931 map_addr = tmp_entry.vme_start;
932 kr = vm_map_enter_mem_object(map,
933 &map_addr,
934 (tmp_entry.vme_end -
935 tmp_entry.vme_start),
936 (mach_vm_offset_t) 0,
937 vm_flags,
938 vmk_flags,
939 VM_KERN_MEMORY_NONE,
940 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
941 0,
942 TRUE,
943 tmp_entry.protection,
944 tmp_entry.max_protection,
945 tmp_entry.inheritance);
946 assertf(kr == KERN_SUCCESS,
947 "kr = 0x%x\n", kr);
948 assertf(map_addr == tmp_entry.vme_start,
949 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
950 (uint64_t)map_addr,
951 (uint64_t) tmp_entry.vme_start,
952 &tmp_entry);
953
954 #if VM_MAP_DEBUG_APPLE_PROTECT
955 if (vm_map_debug_apple_protect) {
956 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
957 " backing:[object:%p,offset:0x%llx,"
958 "crypto_backing_offset:0x%llx,"
959 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
960 map,
961 (uint64_t) map_addr,
962 (uint64_t) (map_addr + (tmp_entry.vme_end -
963 tmp_entry.vme_start)),
964 unprotected_mem_obj,
965 protected_object,
966 VME_OFFSET(&tmp_entry),
967 crypto_backing_offset,
968 crypto_start,
969 crypto_end);
970 }
971 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
972
973 /*
974 * Release the reference obtained by
975 * apple_protect_pager_setup().
976 * The mapping (if it succeeded) is now holding a reference on
977 * the memory object.
978 */
979 memory_object_deallocate(unprotected_mem_obj);
980 unprotected_mem_obj = MEMORY_OBJECT_NULL;
981
982 /* continue with next map entry */
983 crypto_backing_offset += (tmp_entry.vme_end -
984 tmp_entry.vme_start);
985 crypto_backing_offset -= crypto_start;
986 }
987 kr = KERN_SUCCESS;
988
989 done:
990 if (map_locked) {
991 vm_map_unlock(map);
992 }
993 return kr;
994 }
995 #endif /* CONFIG_CODE_DECRYPTION */
996
997
998 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
999 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1000 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1001
1002 #if XNU_TARGET_OS_OSX
1003 int malloc_no_cow = 0;
1004 #else /* XNU_TARGET_OS_OSX */
1005 int malloc_no_cow = 1;
1006 #endif /* XNU_TARGET_OS_OSX */
1007 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1008 #if DEBUG
1009 int vm_check_map_sanity = 0;
1010 #endif
1011
1012 /*
1013 * vm_map_init:
1014 *
1015 * Initialize the vm_map module. Must be called before
1016 * any other vm_map routines.
1017 *
1018 * Map and entry structures are allocated from zones -- we must
1019 * initialize those zones.
1020 *
1021 * There are three zones of interest:
1022 *
1023 * vm_map_zone: used to allocate maps.
1024 * vm_map_entry_zone: used to allocate map entries.
1025 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1026 *
1027 * The kernel allocates map entries from a special zone that is initially
1028 * "crammed" with memory. It would be difficult (perhaps impossible) for
1029 * the kernel to allocate more memory to an entry zone when it became
1030 * empty since the very act of allocating memory implies the creation
1031 * of a new entry.
1032 */
1033 __startup_func
1034 void
1035 vm_map_init(void)
1036 {
1037 const char *mez_name = "VM map entries";
1038
1039
1040 #if MACH_ASSERT
1041 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1042 sizeof(debug4k_filter));
1043 #endif /* MACH_ASSERT */
1044
1045 vm_map_zone = zone_create(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1046 VM_MAP_ZFLAGS);
1047
1048 vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
1049 ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);
1050
1051 /*
1052 * Don't quarantine because we always need elements available
1053 * Disallow GC on this zone... to aid the GC.
1054 */
1055 vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
1056 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1057 ZONE_ID_ANY, ^(zone_t z) {
1058 zone_set_noexpand(z, 64 * kentry_data_size);
1059 });
1060
1061 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1062 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1063
1064 vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
1065 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);
1066
1067 /*
1068 * Add the stolen memory to zones, adjust zone size and stolen counts.
1069 */
1070 zcram(vm_map_zone, map_data, map_data_size);
1071 zcram(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
1072 zcram(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1073
1074 /*
1075 * Since these are covered by zones, remove them from stolen page accounting.
1076 */
1077 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1078
1079 #if VM_MAP_DEBUG_APPLE_PROTECT
1080 PE_parse_boot_argn("vm_map_debug_apple_protect",
1081 &vm_map_debug_apple_protect,
1082 sizeof(vm_map_debug_apple_protect));
1083 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1084 #if VM_MAP_DEBUG_FOURK
1085 PE_parse_boot_argn("vm_map_debug_fourk",
1086 &vm_map_debug_fourk,
1087 sizeof(vm_map_debug_fourk));
1088 #endif /* VM_MAP_DEBUG_FOURK */
1089 PE_parse_boot_argn("vm_map_executable_immutable",
1090 &vm_map_executable_immutable,
1091 sizeof(vm_map_executable_immutable));
1092 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1093 &vm_map_executable_immutable_verbose,
1094 sizeof(vm_map_executable_immutable_verbose));
1095
1096 PE_parse_boot_argn("malloc_no_cow",
1097 &malloc_no_cow,
1098 sizeof(malloc_no_cow));
1099 if (malloc_no_cow) {
1100 vm_memory_malloc_no_cow_mask = 0ULL;
1101 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1102 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1103 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1104 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1105 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1106 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1107 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1108 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1109 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1110 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1111 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1112 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1113 &vm_memory_malloc_no_cow_mask,
1114 sizeof(vm_memory_malloc_no_cow_mask));
1115 }
1116
1117 #if DEBUG
1118 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1119 if (vm_check_map_sanity) {
1120 kprintf("VM sanity checking enabled\n");
1121 } else {
1122 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1123 }
1124 #endif /* DEBUG */
1125
1126 #if DEVELOPMENT || DEBUG
1127 PE_parse_boot_argn("panic_on_unsigned_execute",
1128 &panic_on_unsigned_execute,
1129 sizeof(panic_on_unsigned_execute));
1130 #endif /* DEVELOPMENT || DEBUG */
1131 }
1132
1133 __startup_func
1134 static void
1135 vm_map_steal_memory(void)
1136 {
1137 uint16_t kentry_initial_pages;
1138
1139 map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1140 sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1141
1142 /*
1143 * kentry_initial_pages corresponds to the number of kernel map entries
1144 * required during bootstrap until the asynchronous replenishment
1145 * scheme is activated and/or entries are available from the general
1146 * map entry pool.
1147 */
1148 #if defined(__LP64__)
1149 kentry_initial_pages = 10;
1150 #else
1151 kentry_initial_pages = 6;
1152 #endif
1153
1154 #if CONFIG_GZALLOC
1155 /* If using the guard allocator, reserve more memory for the kernel
1156 * reserved map entry pool.
1157 */
1158 if (gzalloc_enabled()) {
1159 kentry_initial_pages *= 1024;
1160 }
1161 #endif
1162
1163 kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
1164 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1165 kentry_initial_pages);
1166
1167 map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1168 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1169 kentry_initial_pages);
1170
1171 /*
1172 * Steal a contiguous range of memory so that a simple range check
1173 * can validate foreign addresses being freed/crammed to these
1174 * zones
1175 */
1176 vm_size_t total_size;
1177 if (os_add3_overflow(map_data_size, kentry_data_size,
1178 map_holes_data_size, &total_size)) {
1179 panic("vm_map_steal_memory: overflow in amount of memory requested");
1180 }
1181 map_data = zone_foreign_mem_init(total_size);
1182 kentry_data = map_data + map_data_size;
1183 map_holes_data = kentry_data + kentry_data_size;
1184 }
1185 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1186
1187 boolean_t vm_map_supports_hole_optimization = FALSE;
1188
1189 void
1190 vm_kernel_reserved_entry_init(void)
1191 {
1192 zone_prio_refill_configure(vm_map_entry_reserved_zone);
1193
1194 /*
1195 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1196 */
1197 zone_prio_refill_configure(vm_map_holes_zone);
1198 vm_map_supports_hole_optimization = TRUE;
1199 }
1200
1201 void
1202 vm_map_disable_hole_optimization(vm_map_t map)
1203 {
1204 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1205
1206 if (map->holelistenabled) {
1207 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1208
1209 while (hole_entry != NULL) {
1210 next_hole_entry = hole_entry->vme_next;
1211
1212 hole_entry->vme_next = NULL;
1213 hole_entry->vme_prev = NULL;
1214 zfree(vm_map_holes_zone, hole_entry);
1215
1216 if (next_hole_entry == head_entry) {
1217 hole_entry = NULL;
1218 } else {
1219 hole_entry = next_hole_entry;
1220 }
1221 }
1222
1223 map->holes_list = NULL;
1224 map->holelistenabled = FALSE;
1225
1226 map->first_free = vm_map_first_entry(map);
1227 SAVE_HINT_HOLE_WRITE(map, NULL);
1228 }
1229 }
1230
1231 boolean_t
1232 vm_kernel_map_is_kernel(vm_map_t map)
1233 {
1234 return map->pmap == kernel_pmap;
1235 }
1236
1237 /*
1238 * vm_map_create:
1239 *
1240 * Creates and returns a new empty VM map with
1241 * the given physical map structure, and having
1242 * the given lower and upper address bounds.
1243 */
1244
1245 vm_map_t
1246 vm_map_create(
1247 pmap_t pmap,
1248 vm_map_offset_t min,
1249 vm_map_offset_t max,
1250 boolean_t pageable)
1251 {
1252 int options;
1253
1254 options = 0;
1255 if (pageable) {
1256 options |= VM_MAP_CREATE_PAGEABLE;
1257 }
1258 return vm_map_create_options(pmap, min, max, options);
1259 }
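/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * create an empty pageable map over an existing pmap; real callers then
 * populate it with vm_map_enter() and friends, and eventually tear it
 * down with vm_map_destroy().
 */
#if 0 /* example only, not built */
static vm_map_t
example_create_map(pmap_t pmap, vm_map_offset_t min, vm_map_offset_t max)
{
	vm_map_t map;

	map = vm_map_create(pmap, min, max, TRUE); /* TRUE => pageable entries */
	/* ... populate the new map ... */
	return map;
}
#endif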
1260
1261 vm_map_t
1262 vm_map_create_options(
1263 pmap_t pmap,
1264 vm_map_offset_t min,
1265 vm_map_offset_t max,
1266 int options)
1267 {
1268 vm_map_t result;
1269 struct vm_map_links *hole_entry = NULL;
1270
1271 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1272 /* unknown option */
1273 return VM_MAP_NULL;
1274 }
1275
1276 result = (vm_map_t) zalloc(vm_map_zone);
1277 if (result == VM_MAP_NULL) {
1278 panic("vm_map_create");
1279 }
1280
1281 vm_map_first_entry(result) = vm_map_to_entry(result);
1282 vm_map_last_entry(result) = vm_map_to_entry(result);
1283 result->hdr.nentries = 0;
1284 if (options & VM_MAP_CREATE_PAGEABLE) {
1285 result->hdr.entries_pageable = TRUE;
1286 } else {
1287 result->hdr.entries_pageable = FALSE;
1288 }
1289
1290 vm_map_store_init( &(result->hdr));
1291
1292 result->hdr.page_shift = PAGE_SHIFT;
1293
1294 result->size = 0;
1295 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1296 result->user_wire_size = 0;
1297 #if XNU_TARGET_OS_OSX
1298 result->vmmap_high_start = 0;
1299 #endif
1300 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1301 #if TASK_SWAPPER
1302 result->res_count = 1;
1303 result->sw_state = MAP_SW_IN;
1304 #endif /* TASK_SWAPPER */
1305 result->pmap = pmap;
1306 result->min_offset = min;
1307 result->max_offset = max;
1308 result->wiring_required = FALSE;
1309 result->no_zero_fill = FALSE;
1310 result->mapped_in_other_pmaps = FALSE;
1311 result->wait_for_space = FALSE;
1312 result->switch_protect = FALSE;
1313 result->disable_vmentry_reuse = FALSE;
1314 result->map_disallow_data_exec = FALSE;
1315 result->is_nested_map = FALSE;
1316 result->map_disallow_new_exec = FALSE;
1317 result->terminated = FALSE;
1318 result->cs_enforcement = FALSE;
1319 result->highest_entry_end = 0;
1320 result->first_free = vm_map_to_entry(result);
1321 result->hint = vm_map_to_entry(result);
1322 result->jit_entry_exists = FALSE;
1323 result->is_alien = FALSE;
1324 result->reserved_regions = FALSE;
1325
1326 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1327 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1328 result->has_corpse_footprint = TRUE;
1329 result->holelistenabled = FALSE;
1330 result->vmmap_corpse_footprint = NULL;
1331 } else {
1332 result->has_corpse_footprint = FALSE;
1333 if (vm_map_supports_hole_optimization) {
1334 hole_entry = zalloc(vm_map_holes_zone);
1335
1336 hole_entry->start = min;
1337 #if defined(__arm__) || defined(__arm64__)
1338 hole_entry->end = result->max_offset;
1339 #else
1340 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1341 #endif
1342 result->holes_list = result->hole_hint = hole_entry;
1343 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1344 result->holelistenabled = TRUE;
1345 } else {
1346 result->holelistenabled = FALSE;
1347 }
1348 }
1349
1350 vm_map_lock_init(result);
1351 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1352
1353 return result;
1354 }
1355
1356 vm_map_size_t
1357 vm_map_adjusted_size(vm_map_t map)
1358 {
1359 struct vm_reserved_region *regions = NULL;
1360 size_t num_regions = 0;
1361 mach_vm_size_t reserved_size = 0, map_size = 0;
1362
1363 if (map == NULL || (map->size == 0)) {
1364 return 0;
1365 }
1366
1367 map_size = map->size;
1368
1369 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1370 /*
1371 * No special reserved regions or not an exotic map or the task
1372 * is terminating and these special regions might have already
1373 * been deallocated.
1374 */
1375 return map_size;
1376 }
1377
1378 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1379 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1380
1381 while (num_regions) {
1382 reserved_size += regions[--num_regions].vmrr_size;
1383 }
1384
1385 /*
1386 * There are a few places where the map is being switched out due to
1387 * 'termination' without that bit being set (e.g. exec and corpse purging).
1388 * In those cases, we could have the map's regions being deallocated on
1389 * a core while some accounting process is trying to get the map's size.
1390 * So this assert can't be enabled till all those places are uniform in
1391 * their use of the 'map->terminated' bit.
1392 *
1393 * assert(map_size >= reserved_size);
1394 */
1395
1396 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1397 }
1398
1399 /*
1400 * vm_map_entry_create: [ internal use only ]
1401 *
1402 * Allocates a VM map entry for insertion in the
1403 * given map (or map copy). No fields are filled.
1404 */
1405 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1406
1407 #define vm_map_copy_entry_create(copy, map_locked) \
1408 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1409 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1410
1411 static vm_map_entry_t
1412 _vm_map_entry_create(
1413 struct vm_map_header *map_header, boolean_t __unused map_locked)
1414 {
1415 zone_t zone;
1416 vm_map_entry_t entry;
1417
1418 zone = vm_map_entry_zone;
1419
1420 assert(map_header->entries_pageable ? !map_locked : TRUE);
1421
1422 if (map_header->entries_pageable) {
1423 entry = (vm_map_entry_t) zalloc(zone);
1424 } else {
1425 entry = (vm_map_entry_t) zalloc_noblock(zone);
1426
1427 if (entry == VM_MAP_ENTRY_NULL) {
1428 zone = vm_map_entry_reserved_zone;
1429 entry = (vm_map_entry_t) zalloc(zone);
1430 OSAddAtomic(1, &reserved_zalloc_count);
1431 } else {
1432 OSAddAtomic(1, &nonreserved_zalloc_count);
1433 }
1434 }
1435
1436 if (entry == VM_MAP_ENTRY_NULL) {
1437 panic("vm_map_entry_create");
1438 }
1439 *entry = vm_map_entry_template;
1440 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1441
1442 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1443 #if MAP_ENTRY_CREATION_DEBUG
1444 entry->vme_creation_maphdr = map_header;
1445 backtrace(&entry->vme_creation_bt[0],
1446 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1447 #endif
1448 return entry;
1449 }
1450
1451 /*
1452 * vm_map_entry_dispose: [ internal use only ]
1453 *
1454 * Inverse of vm_map_entry_create.
1455 *
1456 * write map lock held so no need to
1457 * do anything special to insure correctness
1458 * of the stores
1459 */
1460 #define vm_map_entry_dispose(map, entry) \
1461 _vm_map_entry_dispose(&(map)->hdr, (entry))
1462
1463 #define vm_map_copy_entry_dispose(copy, entry) \
1464 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1465
1466 static void
1467 _vm_map_entry_dispose(
1468 struct vm_map_header *map_header,
1469 vm_map_entry_t entry)
1470 {
1471 zone_t zone;
1472
1473 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1474 zone = vm_map_entry_zone;
1475 } else {
1476 zone = vm_map_entry_reserved_zone;
1477 }
1478
1479 if (!map_header->entries_pageable) {
1480 if (zone == vm_map_entry_zone) {
1481 OSAddAtomic(-1, &nonreserved_zalloc_count);
1482 } else {
1483 OSAddAtomic(-1, &reserved_zalloc_count);
1484 }
1485 }
1486
1487 zfree(zone, entry);
1488 }
1489
1490 #if MACH_ASSERT
1491 static boolean_t first_free_check = FALSE;
1492 boolean_t
1493 first_free_is_valid(
1494 vm_map_t map)
1495 {
1496 if (!first_free_check) {
1497 return TRUE;
1498 }
1499
1500 return first_free_is_valid_store( map );
1501 }
1502 #endif /* MACH_ASSERT */
1503
1504
1505 #define vm_map_copy_entry_link(copy, after_where, entry) \
1506 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1507
1508 #define vm_map_copy_entry_unlink(copy, entry) \
1509 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1510
1511 #if MACH_ASSERT && TASK_SWAPPER
1512 /*
1513 * vm_map_res_reference:
1514 *
1515 * Adds another valid residence count to the given map.
1516 *
1517 * Map is locked so this function can be called from
1518 * vm_map_swapin.
1519 *
1520 */
1521 void
1522 vm_map_res_reference(vm_map_t map)
1523 {
1524 /* assert map is locked */
1525 assert(map->res_count >= 0);
1526 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1527 if (map->res_count == 0) {
1528 lck_mtx_unlock(&map->s_lock);
1529 vm_map_lock(map);
1530 vm_map_swapin(map);
1531 lck_mtx_lock(&map->s_lock);
1532 ++map->res_count;
1533 vm_map_unlock(map);
1534 } else {
1535 ++map->res_count;
1536 }
1537 }
1538
1539 /*
1540 * vm_map_reference_swap:
1541 *
1542 * Adds valid reference and residence counts to the given map.
1543 *
1544 * The map may not be in memory (i.e. zero residence count).
1545 *
1546 */
1547 void
1548 vm_map_reference_swap(vm_map_t map)
1549 {
1550 assert(map != VM_MAP_NULL);
1551 lck_mtx_lock(&map->s_lock);
1552 assert(map->res_count >= 0);
1553 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1554 os_ref_retain_locked(&map->map_refcnt);
1555 vm_map_res_reference(map);
1556 lck_mtx_unlock(&map->s_lock);
1557 }
1558
1559 /*
1560 * vm_map_res_deallocate:
1561 *
1562 * Decrement residence count on a map; possibly causing swapout.
1563 *
1564 * The map must be in memory (i.e. non-zero residence count).
1565 *
1566 * The map is locked, so this function is callable from vm_map_deallocate.
1567 *
1568 */
1569 void
1570 vm_map_res_deallocate(vm_map_t map)
1571 {
1572 assert(map->res_count > 0);
1573 if (--map->res_count == 0) {
1574 lck_mtx_unlock(&map->s_lock);
1575 vm_map_lock(map);
1576 vm_map_swapout(map);
1577 vm_map_unlock(map);
1578 lck_mtx_lock(&map->s_lock);
1579 }
1580 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1581 }
1582 #endif /* MACH_ASSERT && TASK_SWAPPER */
1583
1584 /*
1585 * vm_map_destroy:
1586 *
1587 * Actually destroy a map.
1588 */
1589 void
1590 vm_map_destroy(
1591 vm_map_t map,
1592 int flags)
1593 {
1594 vm_map_lock(map);
1595
1596 /* final cleanup: no need to unnest shared region */
1597 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1598 /* final cleanup: ok to remove immutable mappings */
1599 flags |= VM_MAP_REMOVE_IMMUTABLE;
1600 /* final cleanup: allow gaps in range */
1601 flags |= VM_MAP_REMOVE_GAPS_OK;
1602
1603 /* clean up regular map entries */
1604 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1605 flags, VM_MAP_NULL);
1606 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1607 #if !defined(__arm__)
1608 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1609 flags, VM_MAP_NULL);
1610 #endif /* !__arm__ */
1611
1612 vm_map_disable_hole_optimization(map);
1613 vm_map_corpse_footprint_destroy(map);
1614
1615 vm_map_unlock(map);
1616
1617 assert(map->hdr.nentries == 0);
1618
1619 if (map->pmap) {
1620 pmap_destroy(map->pmap);
1621 }
1622
1623 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1624 /*
1625 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1626 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1627 * structure or kalloc'ed via lck_mtx_init.
1628 * An example is s_lock_ext within struct _vm_map.
1629 *
1630 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1631 * can add another tag to detect embedded vs alloc'ed indirect external
1632 * mutexes but that'll be additional checks in the lock path and require
1633 * updating dependencies for the old vs new tag.
1634 *
1635 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1636 * just when lock debugging is ON, we choose to forego explicitly destroying
1637 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1638 * count on vm_map_lck_grp, which has no serious side-effect.
1639 */
1640 } else {
1641 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1642 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1643 }
1644
1645 zfree(vm_map_zone, map);
1646 }
1647
1648 /*
1649 * Returns pid of the task with the largest number of VM map entries.
1650 * Used in the zone-map-exhaustion jetsam path.
1651 */
1652 pid_t
1653 find_largest_process_vm_map_entries(void)
1654 {
1655 pid_t victim_pid = -1;
1656 int max_vm_map_entries = 0;
1657 task_t task = TASK_NULL;
1658 queue_head_t *task_list = &tasks;
1659
1660 lck_mtx_lock(&tasks_threads_lock);
1661 queue_iterate(task_list, task, task_t, tasks) {
1662 if (task == kernel_task || !task->active) {
1663 continue;
1664 }
1665
1666 vm_map_t task_map = task->map;
1667 if (task_map != VM_MAP_NULL) {
1668 int task_vm_map_entries = task_map->hdr.nentries;
1669 if (task_vm_map_entries > max_vm_map_entries) {
1670 max_vm_map_entries = task_vm_map_entries;
1671 victim_pid = pid_from_task(task);
1672 }
1673 }
1674 }
1675 lck_mtx_unlock(&tasks_threads_lock);
1676
1677 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1678 return victim_pid;
1679 }
1680
1681 #if TASK_SWAPPER
1682 /*
1683 * vm_map_swapin/vm_map_swapout
1684 *
1685 * Swap a map in and out, either referencing or releasing its resources.
1686 * These functions are internal use only; however, they must be exported
1687 * because they may be called from macros, which are exported.
1688 *
1689 * In the case of swapout, there could be races on the residence count,
1690 * so if the residence count is up, we return, assuming that a
1691 * vm_map_deallocate() call in the near future will bring us back.
1692 *
1693 * Locking:
1694 * -- We use the map write lock for synchronization among races.
1695 * -- The map write lock, and not the simple s_lock, protects the
1696 * swap state of the map.
1697 * -- If a map entry is a share map, then we hold both locks, in
1698 * hierarchical order.
1699 *
1700 * Synchronization Notes:
1701 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1702 * will block on the map lock and proceed when swapout is through.
1703 * 2) A vm_map_reference() call at this time is illegal, and will
1704 * cause a panic. vm_map_reference() is only allowed on resident
1705 * maps, since it refuses to block.
1706 * 3) A vm_map_swapin() call during a swapin will block, and
1707 * proceed when the first swapin is done, turning into a nop.
1708 * This is the reason the res_count is not incremented until
1709 * after the swapin is complete.
1710 * 4) There is a timing hole after the checks of the res_count, before
1711 * the map lock is taken, during which a swapin may get the lock
1712 * before a swapout about to happen. If this happens, the swapin
1713 * will detect the state and increment the reference count, causing
1714 * the swapout to be a nop, thereby delaying it until a later
1715 * vm_map_deallocate. If the swapout gets the lock first, then
1716 * the swapin will simply block until the swapout is done, and
1717 * then proceed.
1718 *
1719 * Because vm_map_swapin() is potentially an expensive operation, it
1720 * should be used with caution.
1721 *
1722 * Invariants:
1723 * 1) A map with a residence count of zero is either swapped, or
1724 * being swapped.
1725 * 2) A map with a non-zero residence count is either resident,
1726 * or being swapped in.
1727 */
1728
1729 int vm_map_swap_enable = 1;
1730
1731 void
1732 vm_map_swapin(vm_map_t map)
1733 {
1734 vm_map_entry_t entry;
1735
1736 if (!vm_map_swap_enable) { /* debug */
1737 return;
1738 }
1739
1740 /*
1741 * Map is locked
1742 * First deal with various races.
1743 */
1744 if (map->sw_state == MAP_SW_IN) {
1745 /*
1746 * we raced with swapout and won. Returning will incr.
1747 * the res_count, turning the swapout into a nop.
1748 */
1749 return;
1750 }
1751
1752 /*
1753 * The residence count must be zero. If we raced with another
1754 * swapin, the state would have been IN; if we raced with a
1755 * swapout (after another competing swapin), we must have lost
1756 * the race to get here (see above comment), in which case
1757 * res_count is still 0.
1758 */
1759 assert(map->res_count == 0);
1760
1761 /*
1762 * There are no intermediate states of a map going out or
1763 * coming in, since the map is locked during the transition.
1764 */
1765 assert(map->sw_state == MAP_SW_OUT);
1766
1767 /*
1768 * We now operate upon each map entry. If the entry is a sub-
1769 * or share-map, we call vm_map_res_reference upon it.
1770 * If the entry is an object, we call vm_object_res_reference
1771 * (this may iterate through the shadow chain).
1772 * Note that we hold the map locked the entire time,
1773 * even if we get back here via a recursive call in
1774 * vm_map_res_reference.
1775 */
1776 entry = vm_map_first_entry(map);
1777
1778 while (entry != vm_map_to_entry(map)) {
1779 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1780 if (entry->is_sub_map) {
1781 vm_map_t lmap = VME_SUBMAP(entry);
1782 lck_mtx_lock(&lmap->s_lock);
1783 vm_map_res_reference(lmap);
1784 lck_mtx_unlock(&lmap->s_lock);
1785 } else {
1786 vm_object_t object = VME_OBJECT(entry);
1787 vm_object_lock(object);
1788 /*
1789 * This call may iterate through the
1790 * shadow chain.
1791 */
1792 vm_object_res_reference(object);
1793 vm_object_unlock(object);
1794 }
1795 }
1796 entry = entry->vme_next;
1797 }
1798 assert(map->sw_state == MAP_SW_OUT);
1799 map->sw_state = MAP_SW_IN;
1800 }
1801
1802 void
1803 vm_map_swapout(vm_map_t map)
1804 {
1805 vm_map_entry_t entry;
1806
1807 /*
1808 * Map is locked
1809 * First deal with various races.
1810 * If we raced with a swapin and lost, the residence count
1811 * will have been incremented to 1, and we simply return.
1812 */
1813 lck_mtx_lock(&map->s_lock);
1814 if (map->res_count != 0) {
1815 lck_mtx_unlock(&map->s_lock);
1816 return;
1817 }
1818 lck_mtx_unlock(&map->s_lock);
1819
1820 /*
1821 * There are no intermediate states of a map going out or
1822 * coming in, since the map is locked during the transition.
1823 */
1824 assert(map->sw_state == MAP_SW_IN);
1825
1826 if (!vm_map_swap_enable) {
1827 return;
1828 }
1829
1830 /*
1831 * We now operate upon each map entry. If the entry is a sub-
1832 * or share-map, we call vm_map_res_deallocate upon it.
1833 * If the entry is an object, we call vm_object_res_deallocate
1834 * (this may iterate through the shadow chain).
1835 * Note that we hold the map locked the entire time,
1836 * even if we get back here via a recursive call in
1837 * vm_map_res_deallocate.
1838 */
1839 entry = vm_map_first_entry(map);
1840
1841 while (entry != vm_map_to_entry(map)) {
1842 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1843 if (entry->is_sub_map) {
1844 vm_map_t lmap = VME_SUBMAP(entry);
1845 lck_mtx_lock(&lmap->s_lock);
1846 vm_map_res_deallocate(lmap);
1847 lck_mtx_unlock(&lmap->s_lock);
1848 } else {
1849 vm_object_t object = VME_OBJECT(entry);
1850 vm_object_lock(object);
1851 /*
1852 * This call may take a long time,
1853 * since it could actively push
1854 * out pages (if we implement it
1855 * that way).
1856 */
1857 vm_object_res_deallocate(object);
1858 vm_object_unlock(object);
1859 }
1860 }
1861 entry = entry->vme_next;
1862 }
1863 assert(map->sw_state == MAP_SW_IN);
1864 map->sw_state = MAP_SW_OUT;
1865 }
1866
1867 #endif /* TASK_SWAPPER */
1868
1869 /*
1870 * vm_map_lookup_entry: [ internal use only ]
1871 *
1872 * Calls into the vm map store layer to find the map
1873 * entry containing (or immediately preceding) the
1874 * specified address in the given map; the entry is returned
1875 * in the "entry" parameter. The boolean
1876 * result indicates whether the address is
1877 * actually contained in the map.
1878 */
1879 boolean_t
1880 vm_map_lookup_entry(
1881 vm_map_t map,
1882 vm_map_offset_t address,
1883 vm_map_entry_t *entry) /* OUT */
1884 {
1885 return vm_map_store_lookup_entry( map, address, entry );
1886 }
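/*
 * Illustrative sketch (not compiled): typical use of
 * vm_map_lookup_entry().  "map" and "addr" are assumed to be a locked
 * map and an address of interest; on TRUE the returned entry contains
 * "addr", on FALSE it is the entry immediately preceding the unmapped
 * address.
 */
#if 0
	vm_map_entry_t entry;

	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* "addr" lies within [entry->vme_start, entry->vme_end) */
	} else {
		/* "addr" is in a hole; "entry" precedes it */
	}
#endif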
1887
1888 /*
1889 * Routine: vm_map_find_space
1890 * Purpose:
1891 * Allocate a range in the specified virtual address map,
1892 * returning the entry allocated for that range.
1893 * Used by kmem_alloc, etc.
1894 *
1895 * The map must NOT be locked on entry. It will be returned locked
1896 * on KERN_SUCCESS, unlocked on failure.
1897 *
1898 * If an entry is allocated, the object/offset fields
1899 * are initialized to zero.
1900 *
1901 * If the VM_MAP_FIND_LAST_FREE flag is set, allocate from the end of
1902 * the map. This is currently only used for allocating memory for zones
1903 * backing one of the kalloc heaps. (rdar://65832263)
1904 */
1905 kern_return_t
1906 vm_map_find_space(
1907 vm_map_t map,
1908 vm_map_offset_t *address, /* OUT */
1909 vm_map_size_t size,
1910 vm_map_offset_t mask,
1911 int flags,
1912 vm_map_kernel_flags_t vmk_flags,
1913 vm_tag_t tag,
1914 vm_map_entry_t *o_entry) /* OUT */
1915 {
1916 vm_map_entry_t entry, new_entry, hole_entry;
1917 vm_map_offset_t start;
1918 vm_map_offset_t end;
1919
1920 if (size == 0) {
1921 *address = 0;
1922 return KERN_INVALID_ARGUMENT;
1923 }
1924
1925 new_entry = vm_map_entry_create(map, FALSE);
1926 vm_map_lock(map);
1927
1928 if (flags & VM_MAP_FIND_LAST_FREE) {
1929 assert(!map->disable_vmentry_reuse);
1930 /* TODO: Make backward lookup generic and support guard pages */
1931 assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1932 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1933
1934 /* Allocate space from end of map */
1935 vm_map_store_find_last_free(map, &entry);
1936
1937 if (!entry) {
1938 goto noSpace;
1939 }
1940
1941 if (entry == vm_map_to_entry(map)) {
1942 end = map->max_offset;
1943 } else {
1944 end = entry->vme_start;
1945 }
1946
1947 while (TRUE) {
1948 vm_map_entry_t prev;
1949
1950 start = end - size;
1951
1952 if ((start < map->min_offset) || end < start) {
1953 goto noSpace;
1954 }
1955
1956 prev = entry->vme_prev;
1957 entry = prev;
1958
1959 if (prev == vm_map_to_entry(map)) {
1960 break;
1961 }
1962
1963 if (prev->vme_end <= start) {
1964 break;
1965 }
1966
1967 /*
1968 * Didn't fit -- try the preceding entry.
1969 */
1970
1971 end = entry->vme_start;
1972 }
1973 } else {
1974 if (vmk_flags.vmkf_guard_after) {
1975 /* account for the back guard page in the size */
1976 size += VM_MAP_PAGE_SIZE(map);
1977 }
1978
1979 /*
1980 * Look for the first possible address; if there's already
1981 * something at this address, we have to start after it.
1982 */
1983
1984 if (map->disable_vmentry_reuse == TRUE) {
1985 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1986 } else {
1987 if (map->holelistenabled) {
1988 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1989
1990 if (hole_entry == NULL) {
1991 /*
1992 * No more space in the map?
1993 */
1994 goto noSpace;
1995 }
1996
1997 entry = hole_entry;
1998 start = entry->vme_start;
1999 } else {
2000 assert(first_free_is_valid(map));
2001 if ((entry = map->first_free) == vm_map_to_entry(map)) {
2002 start = map->min_offset;
2003 } else {
2004 start = entry->vme_end;
2005 }
2006 }
2007 }
2008
2009 /*
2010 * In any case, the "entry" always precedes
2011 * the proposed new region throughout the loop:
2012 */
2013
2014 while (TRUE) {
2015 vm_map_entry_t next;
2016
2017 /*
2018 * Find the end of the proposed new region.
2019 * Be sure we didn't go beyond the end, or
2020 * wrap around the address.
2021 */
2022
2023 if (vmk_flags.vmkf_guard_before) {
2024 /* reserve space for the front guard page */
2025 start += VM_MAP_PAGE_SIZE(map);
2026 }
2027 end = ((start + mask) & ~mask);
2028
2029 if (end < start) {
2030 goto noSpace;
2031 }
2032 start = end;
2033 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
2034 end += size;
2035 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
2036
2037 if ((end > map->max_offset) || (end < start)) {
2038 goto noSpace;
2039 }
2040
2041 next = entry->vme_next;
2042
2043 if (map->holelistenabled) {
2044 if (entry->vme_end >= end) {
2045 break;
2046 }
2047 } else {
2048 /*
2049 * If there are no more entries, we must win.
2050 *
2051 * OR
2052 *
2053 * If there is another entry, it must be
2054 * after the end of the potential new region.
2055 */
2056
2057 if (next == vm_map_to_entry(map)) {
2058 break;
2059 }
2060
2061 if (next->vme_start >= end) {
2062 break;
2063 }
2064 }
2065
2066 /*
2067 * Didn't fit -- move to the next entry.
2068 */
2069
2070 entry = next;
2071
2072 if (map->holelistenabled) {
2073 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2074 /*
2075 * Wrapped around
2076 */
2077 goto noSpace;
2078 }
2079 start = entry->vme_start;
2080 } else {
2081 start = entry->vme_end;
2082 }
2083 }
2084
2085 if (vmk_flags.vmkf_guard_before) {
2086 /* go back for the front guard page */
2087 start -= VM_MAP_PAGE_SIZE(map);
2088 }
2089 }
2090
2091 if (map->holelistenabled) {
2092 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2093 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2094 }
2095 }
2096
2097 /*
2098 * At this point,
2099 * "start" and "end" should define the endpoints of the
2100 * available new range, and
2101 * "entry" should refer to the region before the new
2102 * range, and
2103 *
2104 * the map should be locked.
2105 */
2106
2107 *address = start;
2108
2109 assert(start < end);
2110 new_entry->vme_start = start;
2111 new_entry->vme_end = end;
2112 assert(page_aligned(new_entry->vme_start));
2113 assert(page_aligned(new_entry->vme_end));
2114 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
2115 VM_MAP_PAGE_MASK(map)));
2116 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
2117 VM_MAP_PAGE_MASK(map)));
2118
2119 new_entry->is_shared = FALSE;
2120 new_entry->is_sub_map = FALSE;
2121 new_entry->use_pmap = TRUE;
2122 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
2123 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
2124
2125 new_entry->needs_copy = FALSE;
2126
2127 new_entry->inheritance = VM_INHERIT_DEFAULT;
2128 new_entry->protection = VM_PROT_DEFAULT;
2129 new_entry->max_protection = VM_PROT_ALL;
2130 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
2131 new_entry->wired_count = 0;
2132 new_entry->user_wired_count = 0;
2133
2134 new_entry->in_transition = FALSE;
2135 new_entry->needs_wakeup = FALSE;
2136 new_entry->no_cache = FALSE;
2137 new_entry->permanent = FALSE;
2138 new_entry->superpage_size = FALSE;
2139 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
2140 new_entry->map_aligned = TRUE;
2141 } else {
2142 new_entry->map_aligned = FALSE;
2143 }
2144
2145 new_entry->used_for_jit = FALSE;
2146 new_entry->pmap_cs_associated = FALSE;
2147 new_entry->zero_wired_pages = FALSE;
2148 new_entry->iokit_acct = FALSE;
2149 new_entry->vme_resilient_codesign = FALSE;
2150 new_entry->vme_resilient_media = FALSE;
2151 if (vmk_flags.vmkf_atomic_entry) {
2152 new_entry->vme_atomic = TRUE;
2153 } else {
2154 new_entry->vme_atomic = FALSE;
2155 }
2156
2157 VME_ALIAS_SET(new_entry, tag);
2158
2159 /*
2160 * Insert the new entry into the list
2161 */
2162
2163 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
2164
2165 map->size += size;
2166
2167 /*
2168 * Update the lookup hint
2169 */
2170 SAVE_HINT_MAP_WRITE(map, new_entry);
2171
2172 *o_entry = new_entry;
2173 return KERN_SUCCESS;
2174
2175 noSpace:
2176
2177 vm_map_entry_dispose(map, new_entry);
2178 vm_map_unlock(map);
2179 return KERN_NO_SPACE;
2180 }
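/*
 * Illustrative sketch (not compiled): how a kernel client might call
 * vm_map_find_space(), per the contract above (map unlocked on entry,
 * returned locked on KERN_SUCCESS).  The use of kernel_map and the
 * VM_KERN_MEMORY_KALLOC tag here is only an assumption for the example.
 */
#if 0
	vm_map_offset_t addr;
	vm_map_entry_t  entry;
	kern_return_t   kr;

	kr = vm_map_find_space(kernel_map, &addr, size,
	    (vm_map_offset_t)0,                 /* mask: no alignment constraint */
	    0,                                  /* flags */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_KALLOC,
	    &entry);
	if (kr == KERN_SUCCESS) {
		/* object/offset of "entry" are zero-initialized; set them up... */
		vm_map_unlock(kernel_map);      /* caller must drop the lock */
	}
#endif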
2181
2182 int vm_map_pmap_enter_print = FALSE;
2183 int vm_map_pmap_enter_enable = FALSE;
2184
2185 /*
2186 * Routine: vm_map_pmap_enter [internal only]
2187 *
2188 * Description:
2189 * Force pages from the specified object to be entered into
2190 * the pmap at the specified address if they are present.
2191 * As soon as a page is not found in the object, the scan ends.
2192 *
2193 * Returns:
2194 * Nothing.
2195 *
2196 * In/out conditions:
2197 * The source map should not be locked on entry.
2198 */
2199 __unused static void
2200 vm_map_pmap_enter(
2201 vm_map_t map,
2202 vm_map_offset_t addr,
2203 vm_map_offset_t end_addr,
2204 vm_object_t object,
2205 vm_object_offset_t offset,
2206 vm_prot_t protection)
2207 {
2208 int type_of_fault;
2209 kern_return_t kr;
2210 struct vm_object_fault_info fault_info = {};
2211
2212 if (map->pmap == 0) {
2213 return;
2214 }
2215
2216 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2217
2218 while (addr < end_addr) {
2219 vm_page_t m;
2220
2221
2222 /*
2223 * TODO:
2224 * From vm_map_enter(), we come into this function without the map
2225 * lock held or the object lock held.
2226 * We haven't taken a reference on the object either.
2227 * We should do a proper lookup on the map to make sure
2228 * that things are sane before we go locking objects that
2229 * could have been deallocated from under us.
2230 */
2231
2232 vm_object_lock(object);
2233
2234 m = vm_page_lookup(object, offset);
2235
2236 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2237 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2238 vm_object_unlock(object);
2239 return;
2240 }
2241
2242 if (vm_map_pmap_enter_print) {
2243 printf("vm_map_pmap_enter:");
2244 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2245 map, (unsigned long long)addr, object, (unsigned long long)offset);
2246 }
2247 type_of_fault = DBG_CACHE_HIT_FAULT;
2248 kr = vm_fault_enter(m, map->pmap,
2249 addr,
2250 PAGE_SIZE, 0,
2251 protection, protection,
2252 VM_PAGE_WIRED(m),
2253 FALSE, /* change_wiring */
2254 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2255 &fault_info,
2256 NULL, /* need_retry */
2257 &type_of_fault);
2258
2259 vm_object_unlock(object);
2260
2261 offset += PAGE_SIZE_64;
2262 addr += PAGE_SIZE;
2263 }
2264 }
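/*
 * Illustrative sketch (not compiled): vm_map_pmap_enter() is __unused
 * in this version; if it were called, a pre-fault of an already
 * looked-up entry might look like this ("map" and "entry" are assumed
 * names).
 */
#if 0
	vm_map_pmap_enter(map,
	    entry->vme_start,
	    entry->vme_end,
	    VME_OBJECT(entry),
	    VME_OFFSET(entry),
	    entry->protection);
#endif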
2265
2266 boolean_t vm_map_pmap_is_empty(
2267 vm_map_t map,
2268 vm_map_offset_t start,
2269 vm_map_offset_t end);
2270 boolean_t
2271 vm_map_pmap_is_empty(
2272 vm_map_t map,
2273 vm_map_offset_t start,
2274 vm_map_offset_t end)
2275 {
2276 #ifdef MACHINE_PMAP_IS_EMPTY
2277 return pmap_is_empty(map->pmap, start, end);
2278 #else /* MACHINE_PMAP_IS_EMPTY */
2279 vm_map_offset_t offset;
2280 ppnum_t phys_page;
2281
2282 if (map->pmap == NULL) {
2283 return TRUE;
2284 }
2285
2286 for (offset = start;
2287 offset < end;
2288 offset += PAGE_SIZE) {
2289 phys_page = pmap_find_phys(map->pmap, offset);
2290 if (phys_page) {
2291 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2292 "page %d at 0x%llx\n",
2293 map, (long long)start, (long long)end,
2294 phys_page, (long long)offset);
2295 return FALSE;
2296 }
2297 }
2298 return TRUE;
2299 #endif /* MACHINE_PMAP_IS_EMPTY */
2300 }
2301
2302 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2303 kern_return_t
2304 vm_map_random_address_for_size(
2305 vm_map_t map,
2306 vm_map_offset_t *address,
2307 vm_map_size_t size)
2308 {
2309 kern_return_t kr = KERN_SUCCESS;
2310 int tries = 0;
2311 vm_map_offset_t random_addr = 0;
2312 vm_map_offset_t hole_end;
2313
2314 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2315 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2316 vm_map_size_t vm_hole_size = 0;
2317 vm_map_size_t addr_space_size;
2318
2319 addr_space_size = vm_map_max(map) - vm_map_min(map);
2320
2321 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2322
2323 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2324 random_addr = ((vm_map_offset_t)random()) << VM_MAP_PAGE_SHIFT(map);
2325 random_addr = vm_map_trunc_page(
2326 vm_map_min(map) + (random_addr % addr_space_size),
2327 VM_MAP_PAGE_MASK(map));
2328
2329 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2330 if (prev_entry == vm_map_to_entry(map)) {
2331 next_entry = vm_map_first_entry(map);
2332 } else {
2333 next_entry = prev_entry->vme_next;
2334 }
2335 if (next_entry == vm_map_to_entry(map)) {
2336 hole_end = vm_map_max(map);
2337 } else {
2338 hole_end = next_entry->vme_start;
2339 }
2340 vm_hole_size = hole_end - random_addr;
2341 if (vm_hole_size >= size) {
2342 *address = random_addr;
2343 break;
2344 }
2345 }
2346 tries++;
2347 }
2348
2349 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2350 kr = KERN_NO_SPACE;
2351 }
2352 return kr;
2353 }
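/*
 * Illustrative sketch (not compiled): vm_map_random_address_for_size()
 * is called with the map locked and a map-aligned size (see its use
 * under VM_FLAGS_RANDOM_ADDR in vm_map_enter() below); KERN_NO_SPACE
 * means no suitable hole was found within
 * MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.
 */
#if 0
	vm_map_offset_t addr;
	kern_return_t   kr;

	vm_map_lock(map);
	kr = vm_map_random_address_for_size(map, &addr, size);
	if (kr == KERN_SUCCESS) {
		/* a hole of at least "size" bytes starts at "addr" */
	}
	vm_map_unlock(map);
#endif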
2354
2355 static boolean_t
2356 vm_memory_malloc_no_cow(
2357 int alias)
2358 {
2359 uint64_t alias_mask;
2360
2361 if (alias > 63) {
2362 return FALSE;
2363 }
2364
2365 alias_mask = 1ULL << alias;
2366 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2367 return TRUE;
2368 }
2369 return FALSE;
2370 }
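/*
 * Illustrative sketch (not compiled): vm_memory_malloc_no_cow_mask is
 * treated as a 64-bit bitmap indexed by VM user tag, so a set bit for
 * a given malloc alias makes the check return TRUE, which steers
 * vm_map_enter() below into giving that mapping its own object with
 * MEMORY_OBJECT_COPY_NONE instead of coalescing/copy-on-write.
 */
#if 0
	if (vm_memory_malloc_no_cow(user_alias)) {
		/* this alias must not be backed by copy-on-write memory */
	}
#endif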
2371
2372 /*
2373 * Routine: vm_map_enter
2374 *
2375 * Description:
2376 * Allocate a range in the specified virtual address map.
2377 * The resulting range will refer to memory defined by
2378 * the given memory object and offset into that object.
2379 *
2380 * Arguments are as defined in the vm_map call.
2381 */
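/*
 * Illustrative sketch (not compiled): a minimal anonymous, zero-fill
 * allocation through vm_map_enter(), letting the kernel pick the
 * address (VM_FLAGS_ANYWHERE).  "map", "size" and the choice of tag
 * are assumptions for the example.
 */
#if 0
	vm_map_offset_t addr = 0;
	kern_return_t   kr;

	kr = vm_map_enter(map, &addr, size,
	    (vm_map_offset_t)0,                 /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,                /* alias/tag */
	    VM_OBJECT_NULL,                     /* lazily allocated object */
	    (vm_object_offset_t)0,
	    FALSE,                              /* needs_copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
#endif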
2382 static unsigned int vm_map_enter_restore_successes = 0;
2383 static unsigned int vm_map_enter_restore_failures = 0;
2384 kern_return_t
2385 vm_map_enter(
2386 vm_map_t map,
2387 vm_map_offset_t *address, /* IN/OUT */
2388 vm_map_size_t size,
2389 vm_map_offset_t mask,
2390 int flags,
2391 vm_map_kernel_flags_t vmk_flags,
2392 vm_tag_t alias,
2393 vm_object_t object,
2394 vm_object_offset_t offset,
2395 boolean_t needs_copy,
2396 vm_prot_t cur_protection,
2397 vm_prot_t max_protection,
2398 vm_inherit_t inheritance)
2399 {
2400 vm_map_entry_t entry, new_entry;
2401 vm_map_offset_t start, tmp_start, tmp_offset;
2402 vm_map_offset_t end, tmp_end;
2403 vm_map_offset_t tmp2_start, tmp2_end;
2404 vm_map_offset_t desired_empty_end;
2405 vm_map_offset_t step;
2406 kern_return_t result = KERN_SUCCESS;
2407 vm_map_t zap_old_map = VM_MAP_NULL;
2408 vm_map_t zap_new_map = VM_MAP_NULL;
2409 boolean_t map_locked = FALSE;
2410 boolean_t pmap_empty = TRUE;
2411 boolean_t new_mapping_established = FALSE;
2412 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2413 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2414 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2415 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2416 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2417 boolean_t is_submap = vmk_flags.vmkf_submap;
2418 boolean_t permanent = vmk_flags.vmkf_permanent;
2419 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2420 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2421 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2422 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2423 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2424 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2425 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2426 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2427 vm_tag_t user_alias;
2428 vm_map_offset_t effective_min_offset, effective_max_offset;
2429 kern_return_t kr;
2430 boolean_t clear_map_aligned = FALSE;
2431 vm_map_entry_t hole_entry;
2432 vm_map_size_t chunk_size = 0;
2433
2434 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2435
2436 if (flags & VM_FLAGS_4GB_CHUNK) {
2437 #if defined(__LP64__)
2438 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2439 #else /* __LP64__ */
2440 chunk_size = ANON_CHUNK_SIZE;
2441 #endif /* __LP64__ */
2442 } else {
2443 chunk_size = ANON_CHUNK_SIZE;
2444 }
2445
2446 if (superpage_size) {
2447 switch (superpage_size) {
2448 /*
2449 * Note that the current implementation only supports
2450 * a single size for superpages, SUPERPAGE_SIZE, per
2451 * architecture. As soon as more sizes are to be
2452 * supported, SUPERPAGE_SIZE has to be replaced
2453 * with a lookup of the size depending on superpage_size.
2454 */
2455 #ifdef __x86_64__
2456 case SUPERPAGE_SIZE_ANY:
2457 /* handle it like 2 MB and round up to page size */
2458 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2459 OS_FALLTHROUGH;
2460 case SUPERPAGE_SIZE_2MB:
2461 break;
2462 #endif
2463 default:
2464 return KERN_INVALID_ARGUMENT;
2465 }
2466 mask = SUPERPAGE_SIZE - 1;
2467 if (size & (SUPERPAGE_SIZE - 1)) {
2468 return KERN_INVALID_ARGUMENT;
2469 }
2470 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2471 }
2472
2473
2474 if ((cur_protection & VM_PROT_WRITE) &&
2475 (cur_protection & VM_PROT_EXECUTE) &&
2476 #if XNU_TARGET_OS_OSX
2477 map->pmap != kernel_pmap &&
2478 (cs_process_global_enforcement() ||
2479 (vmk_flags.vmkf_cs_enforcement_override
2480 ? vmk_flags.vmkf_cs_enforcement
2481 : (vm_map_cs_enforcement(map)
2482 #if __arm64__
2483 || !VM_MAP_IS_EXOTIC(map)
2484 #endif /* __arm64__ */
2485 ))) &&
2486 #endif /* XNU_TARGET_OS_OSX */
2487 (VM_MAP_POLICY_WX_FAIL(map) ||
2488 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2489 !entry_for_jit) {
2490 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2491
2492 DTRACE_VM3(cs_wx,
2493 uint64_t, 0,
2494 uint64_t, 0,
2495 vm_prot_t, cur_protection);
2496 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2497 proc_selfpid(),
2498 (current_task()->bsd_info
2499 ? proc_name_address(current_task()->bsd_info)
2500 : "?"),
2501 __FUNCTION__,
2502 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2503 cur_protection &= ~VM_PROT_EXECUTE;
2504 if (vm_protect_wx_fail) {
2505 return KERN_PROTECTION_FAILURE;
2506 }
2507 }
2508
2509 /*
2510 * If the task has requested executable lockdown,
2511 * deny any new executable mapping.
2512 */
2513 if (map->map_disallow_new_exec == TRUE) {
2514 if (cur_protection & VM_PROT_EXECUTE) {
2515 return KERN_PROTECTION_FAILURE;
2516 }
2517 }
2518
2519 if (resilient_codesign) {
2520 assert(!is_submap);
2521 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
2522 if ((cur_protection | max_protection) & reject_prot) {
2523 return KERN_PROTECTION_FAILURE;
2524 }
2525 }
2526
2527 if (resilient_media) {
2528 assert(!is_submap);
2529 // assert(!needs_copy);
2530 if (object != VM_OBJECT_NULL &&
2531 !object->internal) {
2532 /*
2533 * This mapping is directly backed by an external
2534 * memory manager (e.g. a vnode pager for a file):
2535 * we would not have any safe place to inject
2536 * a zero-filled page if an actual page is not
2537 * available, without possibly impacting the actual
2538 * contents of the mapped object (e.g. the file),
2539 * so we can't provide any media resiliency here.
2540 */
2541 return KERN_INVALID_ARGUMENT;
2542 }
2543 }
2544
2545 if (is_submap) {
2546 if (purgable) {
2547 /* submaps can not be purgeable */
2548 return KERN_INVALID_ARGUMENT;
2549 }
2550 if (object == VM_OBJECT_NULL) {
2551 /* submaps can not be created lazily */
2552 return KERN_INVALID_ARGUMENT;
2553 }
2554 }
2555 if (vmk_flags.vmkf_already) {
2556 /*
2557 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2558 * is already present. For it to be meaningful, the requested
2559 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2560 * we shouldn't try to remove what was mapped there first
2561 * (!VM_FLAGS_OVERWRITE).
2562 */
2563 if ((flags & VM_FLAGS_ANYWHERE) ||
2564 (flags & VM_FLAGS_OVERWRITE)) {
2565 return KERN_INVALID_ARGUMENT;
2566 }
2567 }
2568
2569 effective_min_offset = map->min_offset;
2570
2571 if (vmk_flags.vmkf_beyond_max) {
2572 /*
2573 * Allow an insertion beyond the map's max offset.
2574 */
2575 #if !defined(__arm__)
2576 if (vm_map_is_64bit(map)) {
2577 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2578 } else
2579 #endif /* __arm__ */
2580 effective_max_offset = 0x00000000FFFFF000ULL;
2581 } else {
2582 #if XNU_TARGET_OS_OSX
2583 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2584 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2585 } else {
2586 effective_max_offset = map->max_offset;
2587 }
2588 #else /* XNU_TARGET_OS_OSX */
2589 effective_max_offset = map->max_offset;
2590 #endif /* XNU_TARGET_OS_OSX */
2591 }
2592
2593 if (size == 0 ||
2594 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2595 *address = 0;
2596 return KERN_INVALID_ARGUMENT;
2597 }
2598
2599 if (map->pmap == kernel_pmap) {
2600 user_alias = VM_KERN_MEMORY_NONE;
2601 } else {
2602 user_alias = alias;
2603 }
2604
2605 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2606 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2607 }
2608
2609 #define RETURN(value) { result = value; goto BailOut; }
2610
2611 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2612 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2613 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2614 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2615 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2616 }
2617
2618 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2619 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2620 /*
2621 * In most cases, the caller rounds the size up to the
2622 * map's page size.
2623 * If we get a size that is explicitly not map-aligned here,
2624 * we'll have to respect the caller's wish and mark the
2625 * mapping as "not map-aligned" to avoid tripping the
2626 * map alignment checks later.
2627 */
2628 clear_map_aligned = TRUE;
2629 }
2630 if (!anywhere &&
2631 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2632 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2633 /*
2634 * We've been asked to map at a fixed address and that
2635 * address is not aligned to the map's specific alignment.
2636 * The caller should know what it's doing (i.e. most likely
2637 * mapping some fragmented copy map, transferring memory from
2638 * a VM map with a different alignment), so clear map_aligned
2639 * for this new VM map entry and proceed.
2640 */
2641 clear_map_aligned = TRUE;
2642 }
2643
2644 /*
2645 * Only zero-fill objects are allowed to be purgable.
2646 * LP64todo - limit purgable objects to 32-bits for now
2647 */
2648 if (purgable &&
2649 (offset != 0 ||
2650 (object != VM_OBJECT_NULL &&
2651 (object->vo_size != size ||
2652 object->purgable == VM_PURGABLE_DENY))
2653 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2654 return KERN_INVALID_ARGUMENT;
2655 }
2656
2657 if (!anywhere && overwrite) {
2658 /*
2659 * Create a temporary VM map to hold the old mappings in the
2660 * affected area while we create the new one.
2661 * This avoids releasing the VM map lock in
2662 * vm_map_entry_delete() and allows atomicity
2663 * when we want to replace some mappings with a new one.
2664 * It also allows us to restore the old VM mappings if the
2665 * new mapping fails.
2666 */
2667 zap_old_map = vm_map_create(PMAP_NULL,
2668 *address,
2669 *address + size,
2670 map->hdr.entries_pageable);
2671 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2672 vm_map_disable_hole_optimization(zap_old_map);
2673 }
2674
2675 StartAgain:;
2676
2677 start = *address;
2678
2679 if (anywhere) {
2680 vm_map_lock(map);
2681 map_locked = TRUE;
2682
2683 if (entry_for_jit) {
2684 if (map->jit_entry_exists &&
2685 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2686 result = KERN_INVALID_ARGUMENT;
2687 goto BailOut;
2688 }
2689 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2690 random_address = TRUE;
2691 }
2692 }
2693
2694 if (random_address) {
2695 /*
2696 * Get a random start address.
2697 */
2698 result = vm_map_random_address_for_size(map, address, size);
2699 if (result != KERN_SUCCESS) {
2700 goto BailOut;
2701 }
2702 start = *address;
2703 }
2704 #if XNU_TARGET_OS_OSX
2705 else if ((start == 0 || start == vm_map_min(map)) &&
2706 !map->disable_vmentry_reuse &&
2707 map->vmmap_high_start != 0) {
2708 start = map->vmmap_high_start;
2709 }
2710 #endif /* XNU_TARGET_OS_OSX */
2711
2712
2713 /*
2714 * Calculate the first possible address.
2715 */
2716
2717 if (start < effective_min_offset) {
2718 start = effective_min_offset;
2719 }
2720 if (start > effective_max_offset) {
2721 RETURN(KERN_NO_SPACE);
2722 }
2723
2724 /*
2725 * Look for the first possible address;
2726 * if there's already something at this
2727 * address, we have to start after it.
2728 */
2729
2730 if (map->disable_vmentry_reuse == TRUE) {
2731 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2732 } else {
2733 if (map->holelistenabled) {
2734 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2735
2736 if (hole_entry == NULL) {
2737 /*
2738 * No more space in the map?
2739 */
2740 result = KERN_NO_SPACE;
2741 goto BailOut;
2742 } else {
2743 boolean_t found_hole = FALSE;
2744
2745 do {
2746 if (hole_entry->vme_start >= start) {
2747 start = hole_entry->vme_start;
2748 found_hole = TRUE;
2749 break;
2750 }
2751
2752 if (hole_entry->vme_end > start) {
2753 found_hole = TRUE;
2754 break;
2755 }
2756 hole_entry = hole_entry->vme_next;
2757 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2758
2759 if (found_hole == FALSE) {
2760 result = KERN_NO_SPACE;
2761 goto BailOut;
2762 }
2763
2764 entry = hole_entry;
2765
2766 if (start == 0) {
2767 start += PAGE_SIZE_64;
2768 }
2769 }
2770 } else {
2771 assert(first_free_is_valid(map));
2772
2773 entry = map->first_free;
2774
2775 if (entry == vm_map_to_entry(map)) {
2776 entry = NULL;
2777 } else {
2778 if (entry->vme_next == vm_map_to_entry(map)) {
2779 /*
2780 * Hole at the end of the map.
2781 */
2782 entry = NULL;
2783 } else {
2784 if (start < (entry->vme_next)->vme_start) {
2785 start = entry->vme_end;
2786 start = vm_map_round_page(start,
2787 VM_MAP_PAGE_MASK(map));
2788 } else {
2789 /*
2790 * Need to do a lookup.
2791 */
2792 entry = NULL;
2793 }
2794 }
2795 }
2796
2797 if (entry == NULL) {
2798 vm_map_entry_t tmp_entry;
2799 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2800 assert(!entry_for_jit);
2801 start = tmp_entry->vme_end;
2802 start = vm_map_round_page(start,
2803 VM_MAP_PAGE_MASK(map));
2804 }
2805 entry = tmp_entry;
2806 }
2807 }
2808 }
2809
2810 /*
2811 * In any case, the "entry" always precedes
2812 * the proposed new region throughout the
2813 * loop:
2814 */
2815
2816 while (TRUE) {
2817 vm_map_entry_t next;
2818
2819 /*
2820 * Find the end of the proposed new region.
2821 * Be sure we didn't go beyond the end, or
2822 * wrap around the address.
2823 */
2824
2825 end = ((start + mask) & ~mask);
2826 end = vm_map_round_page(end,
2827 VM_MAP_PAGE_MASK(map));
2828 if (end < start) {
2829 RETURN(KERN_NO_SPACE);
2830 }
2831 start = end;
2832 assert(VM_MAP_PAGE_ALIGNED(start,
2833 VM_MAP_PAGE_MASK(map)));
2834 end += size;
2835
2836 /* We want an entire page of empty space, but don't increase the allocation size. */
2837 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2838
2839 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2840 if (map->wait_for_space) {
2841 assert(!keep_map_locked);
2842 if (size <= (effective_max_offset -
2843 effective_min_offset)) {
2844 assert_wait((event_t)map,
2845 THREAD_ABORTSAFE);
2846 vm_map_unlock(map);
2847 map_locked = FALSE;
2848 thread_block(THREAD_CONTINUE_NULL);
2849 goto StartAgain;
2850 }
2851 }
2852 RETURN(KERN_NO_SPACE);
2853 }
2854
2855 next = entry->vme_next;
2856
2857 if (map->holelistenabled) {
2858 if (entry->vme_end >= desired_empty_end) {
2859 break;
2860 }
2861 } else {
2862 /*
2863 * If there are no more entries, we must win.
2864 *
2865 * OR
2866 *
2867 * If there is another entry, it must be
2868 * after the end of the potential new region.
2869 */
2870
2871 if (next == vm_map_to_entry(map)) {
2872 break;
2873 }
2874
2875 if (next->vme_start >= desired_empty_end) {
2876 break;
2877 }
2878 }
2879
2880 /*
2881 * Didn't fit -- move to the next entry.
2882 */
2883
2884 entry = next;
2885
2886 if (map->holelistenabled) {
2887 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2888 /*
2889 * Wrapped around
2890 */
2891 result = KERN_NO_SPACE;
2892 goto BailOut;
2893 }
2894 start = entry->vme_start;
2895 } else {
2896 start = entry->vme_end;
2897 }
2898
2899 start = vm_map_round_page(start,
2900 VM_MAP_PAGE_MASK(map));
2901 }
2902
2903 if (map->holelistenabled) {
2904 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2905 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2906 }
2907 }
2908
2909 *address = start;
2910 assert(VM_MAP_PAGE_ALIGNED(*address,
2911 VM_MAP_PAGE_MASK(map)));
2912 } else {
2913 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2914 !overwrite &&
2915 user_alias == VM_MEMORY_REALLOC) {
2916 /*
2917 * Force realloc() to switch to a new allocation,
2918 * to prevent 4k-fragmented virtual ranges.
2919 */
2920 // DEBUG4K_ERROR("no realloc in place");
2921 return KERN_NO_SPACE;
2922 }
2923
2924 /*
2925 * Verify that:
2926 * the address doesn't itself violate
2927 * the mask requirement.
2928 */
2929
2930 vm_map_lock(map);
2931 map_locked = TRUE;
2932 if ((start & mask) != 0) {
2933 RETURN(KERN_NO_SPACE);
2934 }
2935
2936 /*
2937 * ... the address is within bounds
2938 */
2939
2940 end = start + size;
2941
2942 if ((start < effective_min_offset) ||
2943 (end > effective_max_offset) ||
2944 (start >= end)) {
2945 RETURN(KERN_INVALID_ADDRESS);
2946 }
2947
2948 if (overwrite && zap_old_map != VM_MAP_NULL) {
2949 int remove_flags;
2950 /*
2951 * Fixed mapping and "overwrite" flag: attempt to
2952 * remove all existing mappings in the specified
2953 * address range, saving them in our "zap_old_map".
2954 */
2955 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2956 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2957 if (vmk_flags.vmkf_overwrite_immutable) {
2958 /* we can overwrite immutable mappings */
2959 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2960 }
2961 (void) vm_map_delete(map, start, end,
2962 remove_flags,
2963 zap_old_map);
2964 }
2965
2966 /*
2967 * ... the starting address isn't allocated
2968 */
2969
2970 if (vm_map_lookup_entry(map, start, &entry)) {
2971 if (!(vmk_flags.vmkf_already)) {
2972 RETURN(KERN_NO_SPACE);
2973 }
2974 /*
2975 * Check if what's already there is what we want.
2976 */
2977 tmp_start = start;
2978 tmp_offset = offset;
2979 if (entry->vme_start < start) {
2980 tmp_start -= start - entry->vme_start;
2981 tmp_offset -= start - entry->vme_start;
2982 }
2983 for (; entry->vme_start < end;
2984 entry = entry->vme_next) {
2985 /*
2986 * Check if the mapping's attributes
2987 * match the existing map entry.
2988 */
2989 if (entry == vm_map_to_entry(map) ||
2990 entry->vme_start != tmp_start ||
2991 entry->is_sub_map != is_submap ||
2992 VME_OFFSET(entry) != tmp_offset ||
2993 entry->needs_copy != needs_copy ||
2994 entry->protection != cur_protection ||
2995 entry->max_protection != max_protection ||
2996 entry->inheritance != inheritance ||
2997 entry->iokit_acct != iokit_acct ||
2998 VME_ALIAS(entry) != alias) {
2999 /* not the same mapping ! */
3000 RETURN(KERN_NO_SPACE);
3001 }
3002 /*
3003 * Check if the same object is being mapped.
3004 */
3005 if (is_submap) {
3006 if (VME_SUBMAP(entry) !=
3007 (vm_map_t) object) {
3008 /* not the same submap */
3009 RETURN(KERN_NO_SPACE);
3010 }
3011 } else {
3012 if (VME_OBJECT(entry) != object) {
3013 /* not the same VM object... */
3014 vm_object_t obj2;
3015
3016 obj2 = VME_OBJECT(entry);
3017 if ((obj2 == VM_OBJECT_NULL ||
3018 obj2->internal) &&
3019 (object == VM_OBJECT_NULL ||
3020 object->internal)) {
3021 /*
3022 * ... but both are
3023 * anonymous memory,
3024 * so equivalent.
3025 */
3026 } else {
3027 RETURN(KERN_NO_SPACE);
3028 }
3029 }
3030 }
3031
3032 tmp_offset += entry->vme_end - entry->vme_start;
3033 tmp_start += entry->vme_end - entry->vme_start;
3034 if (entry->vme_end >= end) {
3035 /* reached the end of our mapping */
3036 break;
3037 }
3038 }
3039 /* it all matches: let's use what's already there ! */
3040 RETURN(KERN_MEMORY_PRESENT);
3041 }
3042
3043 /*
3044 * ... the next region doesn't overlap the
3045 * end point.
3046 */
3047
3048 if ((entry->vme_next != vm_map_to_entry(map)) &&
3049 (entry->vme_next->vme_start < end)) {
3050 RETURN(KERN_NO_SPACE);
3051 }
3052 }
3053
3054 /*
3055 * At this point,
3056 * "start" and "end" should define the endpoints of the
3057 * available new range, and
3058 * "entry" should refer to the region before the new
3059 * range, and
3060 *
3061 * the map should be locked.
3062 */
3063
3064 /*
3065 * See whether we can avoid creating a new entry (and object) by
3066 * extending one of our neighbors. [So far, we only attempt to
3067 * extend from below.] Note that we can never extend/join
3068 * purgable objects because they need to remain distinct
3069 * entities in order to implement their "volatile object"
3070 * semantics.
3071 */
3072
3073 if (purgable ||
3074 entry_for_jit ||
3075 vm_memory_malloc_no_cow(user_alias)) {
3076 if (object == VM_OBJECT_NULL) {
3077 object = vm_object_allocate(size);
3078 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3079 object->true_share = FALSE;
3080 if (purgable) {
3081 task_t owner;
3082 object->purgable = VM_PURGABLE_NONVOLATILE;
3083 if (map->pmap == kernel_pmap) {
3084 /*
3085 * Purgeable mappings made in a kernel
3086 * map are "owned" by the kernel itself
3087 * rather than the current user task
3088 * because they're likely to be used by
3089 * more than this user task (see
3090 * execargs_purgeable_allocate(), for
3091 * example).
3092 */
3093 owner = kernel_task;
3094 } else {
3095 owner = current_task();
3096 }
3097 assert(object->vo_owner == NULL);
3098 assert(object->resident_page_count == 0);
3099 assert(object->wired_page_count == 0);
3100 vm_object_lock(object);
3101 vm_purgeable_nonvolatile_enqueue(object, owner);
3102 vm_object_unlock(object);
3103 }
3104 offset = (vm_object_offset_t)0;
3105 }
3106 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
3107 /* no coalescing if address space uses sub-pages */
3108 } else if ((is_submap == FALSE) &&
3109 (object == VM_OBJECT_NULL) &&
3110 (entry != vm_map_to_entry(map)) &&
3111 (entry->vme_end == start) &&
3112 (!entry->is_shared) &&
3113 (!entry->is_sub_map) &&
3114 (!entry->in_transition) &&
3115 (!entry->needs_wakeup) &&
3116 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
3117 (entry->protection == cur_protection) &&
3118 (entry->max_protection == max_protection) &&
3119 (entry->inheritance == inheritance) &&
3120 ((user_alias == VM_MEMORY_REALLOC) ||
3121 (VME_ALIAS(entry) == alias)) &&
3122 (entry->no_cache == no_cache) &&
3123 (entry->permanent == permanent) &&
3124 /* no coalescing for immutable executable mappings */
3125 !((entry->protection & VM_PROT_EXECUTE) &&
3126 entry->permanent) &&
3127 (!entry->superpage_size && !superpage_size) &&
3128 /*
3129 * No coalescing if not map-aligned, to avoid propagating
3130 * that condition any further than needed:
3131 */
3132 (!entry->map_aligned || !clear_map_aligned) &&
3133 (!entry->zero_wired_pages) &&
3134 (!entry->used_for_jit && !entry_for_jit) &&
3135 (!entry->pmap_cs_associated) &&
3136 (entry->iokit_acct == iokit_acct) &&
3137 (!entry->vme_resilient_codesign) &&
3138 (!entry->vme_resilient_media) &&
3139 (!entry->vme_atomic) &&
3140 (entry->vme_no_copy_on_read == no_copy_on_read) &&
3141
3142 ((entry->vme_end - entry->vme_start) + size <=
3143 (user_alias == VM_MEMORY_REALLOC ?
3144 ANON_CHUNK_SIZE :
3145 NO_COALESCE_LIMIT)) &&
3146
3147 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3148 if (vm_object_coalesce(VME_OBJECT(entry),
3149 VM_OBJECT_NULL,
3150 VME_OFFSET(entry),
3151 (vm_object_offset_t) 0,
3152 (vm_map_size_t)(entry->vme_end - entry->vme_start),
3153 (vm_map_size_t)(end - entry->vme_end))) {
3154 /*
3155 * Coalesced the two objects - can extend
3156 * the previous map entry to include the
3157 * new range.
3158 */
3159 map->size += (end - entry->vme_end);
3160 assert(entry->vme_start < end);
3161 assert(VM_MAP_PAGE_ALIGNED(end,
3162 VM_MAP_PAGE_MASK(map)));
3163 if (__improbable(vm_debug_events)) {
3164 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
3165 }
3166 entry->vme_end = end;
3167 if (map->holelistenabled) {
3168 vm_map_store_update_first_free(map, entry, TRUE);
3169 } else {
3170 vm_map_store_update_first_free(map, map->first_free, TRUE);
3171 }
3172 new_mapping_established = TRUE;
3173 RETURN(KERN_SUCCESS);
3174 }
3175 }
3176
3177 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
3178 new_entry = NULL;
3179
3180 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
3181 tmp2_end = tmp2_start + step;
3182 /*
3183 * Create a new entry
3184 *
3185 * XXX FBDP
3186 * The reserved "page zero" in each process's address space can
3187 * be arbitrarily large. Splitting it into separate objects and
3188 * therefore different VM map entries serves no purpose and just
3189 * slows down operations on the VM map, so let's not split the
3190 * allocation into chunks if the max protection is NONE. That
3191 * memory should never be accessible, so it will never get to the
3192 * default pager.
3193 */
3194 tmp_start = tmp2_start;
3195 if (object == VM_OBJECT_NULL &&
3196 size > chunk_size &&
3197 max_protection != VM_PROT_NONE &&
3198 superpage_size == 0) {
3199 tmp_end = tmp_start + chunk_size;
3200 } else {
3201 tmp_end = tmp2_end;
3202 }
3203 do {
3204 new_entry = vm_map_entry_insert(map,
3205 entry, tmp_start, tmp_end,
3206 object, offset, needs_copy,
3207 FALSE, FALSE,
3208 cur_protection, max_protection,
3209 VM_BEHAVIOR_DEFAULT,
3210 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3211 VM_INHERIT_NONE : inheritance),
3212 0,
3213 no_cache,
3214 permanent,
3215 no_copy_on_read,
3216 superpage_size,
3217 clear_map_aligned,
3218 is_submap,
3219 entry_for_jit,
3220 alias,
3221 translated_allow_execute);
3222
3223 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3224
3225 if (resilient_codesign) {
3226 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
3227 if (!((cur_protection | max_protection) & reject_prot)) {
3228 new_entry->vme_resilient_codesign = TRUE;
3229 }
3230 }
3231
3232 if (resilient_media &&
3233 (object == VM_OBJECT_NULL ||
3234 object->internal)) {
3235 new_entry->vme_resilient_media = TRUE;
3236 }
3237
3238 assert(!new_entry->iokit_acct);
3239 if (!is_submap &&
3240 object != VM_OBJECT_NULL &&
3241 (object->purgable != VM_PURGABLE_DENY ||
3242 object->vo_ledger_tag)) {
3243 assert(new_entry->use_pmap);
3244 assert(!new_entry->iokit_acct);
3245 /*
3246 * Turn off pmap accounting since
3247 * purgeable (or tagged) objects have their
3248 * own ledgers.
3249 */
3250 new_entry->use_pmap = FALSE;
3251 } else if (!is_submap &&
3252 iokit_acct &&
3253 object != VM_OBJECT_NULL &&
3254 object->internal) {
3255 /* alternate accounting */
3256 assert(!new_entry->iokit_acct);
3257 assert(new_entry->use_pmap);
3258 new_entry->iokit_acct = TRUE;
3259 new_entry->use_pmap = FALSE;
3260 DTRACE_VM4(
3261 vm_map_iokit_mapped_region,
3262 vm_map_t, map,
3263 vm_map_offset_t, new_entry->vme_start,
3264 vm_map_offset_t, new_entry->vme_end,
3265 int, VME_ALIAS(new_entry));
3266 vm_map_iokit_mapped_region(
3267 map,
3268 (new_entry->vme_end -
3269 new_entry->vme_start));
3270 } else if (!is_submap) {
3271 assert(!new_entry->iokit_acct);
3272 assert(new_entry->use_pmap);
3273 }
3274
3275 if (is_submap) {
3276 vm_map_t submap;
3277 boolean_t submap_is_64bit;
3278 boolean_t use_pmap;
3279
3280 assert(new_entry->is_sub_map);
3281 assert(!new_entry->use_pmap);
3282 assert(!new_entry->iokit_acct);
3283 submap = (vm_map_t) object;
3284 submap_is_64bit = vm_map_is_64bit(submap);
3285 use_pmap = vmk_flags.vmkf_nested_pmap;
3286 #ifndef NO_NESTED_PMAP
3287 if (use_pmap && submap->pmap == NULL) {
3288 ledger_t ledger = map->pmap->ledger;
3289 /* we need a sub pmap to nest... */
3290 submap->pmap = pmap_create_options(ledger, 0,
3291 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3292 if (submap->pmap == NULL) {
3293 /* let's proceed without nesting... */
3294 }
3295 #if defined(__arm__) || defined(__arm64__)
3296 else {
3297 pmap_set_nested(submap->pmap);
3298 }
3299 #endif
3300 }
3301 if (use_pmap && submap->pmap != NULL) {
3302 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3303 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3304 kr = KERN_FAILURE;
3305 } else {
3306 kr = pmap_nest(map->pmap,
3307 submap->pmap,
3308 tmp_start,
3309 tmp_end - tmp_start);
3310 }
3311 if (kr != KERN_SUCCESS) {
3312 printf("vm_map_enter: "
3313 "pmap_nest(0x%llx,0x%llx) "
3314 "error 0x%x\n",
3315 (long long)tmp_start,
3316 (long long)tmp_end,
3317 kr);
3318 } else {
3319 /* we're now nested ! */
3320 new_entry->use_pmap = TRUE;
3321 pmap_empty = FALSE;
3322 }
3323 }
3324 #endif /* NO_NESTED_PMAP */
3325 }
3326 entry = new_entry;
3327
3328 if (superpage_size) {
3329 vm_page_t pages, m;
3330 vm_object_t sp_object;
3331 vm_object_offset_t sp_offset;
3332
3333 VME_OFFSET_SET(entry, 0);
3334
3335 /* allocate one superpage */
3336 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3337 if (kr != KERN_SUCCESS) {
3338 /* deallocate whole range... */
3339 new_mapping_established = TRUE;
3340 /* ... but only up to "tmp_end" */
3341 size -= end - tmp_end;
3342 RETURN(kr);
3343 }
3344
3345 /* create one vm_object per superpage */
3346 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3347 sp_object->phys_contiguous = TRUE;
3348 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3349 VME_OBJECT_SET(entry, sp_object);
3350 assert(entry->use_pmap);
3351
3352 /* enter the base pages into the object */
3353 vm_object_lock(sp_object);
3354 for (sp_offset = 0;
3355 sp_offset < SUPERPAGE_SIZE;
3356 sp_offset += PAGE_SIZE) {
3357 m = pages;
3358 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3359 pages = NEXT_PAGE(m);
3360 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3361 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3362 }
3363 vm_object_unlock(sp_object);
3364 }
3365 } while (tmp_end != tmp2_end &&
3366 (tmp_start = tmp_end) &&
3367 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3368 tmp_end + chunk_size : tmp2_end));
3369 }
3370
3371 new_mapping_established = TRUE;
3372
3373 BailOut:
3374 assert(map_locked == TRUE);
3375
3376 if (result == KERN_SUCCESS) {
3377 vm_prot_t pager_prot;
3378 memory_object_t pager;
3379
3380 #if DEBUG
3381 if (pmap_empty &&
3382 !(vmk_flags.vmkf_no_pmap_check)) {
3383 assert(vm_map_pmap_is_empty(map,
3384 *address,
3385 *address + size));
3386 }
3387 #endif /* DEBUG */
3388
3389 /*
3390 * For "named" VM objects, let the pager know that the
3391 * memory object is being mapped. Some pagers need to keep
3392 * track of this, to know when they can reclaim the memory
3393 * object, for example.
3394 * VM calls memory_object_map() for each mapping (specifying
3395 * the protection of each mapping) and calls
3396 * memory_object_last_unmap() when all the mappings are gone.
3397 */
3398 pager_prot = max_protection;
3399 if (needs_copy) {
3400 /*
3401 * Copy-On-Write mapping: won't modify
3402 * the memory object.
3403 */
3404 pager_prot &= ~VM_PROT_WRITE;
3405 }
3406 if (!is_submap &&
3407 object != VM_OBJECT_NULL &&
3408 object->named &&
3409 object->pager != MEMORY_OBJECT_NULL) {
3410 vm_object_lock(object);
3411 pager = object->pager;
3412 if (object->named &&
3413 pager != MEMORY_OBJECT_NULL) {
3414 assert(object->pager_ready);
3415 vm_object_mapping_wait(object, THREAD_UNINT);
3416 vm_object_mapping_begin(object);
3417 vm_object_unlock(object);
3418
3419 kr = memory_object_map(pager, pager_prot);
3420 assert(kr == KERN_SUCCESS);
3421
3422 vm_object_lock(object);
3423 vm_object_mapping_end(object);
3424 }
3425 vm_object_unlock(object);
3426 }
3427 }
3428
3429 assert(map_locked == TRUE);
3430
3431 if (!keep_map_locked) {
3432 vm_map_unlock(map);
3433 map_locked = FALSE;
3434 }
3435
3436 /*
3437 * We can't hold the map lock if we enter this block.
3438 */
3439
3440 if (result == KERN_SUCCESS) {
3441 /* Wire down the new entry if the user
3442 * requested all new map entries be wired.
3443 */
3444 if ((map->wiring_required) || (superpage_size)) {
3445 assert(!keep_map_locked);
3446 pmap_empty = FALSE; /* pmap won't be empty */
3447 kr = vm_map_wire_kernel(map, start, end,
3448 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3449 TRUE);
3450 result = kr;
3451 }
3452
3453 }
3454
3455 if (result != KERN_SUCCESS) {
3456 if (new_mapping_established) {
3457 /*
3458 * We have to get rid of the new mappings since we
3459 * won't make them available to the user.
3460 * Try to do that atomically, to minimize the risk
3461 * that someone else creates new mappings in that range.
3462 */
3463 zap_new_map = vm_map_create(PMAP_NULL,
3464 *address,
3465 *address + size,
3466 map->hdr.entries_pageable);
3467 vm_map_set_page_shift(zap_new_map,
3468 VM_MAP_PAGE_SHIFT(map));
3469 vm_map_disable_hole_optimization(zap_new_map);
3470
3471 if (!map_locked) {
3472 vm_map_lock(map);
3473 map_locked = TRUE;
3474 }
3475 (void) vm_map_delete(map, *address, *address + size,
3476 (VM_MAP_REMOVE_SAVE_ENTRIES |
3477 VM_MAP_REMOVE_NO_MAP_ALIGN),
3478 zap_new_map);
3479 }
3480 if (zap_old_map != VM_MAP_NULL &&
3481 zap_old_map->hdr.nentries != 0) {
3482 vm_map_entry_t entry1, entry2;
3483
3484 /*
3485 * The new mapping failed. Attempt to restore
3486 * the old mappings, saved in the "zap_old_map".
3487 */
3488 if (!map_locked) {
3489 vm_map_lock(map);
3490 map_locked = TRUE;
3491 }
3492
3493 /* first check if the coast is still clear */
3494 start = vm_map_first_entry(zap_old_map)->vme_start;
3495 end = vm_map_last_entry(zap_old_map)->vme_end;
3496 if (vm_map_lookup_entry(map, start, &entry1) ||
3497 vm_map_lookup_entry(map, end, &entry2) ||
3498 entry1 != entry2) {
3499 /*
3500 * Part of that range has already been
3501 * re-mapped: we can't restore the old
3502 * mappings...
3503 */
3504 vm_map_enter_restore_failures++;
3505 } else {
3506 /*
3507 * Transfer the saved map entries from
3508 * "zap_old_map" to the original "map",
3509 * inserting them all after "entry1".
3510 */
3511 for (entry2 = vm_map_first_entry(zap_old_map);
3512 entry2 != vm_map_to_entry(zap_old_map);
3513 entry2 = vm_map_first_entry(zap_old_map)) {
3514 vm_map_size_t entry_size;
3515
3516 entry_size = (entry2->vme_end -
3517 entry2->vme_start);
3518 vm_map_store_entry_unlink(zap_old_map,
3519 entry2);
3520 zap_old_map->size -= entry_size;
3521 vm_map_store_entry_link(map, entry1, entry2,
3522 VM_MAP_KERNEL_FLAGS_NONE);
3523 map->size += entry_size;
3524 entry1 = entry2;
3525 }
3526 if (map->wiring_required) {
3527 /*
3528 * XXX TODO: we should rewire the
3529 * old pages here...
3530 */
3531 }
3532 vm_map_enter_restore_successes++;
3533 }
3534 }
3535 }
3536
3537 /*
3538 * The caller is responsible for releasing the lock if it requested to
3539 * keep the map locked.
3540 */
3541 if (map_locked && !keep_map_locked) {
3542 vm_map_unlock(map);
3543 }
3544
3545 /*
3546 * Get rid of the "zap_maps" and all the map entries that
3547 * they may still contain.
3548 */
3549 if (zap_old_map != VM_MAP_NULL) {
3550 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3551 zap_old_map = VM_MAP_NULL;
3552 }
3553 if (zap_new_map != VM_MAP_NULL) {
3554 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3555 zap_new_map = VM_MAP_NULL;
3556 }
3557
3558 return result;
3559
3560 #undef RETURN
3561 }
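/*
 * Illustrative sketch (not compiled): the fixed-address +
 * VM_FLAGS_OVERWRITE path above atomically replaces existing mappings,
 * saving them in "zap_old_map" so they can be restored if the new
 * mapping fails.  A caller relying on that behavior might look like
 * this ("map", "addr" and "size" are assumed; VM_FLAGS_FIXED simply
 * means "not VM_FLAGS_ANYWHERE").
 */
#if 0
	kern_return_t kr;

	kr = vm_map_enter(map, &addr, size,
	    (vm_map_offset_t)0,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,
	    VM_OBJECT_NULL,
	    (vm_object_offset_t)0,
	    FALSE,
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
	/* on failure, the previous mappings in [addr, addr + size) are restored */
#endif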
3562
3563 #if __arm64__
3564 extern const struct memory_object_pager_ops fourk_pager_ops;
3565 kern_return_t
3566 vm_map_enter_fourk(
3567 vm_map_t map,
3568 vm_map_offset_t *address, /* IN/OUT */
3569 vm_map_size_t size,
3570 vm_map_offset_t mask,
3571 int flags,
3572 vm_map_kernel_flags_t vmk_flags,
3573 vm_tag_t alias,
3574 vm_object_t object,
3575 vm_object_offset_t offset,
3576 boolean_t needs_copy,
3577 vm_prot_t cur_protection,
3578 vm_prot_t max_protection,
3579 vm_inherit_t inheritance)
3580 {
3581 vm_map_entry_t entry, new_entry;
3582 vm_map_offset_t start, fourk_start;
3583 vm_map_offset_t end, fourk_end;
3584 vm_map_size_t fourk_size;
3585 kern_return_t result = KERN_SUCCESS;
3586 vm_map_t zap_old_map = VM_MAP_NULL;
3587 vm_map_t zap_new_map = VM_MAP_NULL;
3588 boolean_t map_locked = FALSE;
3589 boolean_t pmap_empty = TRUE;
3590 boolean_t new_mapping_established = FALSE;
3591 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3592 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3593 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3594 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3595 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3596 boolean_t is_submap = vmk_flags.vmkf_submap;
3597 boolean_t permanent = vmk_flags.vmkf_permanent;
3598 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3599 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3600 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3601 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3602 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3603 vm_map_offset_t effective_min_offset, effective_max_offset;
3604 kern_return_t kr;
3605 boolean_t clear_map_aligned = FALSE;
3606 memory_object_t fourk_mem_obj;
3607 vm_object_t fourk_object;
3608 vm_map_offset_t fourk_pager_offset;
3609 int fourk_pager_index_start, fourk_pager_index_num;
3610 int cur_idx;
3611 boolean_t fourk_copy;
3612 vm_object_t copy_object;
3613 vm_object_offset_t copy_offset;
3614
3615 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3616 panic("%s:%d\n", __FUNCTION__, __LINE__);
3617 }
3618 fourk_mem_obj = MEMORY_OBJECT_NULL;
3619 fourk_object = VM_OBJECT_NULL;
3620
3621 if (superpage_size) {
3622 return KERN_NOT_SUPPORTED;
3623 }
3624
3625 if ((cur_protection & VM_PROT_WRITE) &&
3626 (cur_protection & VM_PROT_EXECUTE) &&
3627 #if XNU_TARGET_OS_OSX
3628 map->pmap != kernel_pmap &&
3629 (vm_map_cs_enforcement(map)
3630 #if __arm64__
3631 || !VM_MAP_IS_EXOTIC(map)
3632 #endif /* __arm64__ */
3633 ) &&
3634 #endif /* XNU_TARGET_OS_OSX */
3635 !entry_for_jit) {
3636 DTRACE_VM3(cs_wx,
3637 uint64_t, 0,
3638 uint64_t, 0,
3639 vm_prot_t, cur_protection);
3640 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3641 "turning off execute\n",
3642 proc_selfpid(),
3643 (current_task()->bsd_info
3644 ? proc_name_address(current_task()->bsd_info)
3645 : "?"),
3646 __FUNCTION__);
3647 cur_protection &= ~VM_PROT_EXECUTE;
3648 }
3649
3650 /*
3651 * If the task has requested executable lockdown,
3652 * deny any new executable mapping.
3653 */
3654 if (map->map_disallow_new_exec == TRUE) {
3655 if (cur_protection & VM_PROT_EXECUTE) {
3656 return KERN_PROTECTION_FAILURE;
3657 }
3658 }
3659
3660 if (is_submap) {
3661 return KERN_NOT_SUPPORTED;
3662 }
3663 if (vmk_flags.vmkf_already) {
3664 return KERN_NOT_SUPPORTED;
3665 }
3666 if (purgable || entry_for_jit) {
3667 return KERN_NOT_SUPPORTED;
3668 }
3669
3670 effective_min_offset = map->min_offset;
3671
3672 if (vmk_flags.vmkf_beyond_max) {
3673 return KERN_NOT_SUPPORTED;
3674 } else {
3675 effective_max_offset = map->max_offset;
3676 }
3677
3678 if (size == 0 ||
3679 (offset & FOURK_PAGE_MASK) != 0) {
3680 *address = 0;
3681 return KERN_INVALID_ARGUMENT;
3682 }
3683
3684 #define RETURN(value) { result = value; goto BailOut; }
3685
3686 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3687 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3688
3689 if (!anywhere && overwrite) {
3690 return KERN_NOT_SUPPORTED;
3691 }
3692 if (!anywhere && overwrite) {
3693 /*
3694 * Create a temporary VM map to hold the old mappings in the
3695 * affected area while we create the new one.
3696 * This avoids releasing the VM map lock in
3697 * vm_map_entry_delete() and allows atomicity
3698 * when we want to replace some mappings with a new one.
3699 * It also allows us to restore the old VM mappings if the
3700 * new mapping fails.
3701 */
3702 zap_old_map = vm_map_create(PMAP_NULL,
3703 *address,
3704 *address + size,
3705 map->hdr.entries_pageable);
3706 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3707 vm_map_disable_hole_optimization(zap_old_map);
3708 }
3709
3710 fourk_start = *address;
3711 fourk_size = size;
3712 fourk_end = fourk_start + fourk_size;
3713
3714 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3715 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3716 size = end - start;
3717
3718 if (anywhere) {
3719 return KERN_NOT_SUPPORTED;
3720 } else {
3721 /*
3722 * Verify that:
3723 * the address doesn't itself violate
3724 * the mask requirement.
3725 */
3726
3727 vm_map_lock(map);
3728 map_locked = TRUE;
3729 if ((start & mask) != 0) {
3730 RETURN(KERN_NO_SPACE);
3731 }
3732
3733 /*
3734 * ... the address is within bounds
3735 */
3736
3737 end = start + size;
3738
3739 if ((start < effective_min_offset) ||
3740 (end > effective_max_offset) ||
3741 (start >= end)) {
3742 RETURN(KERN_INVALID_ADDRESS);
3743 }
3744
3745 if (overwrite && zap_old_map != VM_MAP_NULL) {
3746 /*
3747 * Fixed mapping and "overwrite" flag: attempt to
3748 * remove all existing mappings in the specified
3749 * address range, saving them in our "zap_old_map".
3750 */
3751 (void) vm_map_delete(map, start, end,
3752 (VM_MAP_REMOVE_SAVE_ENTRIES |
3753 VM_MAP_REMOVE_NO_MAP_ALIGN),
3754 zap_old_map);
3755 }
3756
3757 /*
3758 * ... the starting address isn't allocated
3759 */
3760 if (vm_map_lookup_entry(map, start, &entry)) {
3761 vm_object_t cur_object, shadow_object;
3762
3763 /*
3764 * We might already have some 4K mappings
3765 * in a 16K page here.
3766 */
3767
3768 if (entry->vme_end - entry->vme_start
3769 != SIXTEENK_PAGE_SIZE) {
3770 RETURN(KERN_NO_SPACE);
3771 }
3772 if (entry->is_sub_map) {
3773 RETURN(KERN_NO_SPACE);
3774 }
3775 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3776 RETURN(KERN_NO_SPACE);
3777 }
3778
3779 /* go all the way down the shadow chain */
3780 cur_object = VME_OBJECT(entry);
3781 vm_object_lock(cur_object);
3782 while (cur_object->shadow != VM_OBJECT_NULL) {
3783 shadow_object = cur_object->shadow;
3784 vm_object_lock(shadow_object);
3785 vm_object_unlock(cur_object);
3786 cur_object = shadow_object;
3787 shadow_object = VM_OBJECT_NULL;
3788 }
3789 if (cur_object->internal ||
3790 cur_object->pager == NULL) {
3791 vm_object_unlock(cur_object);
3792 RETURN(KERN_NO_SPACE);
3793 }
3794 if (cur_object->pager->mo_pager_ops
3795 != &fourk_pager_ops) {
3796 vm_object_unlock(cur_object);
3797 RETURN(KERN_NO_SPACE);
3798 }
3799 fourk_object = cur_object;
3800 fourk_mem_obj = fourk_object->pager;
3801
3802 /* keep the "4K" object alive */
3803 vm_object_reference_locked(fourk_object);
3804 memory_object_reference(fourk_mem_obj);
3805 vm_object_unlock(fourk_object);
3806
3807 /* merge permissions */
3808 entry->protection |= cur_protection;
3809 entry->max_protection |= max_protection;
3810 if ((entry->protection & (VM_PROT_WRITE |
3811 VM_PROT_EXECUTE)) ==
3812 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3813 fourk_binary_compatibility_unsafe &&
3814 fourk_binary_compatibility_allow_wx) {
3815 /* write+execute: need to be "jit" */
3816 entry->used_for_jit = TRUE;
3817 }
3818 goto map_in_fourk_pager;
3819 }
3820
3821 /*
3822 * ... the next region doesn't overlap the
3823 * end point.
3824 */
3825
3826 if ((entry->vme_next != vm_map_to_entry(map)) &&
3827 (entry->vme_next->vme_start < end)) {
3828 RETURN(KERN_NO_SPACE);
3829 }
3830 }
3831
3832 /*
3833 * At this point,
3834 * "start" and "end" should define the endpoints of the
3835 * available new range, and
3836 * "entry" should refer to the region before the new
3837 * range, and
3838 *
3839 * the map should be locked.
3840 */
3841
3842 /* create a new "4K" pager */
3843 fourk_mem_obj = fourk_pager_create();
3844 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3845 assert(fourk_object);
3846
3847 /* keep the "4K" object alive */
3848 vm_object_reference(fourk_object);
3849
3850 /* create a "copy" object, to map the "4K" object copy-on-write */
3851 fourk_copy = TRUE;
3852 result = vm_object_copy_strategically(fourk_object,
3853 0,
3854 end - start,
3855 &copy_object,
3856 &copy_offset,
3857 &fourk_copy);
3858 assert(result == KERN_SUCCESS);
3859 assert(copy_object != VM_OBJECT_NULL);
3860 assert(copy_offset == 0);
3861
3862 /* map the "4K" pager's copy object */
3863 new_entry =
3864 vm_map_entry_insert(map, entry,
3865 vm_map_trunc_page(start,
3866 VM_MAP_PAGE_MASK(map)),
3867 vm_map_round_page(end,
3868 VM_MAP_PAGE_MASK(map)),
3869 copy_object,
3870 0, /* offset */
3871 FALSE, /* needs_copy */
3872 FALSE,
3873 FALSE,
3874 cur_protection, max_protection,
3875 VM_BEHAVIOR_DEFAULT,
3876 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3877 VM_INHERIT_NONE : inheritance),
3878 0,
3879 no_cache,
3880 permanent,
3881 no_copy_on_read,
3882 superpage_size,
3883 clear_map_aligned,
3884 is_submap,
3885 FALSE, /* jit */
3886 alias,
3887 translated_allow_execute);
3888 entry = new_entry;
3889
3890 #if VM_MAP_DEBUG_FOURK
3891 if (vm_map_debug_fourk) {
3892 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3893 map,
3894 (uint64_t) entry->vme_start,
3895 (uint64_t) entry->vme_end,
3896 fourk_mem_obj);
3897 }
3898 #endif /* VM_MAP_DEBUG_FOURK */
3899
3900 new_mapping_established = TRUE;
3901
3902 map_in_fourk_pager:
3903 /* "map" the original "object" where it belongs in the "4K" pager */
3904 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3905 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3906 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3907 fourk_pager_index_num = 4;
3908 } else {
3909 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3910 }
3911 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3912 fourk_pager_index_num = 4 - fourk_pager_index_start;
3913 }
3914 for (cur_idx = 0;
3915 cur_idx < fourk_pager_index_num;
3916 cur_idx++) {
3917 vm_object_t old_object;
3918 vm_object_offset_t old_offset;
3919
3920 kr = fourk_pager_populate(fourk_mem_obj,
3921 TRUE, /* overwrite */
3922 fourk_pager_index_start + cur_idx,
3923 object,
3924 (object
3925 ? (offset +
3926 (cur_idx * FOURK_PAGE_SIZE))
3927 : 0),
3928 &old_object,
3929 &old_offset);
3930 #if VM_MAP_DEBUG_FOURK
3931 if (vm_map_debug_fourk) {
3932 if (old_object == (vm_object_t) -1 &&
3933 old_offset == (vm_object_offset_t) -1) {
3934 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3935 "pager [%p:0x%llx] "
3936 "populate[%d] "
3937 "[object:%p,offset:0x%llx]\n",
3938 map,
3939 (uint64_t) entry->vme_start,
3940 (uint64_t) entry->vme_end,
3941 fourk_mem_obj,
3942 VME_OFFSET(entry),
3943 fourk_pager_index_start + cur_idx,
3944 object,
3945 (object
3946 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3947 : 0));
3948 } else {
3949 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3950 "pager [%p:0x%llx] "
3951 "populate[%d] [object:%p,offset:0x%llx] "
3952 "old [%p:0x%llx]\n",
3953 map,
3954 (uint64_t) entry->vme_start,
3955 (uint64_t) entry->vme_end,
3956 fourk_mem_obj,
3957 VME_OFFSET(entry),
3958 fourk_pager_index_start + cur_idx,
3959 object,
3960 (object
3961 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3962 : 0),
3963 old_object,
3964 old_offset);
3965 }
3966 }
3967 #endif /* VM_MAP_DEBUG_FOURK */
3968
3969 assert(kr == KERN_SUCCESS);
3970 if (object != old_object &&
3971 object != VM_OBJECT_NULL &&
3972 object != (vm_object_t) -1) {
3973 vm_object_reference(object);
3974 }
3975 if (object != old_object &&
3976 old_object != VM_OBJECT_NULL &&
3977 old_object != (vm_object_t) -1) {
3978 vm_object_deallocate(old_object);
3979 }
3980 }
3981
3982 BailOut:
3983 assert(map_locked == TRUE);
3984
3985 if (result == KERN_SUCCESS) {
3986 vm_prot_t pager_prot;
3987 memory_object_t pager;
3988
3989 #if DEBUG
3990 if (pmap_empty &&
3991 !(vmk_flags.vmkf_no_pmap_check)) {
3992 assert(vm_map_pmap_is_empty(map,
3993 *address,
3994 *address + size));
3995 }
3996 #endif /* DEBUG */
3997
3998 /*
3999 * For "named" VM objects, let the pager know that the
4000 * memory object is being mapped. Some pagers need to keep
4001 * track of this, to know when they can reclaim the memory
4002 * object, for example.
4003 * VM calls memory_object_map() for each mapping (specifying
4004 * the protection of each mapping) and calls
4005 * memory_object_last_unmap() when all the mappings are gone.
4006 */
4007 pager_prot = max_protection;
4008 if (needs_copy) {
4009 /*
4010 * Copy-On-Write mapping: won't modify
4011 * the memory object.
4012 */
4013 pager_prot &= ~VM_PROT_WRITE;
4014 }
4015 if (!is_submap &&
4016 object != VM_OBJECT_NULL &&
4017 object->named &&
4018 object->pager != MEMORY_OBJECT_NULL) {
4019 vm_object_lock(object);
4020 pager = object->pager;
4021 if (object->named &&
4022 pager != MEMORY_OBJECT_NULL) {
4023 assert(object->pager_ready);
4024 vm_object_mapping_wait(object, THREAD_UNINT);
4025 vm_object_mapping_begin(object);
4026 vm_object_unlock(object);
4027
4028 kr = memory_object_map(pager, pager_prot);
4029 assert(kr == KERN_SUCCESS);
4030
4031 vm_object_lock(object);
4032 vm_object_mapping_end(object);
4033 }
4034 vm_object_unlock(object);
4035 }
4036 if (!is_submap &&
4037 fourk_object != VM_OBJECT_NULL &&
4038 fourk_object->named &&
4039 fourk_object->pager != MEMORY_OBJECT_NULL) {
4040 vm_object_lock(fourk_object);
4041 pager = fourk_object->pager;
4042 if (fourk_object->named &&
4043 pager != MEMORY_OBJECT_NULL) {
4044 assert(fourk_object->pager_ready);
4045 vm_object_mapping_wait(fourk_object,
4046 THREAD_UNINT);
4047 vm_object_mapping_begin(fourk_object);
4048 vm_object_unlock(fourk_object);
4049
4050 kr = memory_object_map(pager, VM_PROT_READ);
4051 assert(kr == KERN_SUCCESS);
4052
4053 vm_object_lock(fourk_object);
4054 vm_object_mapping_end(fourk_object);
4055 }
4056 vm_object_unlock(fourk_object);
4057 }
4058 }
4059
4060 if (fourk_object != VM_OBJECT_NULL) {
4061 vm_object_deallocate(fourk_object);
4062 fourk_object = VM_OBJECT_NULL;
4063 memory_object_deallocate(fourk_mem_obj);
4064 fourk_mem_obj = MEMORY_OBJECT_NULL;
4065 }
4066
4067 assert(map_locked == TRUE);
4068
4069 if (!keep_map_locked) {
4070 vm_map_unlock(map);
4071 map_locked = FALSE;
4072 }
4073
4074 /*
4075 * We can't hold the map lock if we enter this block.
4076 */
4077
4078 if (result == KERN_SUCCESS) {
4079 /* Wire down the new entry if the user
4080 * requested all new map entries be wired.
4081 */
4082 if ((map->wiring_required) || (superpage_size)) {
4083 assert(!keep_map_locked);
4084 pmap_empty = FALSE; /* pmap won't be empty */
4085 kr = vm_map_wire_kernel(map, start, end,
4086 new_entry->protection, VM_KERN_MEMORY_MLOCK,
4087 TRUE);
4088 result = kr;
4089 }
4090
4091 }
4092
4093 if (result != KERN_SUCCESS) {
4094 if (new_mapping_established) {
4095 /*
4096 * We have to get rid of the new mappings since we
4097 * won't make them available to the user.
4098 * Try to do that atomically, to minimize the risk
4099 * that someone else creates new mappings in that range.
4100 */
4101 zap_new_map = vm_map_create(PMAP_NULL,
4102 *address,
4103 *address + size,
4104 map->hdr.entries_pageable);
4105 vm_map_set_page_shift(zap_new_map,
4106 VM_MAP_PAGE_SHIFT(map));
4107 vm_map_disable_hole_optimization(zap_new_map);
4108
4109 if (!map_locked) {
4110 vm_map_lock(map);
4111 map_locked = TRUE;
4112 }
4113 (void) vm_map_delete(map, *address, *address + size,
4114 (VM_MAP_REMOVE_SAVE_ENTRIES |
4115 VM_MAP_REMOVE_NO_MAP_ALIGN),
4116 zap_new_map);
4117 }
4118 if (zap_old_map != VM_MAP_NULL &&
4119 zap_old_map->hdr.nentries != 0) {
4120 vm_map_entry_t entry1, entry2;
4121
4122 /*
4123 * The new mapping failed. Attempt to restore
4124 * the old mappings, saved in the "zap_old_map".
4125 */
4126 if (!map_locked) {
4127 vm_map_lock(map);
4128 map_locked = TRUE;
4129 }
4130
4131 /* first check if the coast is still clear */
4132 start = vm_map_first_entry(zap_old_map)->vme_start;
4133 end = vm_map_last_entry(zap_old_map)->vme_end;
4134 if (vm_map_lookup_entry(map, start, &entry1) ||
4135 vm_map_lookup_entry(map, end, &entry2) ||
4136 entry1 != entry2) {
4137 /*
4138 * Part of that range has already been
4139 * re-mapped: we can't restore the old
4140 * mappings...
4141 */
4142 vm_map_enter_restore_failures++;
4143 } else {
4144 /*
4145 * Transfer the saved map entries from
4146 * "zap_old_map" to the original "map",
4147 * inserting them all after "entry1".
4148 */
4149 for (entry2 = vm_map_first_entry(zap_old_map);
4150 entry2 != vm_map_to_entry(zap_old_map);
4151 entry2 = vm_map_first_entry(zap_old_map)) {
4152 vm_map_size_t entry_size;
4153
4154 entry_size = (entry2->vme_end -
4155 entry2->vme_start);
4156 vm_map_store_entry_unlink(zap_old_map,
4157 entry2);
4158 zap_old_map->size -= entry_size;
4159 vm_map_store_entry_link(map, entry1, entry2,
4160 VM_MAP_KERNEL_FLAGS_NONE);
4161 map->size += entry_size;
4162 entry1 = entry2;
4163 }
4164 if (map->wiring_required) {
4165 /*
4166 * XXX TODO: we should rewire the
4167 * old pages here...
4168 */
4169 }
4170 vm_map_enter_restore_successes++;
4171 }
4172 }
4173 }
4174
4175 /*
4176 * The caller is responsible for releasing the lock if it requested to
4177 * keep the map locked.
4178 */
4179 if (map_locked && !keep_map_locked) {
4180 vm_map_unlock(map);
4181 }
4182
4183 /*
4184 * Get rid of the "zap_maps" and all the map entries that
4185 * they may still contain.
4186 */
4187 if (zap_old_map != VM_MAP_NULL) {
4188 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4189 zap_old_map = VM_MAP_NULL;
4190 }
4191 if (zap_new_map != VM_MAP_NULL) {
4192 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4193 zap_new_map = VM_MAP_NULL;
4194 }
4195
4196 return result;
4197
4198 #undef RETURN
4199 }
4200 #endif /* __arm64__ */
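/*
 * Illustrative sketch (not part of the xnu sources): the 4K-in-16K index
 * arithmetic used by the "fourk" pager populate loop above, restated as a
 * standalone userspace program.  It assumes a 4K subpage within a 16K
 * native page; the EX_* macros and the sample values are hypothetical
 * stand-ins for FOURK_PAGE_SIZE / SIXTEENK_PAGE_MASK.
 */
#if 0 /* example only, never compiled into the kernel */
#include <stdio.h>
#include <stdint.h>

#define EX_FOURK_PAGE_SIZE      0x1000ULL
#define EX_SIXTEENK_PAGE_SIZE   0x4000ULL
#define EX_SIXTEENK_PAGE_MASK   0x3FFFULL

int
main(void)
{
    uint64_t fourk_start = 0x2000;   /* 4K-aligned start within a 16K page */
    uint64_t fourk_size  = 0x3000;   /* three 4K subpages requested */
    uint64_t pager_offset = fourk_start & EX_SIXTEENK_PAGE_MASK;
    int index_start = (int)(pager_offset / EX_FOURK_PAGE_SIZE);
    int index_num;

    if (fourk_size > EX_SIXTEENK_PAGE_SIZE) {
        index_num = 4;
    } else {
        index_num = (int)(fourk_size / EX_FOURK_PAGE_SIZE);
    }
    if (index_start + index_num > 4) {
        /* clamp to the four 4K slots of a single 16K page */
        index_num = 4 - index_start;
    }
    printf("populate slots [%d..%d] of the 16K page\n",
        index_start, index_start + index_num - 1);
    return 0;
}
#endif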
4201
4202 /*
4203 * Counters for the prefault optimization.
4204 */
4205 int64_t vm_prefault_nb_pages = 0;
4206 int64_t vm_prefault_nb_bailout = 0;
4207
4208 static kern_return_t
4209 vm_map_enter_mem_object_helper(
4210 vm_map_t target_map,
4211 vm_map_offset_t *address,
4212 vm_map_size_t initial_size,
4213 vm_map_offset_t mask,
4214 int flags,
4215 vm_map_kernel_flags_t vmk_flags,
4216 vm_tag_t tag,
4217 ipc_port_t port,
4218 vm_object_offset_t offset,
4219 boolean_t copy,
4220 vm_prot_t cur_protection,
4221 vm_prot_t max_protection,
4222 vm_inherit_t inheritance,
4223 upl_page_list_ptr_t page_list,
4224 unsigned int page_list_count)
4225 {
4226 vm_map_address_t map_addr;
4227 vm_map_size_t map_size;
4228 vm_object_t object;
4229 vm_object_size_t size;
4230 kern_return_t result;
4231 boolean_t mask_cur_protection, mask_max_protection;
4232 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4233 vm_map_offset_t offset_in_mapping = 0;
4234 #if __arm64__
4235 boolean_t fourk = vmk_flags.vmkf_fourk;
4236 #endif /* __arm64__ */
4237
4238 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4239 /* XXX TODO4K prefaulting depends on page size... */
4240 try_prefault = FALSE;
4241 }
4242
4243 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4244
4245 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4246 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4247 cur_protection &= ~VM_PROT_IS_MASK;
4248 max_protection &= ~VM_PROT_IS_MASK;
4249
4250 /*
4251 * Check arguments for validity
4252 */
4253 if ((target_map == VM_MAP_NULL) ||
4254 (cur_protection & ~VM_PROT_ALL) ||
4255 (max_protection & ~VM_PROT_ALL) ||
4256 (inheritance > VM_INHERIT_LAST_VALID) ||
4257 (try_prefault && (copy || !page_list)) ||
4258 initial_size == 0) {
4259 return KERN_INVALID_ARGUMENT;
4260 }
4261
4262 #if __arm64__
4263 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4264 /* no "fourk" if map is using a sub-page page size */
4265 fourk = FALSE;
4266 }
4267 if (fourk) {
4268 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4269 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4270 } else
4271 #endif /* __arm64__ */
4272 {
4273 map_addr = vm_map_trunc_page(*address,
4274 VM_MAP_PAGE_MASK(target_map));
4275 map_size = vm_map_round_page(initial_size,
4276 VM_MAP_PAGE_MASK(target_map));
4277 }
4278 size = vm_object_round_page(initial_size);
4279
4280 /*
4281 * Find the vm object (if any) corresponding to this port.
4282 */
4283 if (!IP_VALID(port)) {
4284 object = VM_OBJECT_NULL;
4285 offset = 0;
4286 copy = FALSE;
4287 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4288 vm_named_entry_t named_entry;
4289 vm_object_offset_t data_offset;
4290
4291 named_entry = (vm_named_entry_t) ip_get_kobject(port);
4292
4293 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4294 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4295 data_offset = named_entry->data_offset;
4296 offset += named_entry->data_offset;
4297 } else {
4298 data_offset = 0;
4299 }
4300
4301 /* a few checks to make sure user is obeying rules */
4302 if (size == 0) {
4303 if (offset >= named_entry->size) {
4304 return KERN_INVALID_RIGHT;
4305 }
4306 size = named_entry->size - offset;
4307 }
4308 if (mask_max_protection) {
4309 max_protection &= named_entry->protection;
4310 }
4311 if (mask_cur_protection) {
4312 cur_protection &= named_entry->protection;
4313 }
4314 if ((named_entry->protection & max_protection) !=
4315 max_protection) {
4316 return KERN_INVALID_RIGHT;
4317 }
4318 if ((named_entry->protection & cur_protection) !=
4319 cur_protection) {
4320 return KERN_INVALID_RIGHT;
4321 }
4322 if (offset + size < offset) {
4323 /* overflow */
4324 return KERN_INVALID_ARGUMENT;
4325 }
4326 if (named_entry->size < (offset + initial_size)) {
4327 return KERN_INVALID_ARGUMENT;
4328 }
4329
4330 if (named_entry->is_copy) {
4331 /* for a vm_map_copy, we can only map it whole */
4332 if ((size != named_entry->size) &&
4333 (vm_map_round_page(size,
4334 VM_MAP_PAGE_MASK(target_map)) ==
4335 named_entry->size)) {
4336 /* XXX FBDP use the rounded size... */
4337 size = vm_map_round_page(
4338 size,
4339 VM_MAP_PAGE_MASK(target_map));
4340 }
4341 }
4342
4343 /* the caller's "offset" parameter is relative to the start of the */
4344 /* named entry; convert it into an offset within the backing object */
4345 offset = offset + named_entry->offset;
4346
4347 if (!VM_MAP_PAGE_ALIGNED(size,
4348 VM_MAP_PAGE_MASK(target_map))) {
4349 /*
4350 * Let's not map more than requested;
4351 * vm_map_enter() will handle this "not map-aligned"
4352 * case.
4353 */
4354 map_size = size;
4355 }
4356
4357 named_entry_lock(named_entry);
4358 if (named_entry->is_sub_map) {
4359 vm_map_t submap;
4360
4361 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4362 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4363 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4364 }
4365
4366 submap = named_entry->backing.map;
4367 vm_map_reference(submap);
4368 named_entry_unlock(named_entry);
4369
4370 vmk_flags.vmkf_submap = TRUE;
4371
4372 result = vm_map_enter(target_map,
4373 &map_addr,
4374 map_size,
4375 mask,
4376 flags,
4377 vmk_flags,
4378 tag,
4379 (vm_object_t)(uintptr_t) submap,
4380 offset,
4381 copy,
4382 cur_protection,
4383 max_protection,
4384 inheritance);
4385 if (result != KERN_SUCCESS) {
4386 vm_map_deallocate(submap);
4387 } else {
4388 /*
4389 * No need to lock "submap" just to check its
4390 * "mapped" flag: that flag is never reset
4391 * once it's been set and if we race, we'll
4392 * just end up setting it twice, which is OK.
4393 */
4394 if (submap->mapped_in_other_pmaps == FALSE &&
4395 vm_map_pmap(submap) != PMAP_NULL &&
4396 vm_map_pmap(submap) !=
4397 vm_map_pmap(target_map)) {
4398 /*
4399 * This submap is being mapped in a map
4400 * that uses a different pmap.
4401 * Set its "mapped_in_other_pmaps" flag
4402 * to indicate that we now need to
4403 * remove mappings from all pmaps rather
4404 * than just the submap's pmap.
4405 */
4406 vm_map_lock(submap);
4407 submap->mapped_in_other_pmaps = TRUE;
4408 vm_map_unlock(submap);
4409 }
4410 *address = map_addr;
4411 }
4412 return result;
4413 } else if (named_entry->is_copy) {
4414 kern_return_t kr;
4415 vm_map_copy_t copy_map;
4416 vm_map_entry_t copy_entry;
4417 vm_map_offset_t copy_addr;
4418 vm_map_copy_t target_copy_map;
4419 vm_map_offset_t overmap_start, overmap_end;
4420 vm_map_offset_t trimmed_start;
4421 vm_map_size_t target_size;
4422
4423 if (flags & ~(VM_FLAGS_FIXED |
4424 VM_FLAGS_ANYWHERE |
4425 VM_FLAGS_OVERWRITE |
4426 VM_FLAGS_RETURN_4K_DATA_ADDR |
4427 VM_FLAGS_RETURN_DATA_ADDR |
4428 VM_FLAGS_ALIAS_MASK)) {
4429 named_entry_unlock(named_entry);
4430 return KERN_INVALID_ARGUMENT;
4431 }
4432
4433 copy_map = named_entry->backing.copy;
4434 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4435 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4436 /* unsupported type; should not happen */
4437 printf("vm_map_enter_mem_object: "
4438 "memory_entry->backing.copy "
4439 "unsupported type 0x%x\n",
4440 copy_map->type);
4441 named_entry_unlock(named_entry);
4442 return KERN_INVALID_ARGUMENT;
4443 }
4444
4445 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4446 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4447 }
4448
4449 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4450 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4451 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4452 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4453 offset_in_mapping &= ~((signed)(0xFFF));
4454 }
4455 }
4456
4457 target_copy_map = VM_MAP_COPY_NULL;
4458 target_size = copy_map->size;
4459 overmap_start = 0;
4460 overmap_end = 0;
4461 trimmed_start = 0;
4462 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4463 DEBUG4K_ADJUST("adjusting...\n");
4464 kr = vm_map_copy_adjust_to_target(
4465 copy_map,
4466 offset /* includes data_offset */,
4467 initial_size,
4468 target_map,
4469 copy,
4470 &target_copy_map,
4471 &overmap_start,
4472 &overmap_end,
4473 &trimmed_start);
4474 if (kr != KERN_SUCCESS) {
4475 named_entry_unlock(named_entry);
4476 return kr;
4477 }
4478 target_size = target_copy_map->size;
4479 if (trimmed_start >= data_offset) {
4480 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4481 } else {
4482 data_offset -= trimmed_start;
4483 }
4484 } else {
4485 target_copy_map = copy_map;
4486 }
4487
4488 /* reserve a contiguous range */
4489 kr = vm_map_enter(target_map,
4490 &map_addr,
4491 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4492 mask,
4493 flags & (VM_FLAGS_ANYWHERE |
4494 VM_FLAGS_OVERWRITE |
4495 VM_FLAGS_RETURN_4K_DATA_ADDR |
4496 VM_FLAGS_RETURN_DATA_ADDR),
4497 vmk_flags,
4498 tag,
4499 VM_OBJECT_NULL,
4500 0,
4501 FALSE, /* copy */
4502 cur_protection,
4503 max_protection,
4504 inheritance);
4505 if (kr != KERN_SUCCESS) {
4506 DEBUG4K_ERROR("kr 0x%x\n", kr);
4507 if (target_copy_map != copy_map) {
4508 vm_map_copy_discard(target_copy_map);
4509 target_copy_map = VM_MAP_COPY_NULL;
4510 }
4511 named_entry_unlock(named_entry);
4512 return kr;
4513 }
4514
4515 copy_addr = map_addr;
4516
4517 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4518 copy_entry != vm_map_copy_to_entry(target_copy_map);
4519 copy_entry = copy_entry->vme_next) {
4520 int remap_flags;
4521 vm_map_kernel_flags_t vmk_remap_flags;
4522 vm_map_t copy_submap;
4523 vm_object_t copy_object;
4524 vm_map_size_t copy_size;
4525 vm_object_offset_t copy_offset;
4526 int copy_vm_alias;
4527
4528 remap_flags = 0;
4529 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4530
4531 copy_object = VME_OBJECT(copy_entry);
4532 copy_offset = VME_OFFSET(copy_entry);
4533 copy_size = (copy_entry->vme_end -
4534 copy_entry->vme_start);
4535 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4536 if (copy_vm_alias == 0) {
4537 /*
4538 * Caller does not want a specific
4539 * alias for this new mapping: use
4540 * the alias of the original mapping.
4541 */
4542 copy_vm_alias = VME_ALIAS(copy_entry);
4543 }
4544
4545 /* sanity check */
4546 if ((copy_addr + copy_size) >
4547 (map_addr +
4548 overmap_start + overmap_end +
4549 named_entry->size /* XXX full size */)) {
4550 /* over-mapping too much !? */
4551 kr = KERN_INVALID_ARGUMENT;
4552 DEBUG4K_ERROR("kr 0x%x\n", kr);
4553 /* abort */
4554 break;
4555 }
4556
4557 /* take a reference on the object */
4558 if (copy_entry->is_sub_map) {
4559 vmk_remap_flags.vmkf_submap = TRUE;
4560 copy_submap = VME_SUBMAP(copy_entry);
4561 vm_map_lock(copy_submap);
4562 vm_map_reference(copy_submap);
4563 vm_map_unlock(copy_submap);
4564 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4565 } else if (!copy &&
4566 copy_object != VM_OBJECT_NULL &&
4567 (copy_entry->needs_copy ||
4568 copy_object->shadowed ||
4569 (!copy_object->true_share &&
4570 !copy_entry->is_shared &&
4571 copy_object->vo_size > copy_size))) {
4572 /*
4573 * We need to resolve our side of this
4574 * "symmetric" copy-on-write now; we
4575 * need a new object to map and share,
4576 * instead of the current one which
4577 * might still be shared with the
4578 * original mapping.
4579 *
4580 * Note: A "vm_map_copy_t" does not
4581 * have a lock but we're protected by
4582 * the named entry's lock here.
4583 */
4584 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4585 VME_OBJECT_SHADOW(copy_entry, copy_size);
4586 if (!copy_entry->needs_copy &&
4587 copy_entry->protection & VM_PROT_WRITE) {
4588 vm_prot_t prot;
4589
4590 prot = copy_entry->protection & ~VM_PROT_WRITE;
4591 vm_object_pmap_protect(copy_object,
4592 copy_offset,
4593 copy_size,
4594 PMAP_NULL,
4595 PAGE_SIZE,
4596 0,
4597 prot);
4598 }
4599
4600 copy_entry->needs_copy = FALSE;
4601 copy_entry->is_shared = TRUE;
4602 copy_object = VME_OBJECT(copy_entry);
4603 copy_offset = VME_OFFSET(copy_entry);
4604 vm_object_lock(copy_object);
4605 vm_object_reference_locked(copy_object);
4606 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4607 /* we're about to make a shared mapping of this object */
4608 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4609 copy_object->true_share = TRUE;
4610 }
4611 vm_object_unlock(copy_object);
4612 } else {
4613 /*
4614 * We already have the right object
4615 * to map.
4616 */
4617 copy_object = VME_OBJECT(copy_entry);
4618 vm_object_reference(copy_object);
4619 }
4620
4621 /* over-map the object into destination */
4622 remap_flags |= flags;
4623 remap_flags |= VM_FLAGS_FIXED;
4624 remap_flags |= VM_FLAGS_OVERWRITE;
4625 remap_flags &= ~VM_FLAGS_ANYWHERE;
4626 if (!copy && !copy_entry->is_sub_map) {
4627 /*
4628 * copy-on-write should have been
4629 * resolved at this point, or we would
4630 * end up sharing instead of copying.
4631 */
4632 assert(!copy_entry->needs_copy);
4633 }
4634 #if XNU_TARGET_OS_OSX
4635 if (copy_entry->used_for_jit) {
4636 vmk_remap_flags.vmkf_map_jit = TRUE;
4637 }
4638 #endif /* XNU_TARGET_OS_OSX */
4639
4640 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4641 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4642 kr = vm_map_enter(target_map,
4643 &copy_addr,
4644 copy_size,
4645 (vm_map_offset_t) 0,
4646 remap_flags,
4647 vmk_remap_flags,
4648 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4649 copy_object,
4650 copy_offset,
4651 ((copy_object == NULL) ? FALSE : copy),
4652 cur_protection,
4653 max_protection,
4654 inheritance);
4655 if (kr != KERN_SUCCESS) {
4656 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4657 if (copy_entry->is_sub_map) {
4658 vm_map_deallocate(copy_submap);
4659 } else {
4660 vm_object_deallocate(copy_object);
4661 }
4662 /* abort */
4663 break;
4664 }
4665
4666 /* next mapping */
4667 copy_addr += copy_size;
4668 }
4669
4670 if (kr == KERN_SUCCESS) {
4671 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4672 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4673 *address = map_addr + offset_in_mapping;
4674 } else {
4675 *address = map_addr;
4676 }
4677 if (overmap_start) {
4678 *address += overmap_start;
4679 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4680 }
4681 }
4682 named_entry_unlock(named_entry);
4683 if (target_copy_map != copy_map) {
4684 vm_map_copy_discard(target_copy_map);
4685 target_copy_map = VM_MAP_COPY_NULL;
4686 }
4687
4688 if (kr != KERN_SUCCESS) {
4689 if (!(flags & VM_FLAGS_OVERWRITE)) {
4690 /* deallocate the contiguous range */
4691 (void) vm_deallocate(target_map,
4692 map_addr,
4693 map_size);
4694 }
4695 }
4696
4697 return kr;
4698 }
4699
4700 if (named_entry->is_object) {
4701 unsigned int access;
4702 vm_prot_t protections;
4703 unsigned int wimg_mode;
4704
4705 /* we are mapping a VM object */
4706
4707 protections = named_entry->protection & VM_PROT_ALL;
4708 access = GET_MAP_MEM(named_entry->protection);
4709
4710 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4711 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4712 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4713 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4714 offset_in_mapping &= ~((signed)(0xFFF));
4715 }
4716 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4717 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4718 }
4719
4720 object = vm_named_entry_to_vm_object(named_entry);
4721 assert(object != VM_OBJECT_NULL);
4722 vm_object_lock(object);
4723 named_entry_unlock(named_entry);
4724
4725 vm_object_reference_locked(object);
4726
4727 wimg_mode = object->wimg_bits;
4728 vm_prot_to_wimg(access, &wimg_mode);
4729 if (object->wimg_bits != wimg_mode) {
4730 vm_object_change_wimg_mode(object, wimg_mode);
4731 }
4732
4733 vm_object_unlock(object);
4734 } else {
4735 panic("invalid VM named entry %p", named_entry);
4736 }
4737 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4738 /*
4739 * JMM - This is temporary until we unify named entries
4740 * and raw memory objects.
4741 *
4742 * Detected fake ip_kotype for a memory object. In
4743 * this case, the port isn't really a port at all, but
4744 * instead is just a raw memory object.
4745 */
4746 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4747 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4748 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4749 }
4750
4751 object = memory_object_to_vm_object((memory_object_t)port);
4752 if (object == VM_OBJECT_NULL) {
4753 return KERN_INVALID_OBJECT;
4754 }
4755 vm_object_reference(object);
4756
4757 /* wait for object (if any) to be ready */
4758 if (object != VM_OBJECT_NULL) {
4759 if (object == kernel_object) {
4760 printf("Warning: Attempt to map kernel object"
4761 " by a non-private kernel entity\n");
4762 return KERN_INVALID_OBJECT;
4763 }
4764 if (!object->pager_ready) {
4765 vm_object_lock(object);
4766
4767 while (!object->pager_ready) {
4768 vm_object_wait(object,
4769 VM_OBJECT_EVENT_PAGER_READY,
4770 THREAD_UNINT);
4771 vm_object_lock(object);
4772 }
4773 vm_object_unlock(object);
4774 }
4775 }
4776 } else {
4777 return KERN_INVALID_OBJECT;
4778 }
4779
4780 if (object != VM_OBJECT_NULL &&
4781 object->named &&
4782 object->pager != MEMORY_OBJECT_NULL &&
4783 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4784 memory_object_t pager;
4785 vm_prot_t pager_prot;
4786 kern_return_t kr;
4787
4788 /*
4789 * For "named" VM objects, let the pager know that the
4790 * memory object is being mapped. Some pagers need to keep
4791 * track of this, to know when they can reclaim the memory
4792 * object, for example.
4793 * VM calls memory_object_map() for each mapping (specifying
4794 * the protection of each mapping) and calls
4795 * memory_object_last_unmap() when all the mappings are gone.
4796 */
4797 pager_prot = max_protection;
4798 if (copy) {
4799 /*
4800 * Copy-On-Write mapping: won't modify the
4801 * memory object.
4802 */
4803 pager_prot &= ~VM_PROT_WRITE;
4804 }
4805 vm_object_lock(object);
4806 pager = object->pager;
4807 if (object->named &&
4808 pager != MEMORY_OBJECT_NULL &&
4809 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4810 assert(object->pager_ready);
4811 vm_object_mapping_wait(object, THREAD_UNINT);
4812 vm_object_mapping_begin(object);
4813 vm_object_unlock(object);
4814
4815 kr = memory_object_map(pager, pager_prot);
4816 assert(kr == KERN_SUCCESS);
4817
4818 vm_object_lock(object);
4819 vm_object_mapping_end(object);
4820 }
4821 vm_object_unlock(object);
4822 }
4823
4824 /*
4825 * Perform the copy if requested
4826 */
4827
4828 if (copy) {
4829 vm_object_t new_object;
4830 vm_object_offset_t new_offset;
4831
4832 result = vm_object_copy_strategically(object, offset,
4833 map_size,
4834 &new_object, &new_offset,
4835 &copy);
4836
4837
4838 if (result == KERN_MEMORY_RESTART_COPY) {
4839 boolean_t success;
4840 boolean_t src_needs_copy;
4841
4842 /*
4843 * XXX
4844 * We currently ignore src_needs_copy.
4845 * This really is the issue of how to make
4846 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4847 * non-kernel users to use. Solution forthcoming.
4848 * In the meantime, since we don't allow non-kernel
4849 * memory managers to specify symmetric copy,
4850 * we won't run into problems here.
4851 */
4852 new_object = object;
4853 new_offset = offset;
4854 success = vm_object_copy_quickly(&new_object,
4855 new_offset,
4856 map_size,
4857 &src_needs_copy,
4858 &copy);
4859 assert(success);
4860 result = KERN_SUCCESS;
4861 }
4862 /*
4863 * Throw away the reference to the
4864 * original object, as it won't be mapped.
4865 */
4866
4867 vm_object_deallocate(object);
4868
4869 if (result != KERN_SUCCESS) {
4870 return result;
4871 }
4872
4873 object = new_object;
4874 offset = new_offset;
4875 }
4876
4877 /*
4878 * If non-kernel users want to try to prefault pages, the mapping and prefault
4879 * needs to be atomic.
4880 */
4881 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4882 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4883
4884 #if __arm64__
4885 if (fourk) {
4886 /* map this object in a "4K" pager */
4887 result = vm_map_enter_fourk(target_map,
4888 &map_addr,
4889 map_size,
4890 (vm_map_offset_t) mask,
4891 flags,
4892 vmk_flags,
4893 tag,
4894 object,
4895 offset,
4896 copy,
4897 cur_protection,
4898 max_protection,
4899 inheritance);
4900 } else
4901 #endif /* __arm64__ */
4902 {
4903 result = vm_map_enter(target_map,
4904 &map_addr, map_size,
4905 (vm_map_offset_t)mask,
4906 flags,
4907 vmk_flags,
4908 tag,
4909 object, offset,
4910 copy,
4911 cur_protection, max_protection,
4912 inheritance);
4913 }
4914 if (result != KERN_SUCCESS) {
4915 vm_object_deallocate(object);
4916 }
4917
4918 /*
4919 * Try to prefault, and do not forget to release the vm map lock.
4920 */
4921 if (result == KERN_SUCCESS && try_prefault) {
4922 mach_vm_address_t va = map_addr;
4923 kern_return_t kr = KERN_SUCCESS;
4924 unsigned int i = 0;
4925 int pmap_options;
4926
4927 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4928 if (object->internal) {
4929 pmap_options |= PMAP_OPTIONS_INTERNAL;
4930 }
4931
4932 for (i = 0; i < page_list_count; ++i) {
4933 if (!UPL_VALID_PAGE(page_list, i)) {
4934 if (kernel_prefault) {
4935 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4936 result = KERN_MEMORY_ERROR;
4937 break;
4938 }
4939 } else {
4940 /*
4941 * If this call fails, we should stop trying to
4942 * optimize, as subsequent calls are likely to
4943 * fail too.
4944 *
4945 * We do not report an error for such a failure,
4946 * though: prefaulting is an optimization, not
4947 * something critical.
4948 */
4949 kr = pmap_enter_options(target_map->pmap,
4950 va, UPL_PHYS_PAGE(page_list, i),
4951 cur_protection, VM_PROT_NONE,
4952 0, TRUE, pmap_options, NULL);
4953 if (kr != KERN_SUCCESS) {
4954 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4955 if (kernel_prefault) {
4956 result = kr;
4957 }
4958 break;
4959 }
4960 OSIncrementAtomic64(&vm_prefault_nb_pages);
4961 }
4962
4963 /* Next virtual address */
4964 va += PAGE_SIZE;
4965 }
4966 if (vmk_flags.vmkf_keep_map_locked) {
4967 vm_map_unlock(target_map);
4968 }
4969 }
4970
4971 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4972 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4973 *address = map_addr + offset_in_mapping;
4974 } else {
4975 *address = map_addr;
4976 }
4977 return result;
4978 }
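/*
 * Illustrative userspace sketch (not part of the xnu sources): one way the
 * IKOT_NAMED_ENTRY path of vm_map_enter_mem_object_helper() above can be
 * exercised from user code, by wrapping an existing region in a named memory
 * entry and mapping it a second time through mach_vm_map().  Headers and
 * error handling are kept minimal; treat this as a hedged sketch rather than
 * a reference client.
 */
#if 0 /* example only, never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
    mach_vm_address_t addr = 0, alias = 0;
    mach_vm_size_t size = 0x4000;
    memory_object_size_t entry_size = size;
    mach_port_t entry = MACH_PORT_NULL;
    kern_return_t kr;

    /* back the region with anonymous memory */
    kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
    if (kr != KERN_SUCCESS) {
        return 1;
    }

    /* wrap it in a named entry (a send right of type IKOT_NAMED_ENTRY) */
    kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, addr,
        VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
    if (kr != KERN_SUCCESS) {
        return 1;
    }

    /* map the named entry again; the kernel ends up in the helper above */
    kr = mach_vm_map(mach_task_self(), &alias, size, 0, VM_FLAGS_ANYWHERE,
        entry, 0, FALSE, VM_PROT_READ | VM_PROT_WRITE,
        VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_DEFAULT);
    printf("kr=%d original=0x%llx alias=0x%llx\n", kr,
        (unsigned long long)addr, (unsigned long long)alias);
    return kr == KERN_SUCCESS ? 0 : 1;
}
#endif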
4979
4980 kern_return_t
4981 vm_map_enter_mem_object(
4982 vm_map_t target_map,
4983 vm_map_offset_t *address,
4984 vm_map_size_t initial_size,
4985 vm_map_offset_t mask,
4986 int flags,
4987 vm_map_kernel_flags_t vmk_flags,
4988 vm_tag_t tag,
4989 ipc_port_t port,
4990 vm_object_offset_t offset,
4991 boolean_t copy,
4992 vm_prot_t cur_protection,
4993 vm_prot_t max_protection,
4994 vm_inherit_t inheritance)
4995 {
4996 kern_return_t ret;
4997
4998 ret = vm_map_enter_mem_object_helper(target_map,
4999 address,
5000 initial_size,
5001 mask,
5002 flags,
5003 vmk_flags,
5004 tag,
5005 port,
5006 offset,
5007 copy,
5008 cur_protection,
5009 max_protection,
5010 inheritance,
5011 NULL,
5012 0);
5013
5014 #if KASAN
5015 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5016 kasan_notify_address(*address, initial_size);
5017 }
5018 #endif
5019
5020 return ret;
5021 }
5022
5023 kern_return_t
5024 vm_map_enter_mem_object_prefault(
5025 vm_map_t target_map,
5026 vm_map_offset_t *address,
5027 vm_map_size_t initial_size,
5028 vm_map_offset_t mask,
5029 int flags,
5030 vm_map_kernel_flags_t vmk_flags,
5031 vm_tag_t tag,
5032 ipc_port_t port,
5033 vm_object_offset_t offset,
5034 vm_prot_t cur_protection,
5035 vm_prot_t max_protection,
5036 upl_page_list_ptr_t page_list,
5037 unsigned int page_list_count)
5038 {
5039 kern_return_t ret;
5040
5041 ret = vm_map_enter_mem_object_helper(target_map,
5042 address,
5043 initial_size,
5044 mask,
5045 flags,
5046 vmk_flags,
5047 tag,
5048 port,
5049 offset,
5050 FALSE,
5051 cur_protection,
5052 max_protection,
5053 VM_INHERIT_DEFAULT,
5054 page_list,
5055 page_list_count);
5056
5057 #if KASAN
5058 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5059 kasan_notify_address(*address, initial_size);
5060 }
5061 #endif
5062
5063 return ret;
5064 }
5065
5066
5067 kern_return_t
5068 vm_map_enter_mem_object_control(
5069 vm_map_t target_map,
5070 vm_map_offset_t *address,
5071 vm_map_size_t initial_size,
5072 vm_map_offset_t mask,
5073 int flags,
5074 vm_map_kernel_flags_t vmk_flags,
5075 vm_tag_t tag,
5076 memory_object_control_t control,
5077 vm_object_offset_t offset,
5078 boolean_t copy,
5079 vm_prot_t cur_protection,
5080 vm_prot_t max_protection,
5081 vm_inherit_t inheritance)
5082 {
5083 vm_map_address_t map_addr;
5084 vm_map_size_t map_size;
5085 vm_object_t object;
5086 vm_object_size_t size;
5087 kern_return_t result;
5088 memory_object_t pager;
5089 vm_prot_t pager_prot;
5090 kern_return_t kr;
5091 #if __arm64__
5092 boolean_t fourk = vmk_flags.vmkf_fourk;
5093 #endif /* __arm64__ */
5094
5095 /*
5096 * Check arguments for validity
5097 */
5098 if ((target_map == VM_MAP_NULL) ||
5099 (cur_protection & ~VM_PROT_ALL) ||
5100 (max_protection & ~VM_PROT_ALL) ||
5101 (inheritance > VM_INHERIT_LAST_VALID) ||
5102 initial_size == 0) {
5103 return KERN_INVALID_ARGUMENT;
5104 }
5105
5106 #if __arm64__
5107 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
5108 fourk = FALSE;
5109 }
5110
5111 if (fourk) {
5112 map_addr = vm_map_trunc_page(*address,
5113 FOURK_PAGE_MASK);
5114 map_size = vm_map_round_page(initial_size,
5115 FOURK_PAGE_MASK);
5116 } else
5117 #endif /* __arm64__ */
5118 {
5119 map_addr = vm_map_trunc_page(*address,
5120 VM_MAP_PAGE_MASK(target_map));
5121 map_size = vm_map_round_page(initial_size,
5122 VM_MAP_PAGE_MASK(target_map));
5123 }
5124 size = vm_object_round_page(initial_size);
5125
5126 object = memory_object_control_to_vm_object(control);
5127
5128 if (object == VM_OBJECT_NULL) {
5129 return KERN_INVALID_OBJECT;
5130 }
5131
5132 if (object == kernel_object) {
5133 printf("Warning: Attempt to map kernel object"
5134 " by a non-private kernel entity\n");
5135 return KERN_INVALID_OBJECT;
5136 }
5137
5138 vm_object_lock(object);
5139 object->ref_count++;
5140 vm_object_res_reference(object);
5141
5142 /*
5143 * For "named" VM objects, let the pager know that the
5144 * memory object is being mapped. Some pagers need to keep
5145 * track of this, to know when they can reclaim the memory
5146 * object, for example.
5147 * VM calls memory_object_map() for each mapping (specifying
5148 * the protection of each mapping) and calls
5149 * memory_object_last_unmap() when all the mappings are gone.
5150 */
5151 pager_prot = max_protection;
5152 if (copy) {
5153 pager_prot &= ~VM_PROT_WRITE;
5154 }
5155 pager = object->pager;
5156 if (object->named &&
5157 pager != MEMORY_OBJECT_NULL &&
5158 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5159 assert(object->pager_ready);
5160 vm_object_mapping_wait(object, THREAD_UNINT);
5161 vm_object_mapping_begin(object);
5162 vm_object_unlock(object);
5163
5164 kr = memory_object_map(pager, pager_prot);
5165 assert(kr == KERN_SUCCESS);
5166
5167 vm_object_lock(object);
5168 vm_object_mapping_end(object);
5169 }
5170 vm_object_unlock(object);
5171
5172 /*
5173 * Perform the copy if requested
5174 */
5175
5176 if (copy) {
5177 vm_object_t new_object;
5178 vm_object_offset_t new_offset;
5179
5180 result = vm_object_copy_strategically(object, offset, size,
5181 &new_object, &new_offset,
5182 &copy);
5183
5184
5185 if (result == KERN_MEMORY_RESTART_COPY) {
5186 boolean_t success;
5187 boolean_t src_needs_copy;
5188
5189 /*
5190 * XXX
5191 * We currently ignore src_needs_copy.
5192 * This really is the issue of how to make
5193 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5194 * non-kernel users to use. Solution forthcoming.
5195 * In the meantime, since we don't allow non-kernel
5196 * memory managers to specify symmetric copy,
5197 * we won't run into problems here.
5198 */
5199 new_object = object;
5200 new_offset = offset;
5201 success = vm_object_copy_quickly(&new_object,
5202 new_offset, size,
5203 &src_needs_copy,
5204 &copy);
5205 assert(success);
5206 result = KERN_SUCCESS;
5207 }
5208 /*
5209 * Throw away the reference to the
5210 * original object, as it won't be mapped.
5211 */
5212
5213 vm_object_deallocate(object);
5214
5215 if (result != KERN_SUCCESS) {
5216 return result;
5217 }
5218
5219 object = new_object;
5220 offset = new_offset;
5221 }
5222
5223 #if __arm64__
5224 if (fourk) {
5225 result = vm_map_enter_fourk(target_map,
5226 &map_addr,
5227 map_size,
5228 (vm_map_offset_t)mask,
5229 flags,
5230 vmk_flags,
5231 tag,
5232 object, offset,
5233 copy,
5234 cur_protection, max_protection,
5235 inheritance);
5236 } else
5237 #endif /* __arm64__ */
5238 {
5239 result = vm_map_enter(target_map,
5240 &map_addr, map_size,
5241 (vm_map_offset_t)mask,
5242 flags,
5243 vmk_flags,
5244 tag,
5245 object, offset,
5246 copy,
5247 cur_protection, max_protection,
5248 inheritance);
5249 }
5250 if (result != KERN_SUCCESS) {
5251 vm_object_deallocate(object);
5252 }
5253 *address = map_addr;
5254
5255 return result;
5256 }
5257
5258
5259 #if VM_CPM
5260
5261 #ifdef MACH_ASSERT
5262 extern pmap_paddr_t avail_start, avail_end;
5263 #endif
5264
5265 /*
5266 * Allocate memory in the specified map, with the caveat that
5267 * the memory is physically contiguous. This call may fail
5268 * if the system can't find sufficient contiguous memory.
5269 * This call may cause or lead to heart-stopping amounts of
5270 * paging activity.
5271 *
5272 * Memory obtained from this call should be freed in the
5273 * normal way, viz., via vm_deallocate.
5274 */
5275 kern_return_t
5276 vm_map_enter_cpm(
5277 vm_map_t map,
5278 vm_map_offset_t *addr,
5279 vm_map_size_t size,
5280 int flags)
5281 {
5282 vm_object_t cpm_obj;
5283 pmap_t pmap;
5284 vm_page_t m, pages;
5285 kern_return_t kr;
5286 vm_map_offset_t va, start, end, offset;
5287 #if MACH_ASSERT
5288 vm_map_offset_t prev_addr = 0;
5289 #endif /* MACH_ASSERT */
5290
5291 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5292 vm_tag_t tag;
5293
5294 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5295 /* XXX TODO4K do we need to support this? */
5296 *addr = 0;
5297 return KERN_NOT_SUPPORTED;
5298 }
5299
5300 VM_GET_FLAGS_ALIAS(flags, tag);
5301
5302 if (size == 0) {
5303 *addr = 0;
5304 return KERN_SUCCESS;
5305 }
5306 if (anywhere) {
5307 *addr = vm_map_min(map);
5308 } else {
5309 *addr = vm_map_trunc_page(*addr,
5310 VM_MAP_PAGE_MASK(map));
5311 }
5312 size = vm_map_round_page(size,
5313 VM_MAP_PAGE_MASK(map));
5314
5315 /*
5316 * LP64todo - cpm_allocate should probably allow
5317 * allocations of >4GB, but not with the current
5318 * algorithm, so just cast down the size for now.
5319 */
5320 if (size > VM_MAX_ADDRESS) {
5321 return KERN_RESOURCE_SHORTAGE;
5322 }
5323 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5324 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5325 return kr;
5326 }
5327
5328 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5329 assert(cpm_obj != VM_OBJECT_NULL);
5330 assert(cpm_obj->internal);
5331 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5332 assert(cpm_obj->can_persist == FALSE);
5333 assert(cpm_obj->pager_created == FALSE);
5334 assert(cpm_obj->pageout == FALSE);
5335 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5336
5337 /*
5338 * Insert pages into object.
5339 */
5340
5341 vm_object_lock(cpm_obj);
5342 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5343 m = pages;
5344 pages = NEXT_PAGE(m);
5345 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5346
5347 assert(!m->vmp_gobbled);
5348 assert(!m->vmp_wanted);
5349 assert(!m->vmp_pageout);
5350 assert(!m->vmp_tabled);
5351 assert(VM_PAGE_WIRED(m));
5352 assert(m->vmp_busy);
5353 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5354
5355 m->vmp_busy = FALSE;
5356 vm_page_insert(m, cpm_obj, offset);
5357 }
5358 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5359 vm_object_unlock(cpm_obj);
5360
5361 /*
5362 * Hang onto a reference on the object in case a
5363 * multi-threaded application for some reason decides
5364 * to deallocate the portion of the address space into
5365 * which we will insert this object.
5366 *
5367 * Unfortunately, we must insert the object now before
5368 * we can talk to the pmap module about which addresses
5369 * must be wired down. Hence, the race with a multi-
5370 * threaded app.
5371 */
5372 vm_object_reference(cpm_obj);
5373
5374 /*
5375 * Insert object into map.
5376 */
5377
5378 kr = vm_map_enter(
5379 map,
5380 addr,
5381 size,
5382 (vm_map_offset_t)0,
5383 flags,
5384 VM_MAP_KERNEL_FLAGS_NONE,
5385 cpm_obj,
5386 (vm_object_offset_t)0,
5387 FALSE,
5388 VM_PROT_ALL,
5389 VM_PROT_ALL,
5390 VM_INHERIT_DEFAULT);
5391
5392 if (kr != KERN_SUCCESS) {
5393 /*
5394 * A CPM object doesn't have can_persist set,
5395 * so all we have to do is deallocate it to
5396 * free up these pages.
5397 */
5398 assert(cpm_obj->pager_created == FALSE);
5399 assert(cpm_obj->can_persist == FALSE);
5400 assert(cpm_obj->pageout == FALSE);
5401 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5402 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5403 vm_object_deallocate(cpm_obj); /* kill creation ref */
5404 }
5405
5406 /*
5407 * Inform the physical mapping system that the
5408 * range of addresses may not fault, so that
5409 * page tables and such can be locked down as well.
5410 */
5411 start = *addr;
5412 end = start + size;
5413 pmap = vm_map_pmap(map);
5414 pmap_pageable(pmap, start, end, FALSE);
5415
5416 /*
5417 * Enter each page into the pmap, to avoid faults.
5418 * Note that this loop could be coded more efficiently,
5419 * if the need arose, rather than looking up each page
5420 * again.
5421 */
5422 for (offset = 0, va = start; offset < size;
5423 va += PAGE_SIZE, offset += PAGE_SIZE) {
5424 int type_of_fault;
5425
5426 vm_object_lock(cpm_obj);
5427 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5428 assert(m != VM_PAGE_NULL);
5429
5430 vm_page_zero_fill(m);
5431
5432 type_of_fault = DBG_ZERO_FILL_FAULT;
5433
5434 vm_fault_enter(m, pmap, va,
5435 PAGE_SIZE, 0,
5436 VM_PROT_ALL, VM_PROT_WRITE,
5437 VM_PAGE_WIRED(m),
5438 FALSE, /* change_wiring */
5439 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5440 FALSE, /* no_cache */
5441 FALSE, /* cs_bypass */
5442 0, /* user_tag */
5443 0, /* pmap_options */
5444 NULL, /* need_retry */
5445 &type_of_fault);
5446
5447 vm_object_unlock(cpm_obj);
5448 }
5449
5450 #if MACH_ASSERT
5451 /*
5452 * Verify ordering in address space.
5453 */
5454 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5455 vm_object_lock(cpm_obj);
5456 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5457 vm_object_unlock(cpm_obj);
5458 if (m == VM_PAGE_NULL) {
5459 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5460 cpm_obj, (uint64_t)offset);
5461 }
5462 assert(m->vmp_tabled);
5463 assert(!m->vmp_busy);
5464 assert(!m->vmp_wanted);
5465 assert(!m->vmp_fictitious);
5466 assert(!m->vmp_private);
5467 assert(!m->vmp_absent);
5468 assert(!m->vmp_error);
5469 assert(!m->vmp_cleaning);
5470 assert(!m->vmp_laundry);
5471 assert(!m->vmp_precious);
5472 assert(!m->vmp_clustered);
5473 if (offset != 0) {
5474 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5475 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5476 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5477 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5478 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5479 panic("vm_allocate_cpm: pages not contig!");
5480 }
5481 }
5482 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5483 }
5484 #endif /* MACH_ASSERT */
5485
5486 vm_object_deallocate(cpm_obj); /* kill extra ref */
5487
5488 return kr;
5489 }
5490
5491
5492 #else /* VM_CPM */
5493
5494 /*
5495 * Interface is defined in all cases, but unless the kernel
5496 * is built explicitly for this option, the interface does
5497 * nothing.
5498 */
5499
5500 kern_return_t
5501 vm_map_enter_cpm(
5502 __unused vm_map_t map,
5503 __unused vm_map_offset_t *addr,
5504 __unused vm_map_size_t size,
5505 __unused int flags)
5506 {
5507 return KERN_FAILURE;
5508 }
5509 #endif /* VM_CPM */
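/*
 * Illustrative sketch (not part of the xnu sources): the physical
 * contiguity check performed under MACH_ASSERT in vm_map_enter_cpm()
 * above, restated over a plain array of page frame numbers.  The array
 * contents are hypothetical.
 */
#if 0 /* example only, never compiled into the kernel */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool
frames_are_contiguous(const uint64_t *ppnum, size_t count)
{
    for (size_t i = 1; i < count; i++) {
        if (ppnum[i] != ppnum[i - 1] + 1) {
            return false;   /* same check as the prev_addr + 1 assert */
        }
    }
    return true;
}

int
main(void)
{
    uint64_t frames[] = { 0x1000, 0x1001, 0x1002, 0x1003 };
    printf("contiguous: %d\n",
        frames_are_contiguous(frames, sizeof(frames) / sizeof(frames[0])));
    return 0;
}
#endif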
5510
5511 /* Not used without nested pmaps */
5512 #ifndef NO_NESTED_PMAP
5513 /*
5514 * Clip and unnest a portion of a nested submap mapping.
5515 */
5516
5517
5518 static void
5519 vm_map_clip_unnest(
5520 vm_map_t map,
5521 vm_map_entry_t entry,
5522 vm_map_offset_t start_unnest,
5523 vm_map_offset_t end_unnest)
5524 {
5525 vm_map_offset_t old_start_unnest = start_unnest;
5526 vm_map_offset_t old_end_unnest = end_unnest;
5527
5528 assert(entry->is_sub_map);
5529 assert(VME_SUBMAP(entry) != NULL);
5530 assert(entry->use_pmap);
5531
5532 /*
5533 * Query the platform for the optimal unnest range.
5534 * DRK: There's some duplication of effort here, since
5535 * callers may have adjusted the range to some extent. This
5536 * routine was introduced to support 1GiB subtree nesting
5537 * for x86 platforms, which can also nest on 2MiB boundaries
5538 * depending on size/alignment.
5539 */
5540 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5541 assert(VME_SUBMAP(entry)->is_nested_map);
5542 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5543 log_unnest_badness(map,
5544 old_start_unnest,
5545 old_end_unnest,
5546 VME_SUBMAP(entry)->is_nested_map,
5547 (entry->vme_start +
5548 VME_SUBMAP(entry)->lowest_unnestable_start -
5549 VME_OFFSET(entry)));
5550 }
5551
5552 if (entry->vme_start > start_unnest ||
5553 entry->vme_end < end_unnest) {
5554 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5555 "bad nested entry: start=0x%llx end=0x%llx\n",
5556 (long long)start_unnest, (long long)end_unnest,
5557 (long long)entry->vme_start, (long long)entry->vme_end);
5558 }
5559
5560 if (start_unnest > entry->vme_start) {
5561 _vm_map_clip_start(&map->hdr,
5562 entry,
5563 start_unnest);
5564 if (map->holelistenabled) {
5565 vm_map_store_update_first_free(map, NULL, FALSE);
5566 } else {
5567 vm_map_store_update_first_free(map, map->first_free, FALSE);
5568 }
5569 }
5570 if (entry->vme_end > end_unnest) {
5571 _vm_map_clip_end(&map->hdr,
5572 entry,
5573 end_unnest);
5574 if (map->holelistenabled) {
5575 vm_map_store_update_first_free(map, NULL, FALSE);
5576 } else {
5577 vm_map_store_update_first_free(map, map->first_free, FALSE);
5578 }
5579 }
5580
5581 pmap_unnest(map->pmap,
5582 entry->vme_start,
5583 entry->vme_end - entry->vme_start);
5584 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5585 /* clean up parent map/maps */
5586 vm_map_submap_pmap_clean(
5587 map, entry->vme_start,
5588 entry->vme_end,
5589 VME_SUBMAP(entry),
5590 VME_OFFSET(entry));
5591 }
5592 entry->use_pmap = FALSE;
5593 if ((map->pmap != kernel_pmap) &&
5594 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5595 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5596 }
5597 }
5598 #endif /* NO_NESTED_PMAP */
5599
5600 /*
5601 * vm_map_clip_start: [ internal use only ]
5602 *
5603 * Asserts that the given entry begins at or after
5604 * the specified address; if necessary,
5605 * it splits the entry into two.
5606 */
5607 void
5608 vm_map_clip_start(
5609 vm_map_t map,
5610 vm_map_entry_t entry,
5611 vm_map_offset_t startaddr)
5612 {
5613 #ifndef NO_NESTED_PMAP
5614 if (entry->is_sub_map &&
5615 entry->use_pmap &&
5616 startaddr >= entry->vme_start) {
5617 vm_map_offset_t start_unnest, end_unnest;
5618
5619 /*
5620 * Make sure "startaddr" is no longer in a nested range
5621 * before we clip. Unnest only the minimum range the platform
5622 * can handle.
5623 * vm_map_clip_unnest may perform additional adjustments to
5624 * the unnest range.
5625 */
5626 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5627 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5628 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5629 }
5630 #endif /* NO_NESTED_PMAP */
5631 if (startaddr > entry->vme_start) {
5632 if (VME_OBJECT(entry) &&
5633 !entry->is_sub_map &&
5634 VME_OBJECT(entry)->phys_contiguous) {
5635 pmap_remove(map->pmap,
5636 (addr64_t)(entry->vme_start),
5637 (addr64_t)(entry->vme_end));
5638 }
5639 if (entry->vme_atomic) {
5640 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5641 }
5642
5643 DTRACE_VM5(
5644 vm_map_clip_start,
5645 vm_map_t, map,
5646 vm_map_offset_t, entry->vme_start,
5647 vm_map_offset_t, entry->vme_end,
5648 vm_map_offset_t, startaddr,
5649 int, VME_ALIAS(entry));
5650
5651 _vm_map_clip_start(&map->hdr, entry, startaddr);
5652 if (map->holelistenabled) {
5653 vm_map_store_update_first_free(map, NULL, FALSE);
5654 } else {
5655 vm_map_store_update_first_free(map, map->first_free, FALSE);
5656 }
5657 }
5658 }
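/*
 * Illustrative sketch (not part of the xnu sources): the power-of-two
 * rounding used above to widen an unnest request to whole shared-region
 * nesting units.  vm_map_clip_start() rounds the start down and unnests
 * one region; vm_map_clip_end() instead rounds the end up.  The 32MB
 * region size is a hypothetical stand-in for pmap_shared_region_size_min().
 */
#if 0 /* example only, never compiled into the kernel */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t region = 0x2000000;           /* hypothetical nesting unit */
    uint64_t startaddr = 0x7fff12345000;
    uint64_t endaddr   = 0x7fff1234a000;

    /* vm_map_clip_start(): round the start down, unnest one region */
    uint64_t start_unnest = startaddr & ~(region - 1);
    uint64_t end_unnest   = start_unnest + region;

    /* vm_map_clip_end(): keep the entry start, round the end up */
    uint64_t end_unnest_up = (endaddr + region - 1) & ~(region - 1);

    printf("clip_start unnests [0x%llx, 0x%llx); clip_end rounds end to 0x%llx\n",
        (unsigned long long)start_unnest,
        (unsigned long long)end_unnest,
        (unsigned long long)end_unnest_up);
    return 0;
}
#endif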
5659
5660
5661 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5662 MACRO_BEGIN \
5663 if ((startaddr) > (entry)->vme_start) \
5664 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5665 MACRO_END
5666
5667 /*
5668 * This routine is called only when it is known that
5669 * the entry must be split.
5670 */
5671 static void
5672 _vm_map_clip_start(
5673 struct vm_map_header *map_header,
5674 vm_map_entry_t entry,
5675 vm_map_offset_t start)
5676 {
5677 vm_map_entry_t new_entry;
5678
5679 /*
5680 * Split off the front portion --
5681 * note that we must insert the new
5682 * entry BEFORE this one, so that
5683 * this entry has the specified starting
5684 * address.
5685 */
5686
5687 if (entry->map_aligned) {
5688 assert(VM_MAP_PAGE_ALIGNED(start,
5689 VM_MAP_HDR_PAGE_MASK(map_header)));
5690 }
5691
5692 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5693 vm_map_entry_copy_full(new_entry, entry);
5694
5695 new_entry->vme_end = start;
5696 assert(new_entry->vme_start < new_entry->vme_end);
5697 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5698 assert(start < entry->vme_end);
5699 entry->vme_start = start;
5700
5701 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5702
5703 if (entry->is_sub_map) {
5704 vm_map_reference(VME_SUBMAP(new_entry));
5705 } else {
5706 vm_object_reference(VME_OBJECT(new_entry));
5707 }
5708 }
5709
5710
5711 /*
5712 * vm_map_clip_end: [ internal use only ]
5713 *
5714 * Asserts that the given entry ends at or before
5715 * the specified address; if necessary,
5716 * it splits the entry into two.
5717 */
5718 void
5719 vm_map_clip_end(
5720 vm_map_t map,
5721 vm_map_entry_t entry,
5722 vm_map_offset_t endaddr)
5723 {
5724 if (endaddr > entry->vme_end) {
5725 /*
5726 * Within the scope of this clipping, limit "endaddr" to
5727 * the end of this map entry...
5728 */
5729 endaddr = entry->vme_end;
5730 }
5731 #ifndef NO_NESTED_PMAP
5732 if (entry->is_sub_map && entry->use_pmap) {
5733 vm_map_offset_t start_unnest, end_unnest;
5734
5735 /*
5736 * Make sure the range between the start of this entry and
5737 * the new "endaddr" is no longer nested before we clip.
5738 * Unnest only the minimum range the platform can handle.
5739 * vm_map_clip_unnest may perform additional adjustments to
5740 * the unnest range.
5741 */
5742 start_unnest = entry->vme_start;
5743 end_unnest =
5744 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5745 ~(pmap_shared_region_size_min(map->pmap) - 1);
5746 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5747 }
5748 #endif /* NO_NESTED_PMAP */
5749 if (endaddr < entry->vme_end) {
5750 if (VME_OBJECT(entry) &&
5751 !entry->is_sub_map &&
5752 VME_OBJECT(entry)->phys_contiguous) {
5753 pmap_remove(map->pmap,
5754 (addr64_t)(entry->vme_start),
5755 (addr64_t)(entry->vme_end));
5756 }
5757 if (entry->vme_atomic) {
5758 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5759 }
5760 DTRACE_VM5(
5761 vm_map_clip_end,
5762 vm_map_t, map,
5763 vm_map_offset_t, entry->vme_start,
5764 vm_map_offset_t, entry->vme_end,
5765 vm_map_offset_t, endaddr,
5766 int, VME_ALIAS(entry));
5767
5768 _vm_map_clip_end(&map->hdr, entry, endaddr);
5769 if (map->holelistenabled) {
5770 vm_map_store_update_first_free(map, NULL, FALSE);
5771 } else {
5772 vm_map_store_update_first_free(map, map->first_free, FALSE);
5773 }
5774 }
5775 }
5776
5777
5778 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5779 MACRO_BEGIN \
5780 if ((endaddr) < (entry)->vme_end) \
5781 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5782 MACRO_END
5783
5784 /*
5785 * This routine is called only when it is known that
5786 * the entry must be split.
5787 */
5788 static void
5789 _vm_map_clip_end(
5790 struct vm_map_header *map_header,
5791 vm_map_entry_t entry,
5792 vm_map_offset_t end)
5793 {
5794 vm_map_entry_t new_entry;
5795
5796 /*
5797 * Create a new entry and insert it
5798 * AFTER the specified entry
5799 */
5800
5801 if (entry->map_aligned) {
5802 assert(VM_MAP_PAGE_ALIGNED(end,
5803 VM_MAP_HDR_PAGE_MASK(map_header)));
5804 }
5805
5806 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5807 vm_map_entry_copy_full(new_entry, entry);
5808
5809 assert(entry->vme_start < end);
5810 new_entry->vme_start = entry->vme_end = end;
5811 VME_OFFSET_SET(new_entry,
5812 VME_OFFSET(new_entry) + (end - entry->vme_start));
5813 assert(new_entry->vme_start < new_entry->vme_end);
5814
5815 _vm_map_store_entry_link(map_header, entry, new_entry);
5816
5817 if (entry->is_sub_map) {
5818 vm_map_reference(VME_SUBMAP(new_entry));
5819 } else {
5820 vm_object_reference(VME_OBJECT(new_entry));
5821 }
5822 }
5823
5824
5825 /*
5826 * VM_MAP_RANGE_CHECK: [ internal use only ]
5827 *
5828 * Asserts that the starting and ending region
5829 * addresses fall within the valid range of the map.
5830 */
5831 #define VM_MAP_RANGE_CHECK(map, start, end) \
5832 MACRO_BEGIN \
5833 if (start < vm_map_min(map)) \
5834 start = vm_map_min(map); \
5835 if (end > vm_map_max(map)) \
5836 end = vm_map_max(map); \
5837 if (start > end) \
5838 start = end; \
5839 MACRO_END
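/*
 * Editorial note (not part of the original source): a minimal sketch of how
 * VM_MAP_RANGE_CHECK clamps a caller-supplied range, assuming a map whose
 * valid range is [0x1000, 0x9000):
 *
 *     vm_map_offset_t start = 0x0800, end = 0xA000;
 *     VM_MAP_RANGE_CHECK(map, start, end);
 *     // start == 0x1000, end == 0x9000
 *
 *     start = 0xB000; end = 0xC000;
 *     VM_MAP_RANGE_CHECK(map, start, end);
 *     // start == end == 0x9000: an empty range rather than an error
 */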
5840
5841 /*
5842 * vm_map_range_check: [ internal use only ]
5843 *
5844 * Check that the region defined by the specified start and
5845 * end addresses is wholly contained within a single map
5846 * entry or set of adjacent map entries of the specified map,
5847 * i.e. the specified region contains no unmapped space.
5848 * If any or all of the region is unmapped, FALSE is returned.
5849 * Otherwise, TRUE is returned and if the output argument 'entry'
5850 * is not NULL it points to the map entry containing the start
5851 * of the region.
5852 *
5853 * The map is locked for reading on entry and is left locked.
5854 */
5855 static boolean_t
5856 vm_map_range_check(
5857 vm_map_t map,
5858 vm_map_offset_t start,
5859 vm_map_offset_t end,
5860 vm_map_entry_t *entry)
5861 {
5862 vm_map_entry_t cur;
5863 vm_map_offset_t prev;
5864
5865 /*
5866 * Basic sanity checks first
5867 */
5868 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5869 return FALSE;
5870 }
5871
5872 /*
5873 * Check first if the region starts within a valid
5874 * mapping for the map.
5875 */
5876 if (!vm_map_lookup_entry(map, start, &cur)) {
5877 return FALSE;
5878 }
5879
5880 /*
5881 * Optimize for the case that the region is contained
5882 * in a single map entry.
5883 */
5884 if (entry != (vm_map_entry_t *) NULL) {
5885 *entry = cur;
5886 }
5887 if (end <= cur->vme_end) {
5888 return TRUE;
5889 }
5890
5891 /*
5892 * If the region is not wholly contained within a
5893 * single entry, walk the entries looking for holes.
5894 */
5895 prev = cur->vme_end;
5896 cur = cur->vme_next;
5897 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5898 if (end <= cur->vme_end) {
5899 return TRUE;
5900 }
5901 prev = cur->vme_end;
5902 cur = cur->vme_next;
5903 }
5904 return FALSE;
5905 }
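/*
 * Editorial sketch (not part of the original source): typical use of
 * vm_map_range_check() by a caller that already holds the map lock for
 * reading; "do_something_with" is a hypothetical placeholder.
 *
 *     vm_map_entry_t entry;
 *
 *     vm_map_lock_read(map);
 *     if (!vm_map_range_check(map, start, end, &entry)) {
 *             vm_map_unlock_read(map);
 *             return KERN_INVALID_ADDRESS;
 *     }
 *     // "entry" points at the map entry containing "start" and
 *     // [start, end) is known to contain no unmapped space.
 *     do_something_with(entry);
 *     vm_map_unlock_read(map);
 */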
5906
5907 /*
5908 * vm_map_submap: [ kernel use only ]
5909 *
5910 * Mark the given range as handled by a subordinate map.
5911 *
5912 * This range must have been created with vm_map_find using
5913 * the vm_submap_object, and no other operations may have been
5914 * performed on this range prior to calling vm_map_submap.
5915 *
5916 * Only a limited number of operations can be performed
5917 * within this range after calling vm_map_submap:
5918 * vm_fault
5919 * [Don't try vm_map_copyin!]
5920 *
5921 * To remove a submapping, one must first remove the
5922 * range from the superior map, and then destroy the
5923 * submap (if desired). [Better yet, don't try it.]
5924 */
5925 kern_return_t
5926 vm_map_submap(
5927 vm_map_t map,
5928 vm_map_offset_t start,
5929 vm_map_offset_t end,
5930 vm_map_t submap,
5931 vm_map_offset_t offset,
5932 #ifdef NO_NESTED_PMAP
5933 __unused
5934 #endif /* NO_NESTED_PMAP */
5935 boolean_t use_pmap)
5936 {
5937 vm_map_entry_t entry;
5938 kern_return_t result = KERN_INVALID_ARGUMENT;
5939 vm_object_t object;
5940
5941 vm_map_lock(map);
5942
5943 if (!vm_map_lookup_entry(map, start, &entry)) {
5944 entry = entry->vme_next;
5945 }
5946
5947 if (entry == vm_map_to_entry(map) ||
5948 entry->is_sub_map) {
5949 vm_map_unlock(map);
5950 return KERN_INVALID_ARGUMENT;
5951 }
5952
5953 vm_map_clip_start(map, entry, start);
5954 vm_map_clip_end(map, entry, end);
5955
5956 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5957 (!entry->is_sub_map) &&
5958 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5959 (object->resident_page_count == 0) &&
5960 (object->copy == VM_OBJECT_NULL) &&
5961 (object->shadow == VM_OBJECT_NULL) &&
5962 (!object->pager_created)) {
5963 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5964 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5965 vm_object_deallocate(object);
5966 entry->is_sub_map = TRUE;
5967 entry->use_pmap = FALSE;
5968 VME_SUBMAP_SET(entry, submap);
5969 vm_map_reference(submap);
5970 if (submap->mapped_in_other_pmaps == FALSE &&
5971 vm_map_pmap(submap) != PMAP_NULL &&
5972 vm_map_pmap(submap) != vm_map_pmap(map)) {
5973 /*
5974 * This submap is being mapped in a map
5975 * that uses a different pmap.
5976 * Set its "mapped_in_other_pmaps" flag
5977 * to indicate that we now need to
5978 * remove mappings from all pmaps rather
5979 * than just the submap's pmap.
5980 */
5981 submap->mapped_in_other_pmaps = TRUE;
5982 }
5983
5984 #ifndef NO_NESTED_PMAP
5985 if (use_pmap) {
5986 /* nest if platform code will allow */
5987 if (submap->pmap == NULL) {
5988 ledger_t ledger = map->pmap->ledger;
5989 submap->pmap = pmap_create_options(ledger,
5990 (vm_map_size_t) 0, 0);
5991 if (submap->pmap == PMAP_NULL) {
5992 vm_map_unlock(map);
5993 return KERN_NO_SPACE;
5994 }
5995 #if defined(__arm__) || defined(__arm64__)
5996 pmap_set_nested(submap->pmap);
5997 #endif
5998 }
5999 result = pmap_nest(map->pmap,
6000 (VME_SUBMAP(entry))->pmap,
6001 (addr64_t)start,
6002 (uint64_t)(end - start));
6003 if (result) {
6004 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
6005 }
6006 entry->use_pmap = TRUE;
6007 }
6008 #else /* NO_NESTED_PMAP */
6009 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
6010 #endif /* NO_NESTED_PMAP */
6011 result = KERN_SUCCESS;
6012 }
6013 vm_map_unlock(map);
6014
6015 return result;
6016 }
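/*
 * Editorial sketch (not part of the original source): the nesting protocol
 * described above, in outline.  Error handling is elided and the range
 * [start, end) is assumed to have already been reserved in "parent_map"
 * with the vm_submap_object as described in the block comment.
 *
 *     kern_return_t kr;
 *
 *     kr = vm_map_submap(parent_map,      // superior map
 *         start, end,                     // reserved range in parent_map
 *         child_map,                      // subordinate map
 *         0,                              // offset into child_map
 *         TRUE);                          // request pmap nesting if allowed
 *     // On success the parent entry now refers to child_map; only a
 *     // limited set of operations (e.g. vm_fault) is valid on the range.
 */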
6017
6018 /*
6019 * vm_map_protect:
6020 *
6021 * Sets the protection of the specified address
6022 * region in the target map. If "set_max" is
6023 * specified, the maximum protection is to be set;
6024 * otherwise, only the current protection is affected.
6025 */
6026 kern_return_t
6027 vm_map_protect(
6028 vm_map_t map,
6029 vm_map_offset_t start,
6030 vm_map_offset_t end,
6031 vm_prot_t new_prot,
6032 boolean_t set_max)
6033 {
6034 vm_map_entry_t current;
6035 vm_map_offset_t prev;
6036 vm_map_entry_t entry;
6037 vm_prot_t new_max;
6038 int pmap_options = 0;
6039 kern_return_t kr;
6040
6041 if (new_prot & VM_PROT_COPY) {
6042 vm_map_offset_t new_start;
6043 vm_prot_t cur_prot, max_prot;
6044 vm_map_kernel_flags_t kflags;
6045
6046 /* LP64todo - see below */
6047 if (start >= map->max_offset) {
6048 return KERN_INVALID_ADDRESS;
6049 }
6050
6051 if ((new_prot & VM_PROT_EXECUTE) &&
6052 map->pmap != kernel_pmap &&
6053 (vm_map_cs_enforcement(map)
6054 #if XNU_TARGET_OS_OSX && __arm64__
6055 || !VM_MAP_IS_EXOTIC(map)
6056 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
6057 ) &&
6058 VM_MAP_POLICY_WX_FAIL(map)) {
6059 DTRACE_VM3(cs_wx,
6060 uint64_t, (uint64_t) start,
6061 uint64_t, (uint64_t) end,
6062 vm_prot_t, new_prot);
6063 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6064 proc_selfpid(),
6065 (current_task()->bsd_info
6066 ? proc_name_address(current_task()->bsd_info)
6067 : "?"),
6068 __FUNCTION__);
6069 return KERN_PROTECTION_FAILURE;
6070 }
6071
6072 /*
6073 * Let vm_map_remap_extract() know that it will need to:
6074 * + make a copy of the mapping
6075 * + add VM_PROT_WRITE to the max protections
6076 * + remove any protections that are no longer allowed from the
6077 * max protections (to avoid any WRITE/EXECUTE conflict, for
6078 * example).
6079 * Note that "max_prot" is an IN/OUT parameter only for this
6080 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
6081 * only.
6082 */
6083 max_prot = new_prot & VM_PROT_ALL;
6084 kflags = VM_MAP_KERNEL_FLAGS_NONE;
6085 kflags.vmkf_remap_prot_copy = TRUE;
6086 kflags.vmkf_overwrite_immutable = TRUE;
6087 new_start = start;
6088 kr = vm_map_remap(map,
6089 &new_start,
6090 end - start,
6091 0, /* mask */
6092 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
6093 kflags,
6094 0,
6095 map,
6096 start,
6097 TRUE, /* copy-on-write remapping! */
6098 &cur_prot,
6099 &max_prot,
6100 VM_INHERIT_DEFAULT);
6101 if (kr != KERN_SUCCESS) {
6102 return kr;
6103 }
6104 new_prot &= ~VM_PROT_COPY;
6105 }
6106
6107 vm_map_lock(map);
6108
6109 /* LP64todo - remove this check when vm_map_commpage64()
6110 * no longer has to stuff in a map_entry for the commpage
6111 * above the map's max_offset.
6112 */
6113 if (start >= map->max_offset) {
6114 vm_map_unlock(map);
6115 return KERN_INVALID_ADDRESS;
6116 }
6117
6118 while (1) {
6119 /*
6120 * Lookup the entry. If it doesn't start in a valid
6121 * entry, return an error.
6122 */
6123 if (!vm_map_lookup_entry(map, start, &entry)) {
6124 vm_map_unlock(map);
6125 return KERN_INVALID_ADDRESS;
6126 }
6127
6128 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
6129 start = SUPERPAGE_ROUND_DOWN(start);
6130 continue;
6131 }
6132 break;
6133 }
6134 if (entry->superpage_size) {
6135 end = SUPERPAGE_ROUND_UP(end);
6136 }
6137
6138 /*
6139 * Make a first pass to check for protection and address
6140 * violations.
6141 */
6142
6143 current = entry;
6144 prev = current->vme_start;
6145 while ((current != vm_map_to_entry(map)) &&
6146 (current->vme_start < end)) {
6147 /*
6148 * If there is a hole, return an error.
6149 */
6150 if (current->vme_start != prev) {
6151 vm_map_unlock(map);
6152 return KERN_INVALID_ADDRESS;
6153 }
6154
6155 new_max = current->max_protection;
6156 if ((new_prot & new_max) != new_prot) {
6157 vm_map_unlock(map);
6158 return KERN_PROTECTION_FAILURE;
6159 }
6160
6161 if ((new_prot & VM_PROT_WRITE) &&
6162 (new_prot & VM_PROT_EXECUTE) &&
6163 #if XNU_TARGET_OS_OSX
6164 map->pmap != kernel_pmap &&
6165 (vm_map_cs_enforcement(map)
6166 #if __arm64__
6167 || !VM_MAP_IS_EXOTIC(map)
6168 #endif /* __arm64__ */
6169 ) &&
6170 #endif /* XNU_TARGET_OS_OSX */
6171 !(current->used_for_jit)) {
6172 DTRACE_VM3(cs_wx,
6173 uint64_t, (uint64_t) current->vme_start,
6174 uint64_t, (uint64_t) current->vme_end,
6175 vm_prot_t, new_prot);
6176 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6177 proc_selfpid(),
6178 (current_task()->bsd_info
6179 ? proc_name_address(current_task()->bsd_info)
6180 : "?"),
6181 __FUNCTION__);
6182 new_prot &= ~VM_PROT_EXECUTE;
6183 if (VM_MAP_POLICY_WX_FAIL(map)) {
6184 vm_map_unlock(map);
6185 return KERN_PROTECTION_FAILURE;
6186 }
6187 }
6188
6189 /*
6190 * If the task has requested executable lockdown,
6191 * deny both:
6192 * - adding executable protections OR
6193 * - adding write protections to an existing executable mapping.
6194 */
6195 if (map->map_disallow_new_exec == TRUE) {
6196 if ((new_prot & VM_PROT_EXECUTE) ||
6197 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6198 vm_map_unlock(map);
6199 return KERN_PROTECTION_FAILURE;
6200 }
6201 }
6202
6203 prev = current->vme_end;
6204 current = current->vme_next;
6205 }
6206
6207 #if __arm64__
6208 if (end > prev &&
6209 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6210 vm_map_entry_t prev_entry;
6211
6212 prev_entry = current->vme_prev;
6213 if (prev_entry != vm_map_to_entry(map) &&
6214 !prev_entry->map_aligned &&
6215 (vm_map_round_page(prev_entry->vme_end,
6216 VM_MAP_PAGE_MASK(map))
6217 == end)) {
6218 /*
6219 * The last entry in our range is not "map-aligned"
6220 * but it would have reached all the way to "end"
6221 * if it had been map-aligned, so this is not really
6222 * a hole in the range and we can proceed.
6223 */
6224 prev = end;
6225 }
6226 }
6227 #endif /* __arm64__ */
6228
6229 if (end > prev) {
6230 vm_map_unlock(map);
6231 return KERN_INVALID_ADDRESS;
6232 }
6233
6234 /*
6235 * Go back and fix up protections.
6236 * Clip to start here if the range starts within
6237 * the entry.
6238 */
6239
6240 current = entry;
6241 if (current != vm_map_to_entry(map)) {
6242 /* clip and unnest if necessary */
6243 vm_map_clip_start(map, current, start);
6244 }
6245
6246 while ((current != vm_map_to_entry(map)) &&
6247 (current->vme_start < end)) {
6248 vm_prot_t old_prot;
6249
6250 vm_map_clip_end(map, current, end);
6251
6252 if (current->is_sub_map) {
6253 /* clipping did unnest if needed */
6254 assert(!current->use_pmap);
6255 }
6256
6257 old_prot = current->protection;
6258
6259 if (set_max) {
6260 current->max_protection = new_prot;
6261 current->protection = new_prot & old_prot;
6262 } else {
6263 current->protection = new_prot;
6264 }
6265
6266 /*
6267 * Update physical map if necessary.
6268 * If the request is to turn off write protection,
6269 * we won't do it for real (in pmap). This is because
6270 * it would cause copy-on-write to fail. We've already
6271 * set the new protection in the map, so if a
6272 * write-protect fault occurred, it will be fixed up
6273 * properly, COW or not.
6274 */
6275 if (current->protection != old_prot) {
6276 /* Look one level in: we support nested pmaps */
6277 /* from mapped submaps which are direct entries */
6278 /* in our map */
6279
6280 vm_prot_t prot;
6281
6282 prot = current->protection;
6283 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6284 prot &= ~VM_PROT_WRITE;
6285 } else {
6286 assert(!VME_OBJECT(current)->code_signed);
6287 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6288 }
6289
6290 if (override_nx(map, VME_ALIAS(current)) && prot) {
6291 prot |= VM_PROT_EXECUTE;
6292 }
6293
6294 #if DEVELOPMENT || DEBUG
6295 if (!(old_prot & VM_PROT_EXECUTE) &&
6296 (prot & VM_PROT_EXECUTE) &&
6297 panic_on_unsigned_execute &&
6298 (proc_selfcsflags() & CS_KILL)) {
6299 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6300 }
6301 #endif /* DEVELOPMENT || DEBUG */
6302
6303 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6304 if (current->wired_count) {
6305 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
6306 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6307 }
6308
6309 /* If the pmap layer cares about this
6310 * protection type, force a fault for
6311 * each page so that vm_fault will
6312 * repopulate the page with the full
6313 * set of protections.
6314 */
6315 /*
6316 * TODO: We don't seem to need this,
6317 * but this is due to an internal
6318 * implementation detail of
6319 * pmap_protect. Do we want to rely
6320 * on this?
6321 */
6322 prot = VM_PROT_NONE;
6323 }
6324
6325 if (current->is_sub_map && current->use_pmap) {
6326 pmap_protect(VME_SUBMAP(current)->pmap,
6327 current->vme_start,
6328 current->vme_end,
6329 prot);
6330 } else {
6331 if (prot & VM_PROT_WRITE) {
6332 if (VME_OBJECT(current) == compressor_object) {
6333 /*
6334 * For write requests on the
6335 * compressor, we will ask the
6336 * pmap layer to prevent us from
6337 * taking a write fault when we
6338 * attempt to access the mapping
6339 * next.
6340 */
6341 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6342 }
6343 }
6344
6345 pmap_protect_options(map->pmap,
6346 current->vme_start,
6347 current->vme_end,
6348 prot,
6349 pmap_options,
6350 NULL);
6351 }
6352 }
6353 current = current->vme_next;
6354 }
6355
6356 current = entry;
6357 while ((current != vm_map_to_entry(map)) &&
6358 (current->vme_start <= end)) {
6359 vm_map_simplify_entry(map, current);
6360 current = current->vme_next;
6361 }
6362
6363 vm_map_unlock(map);
6364 return KERN_SUCCESS;
6365 }
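/*
 * Editorial sketch (not part of the original source): how a caller might
 * use the routine above to drop write access on a range, versus lowering
 * the maximum protection.
 *
 *     // Change only the current protection of [start, end):
 *     kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 *     // Lower the maximum protection instead (set_max == TRUE); the
 *     // current protection is also intersected with the new maximum:
 *     kr = vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
 */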
6366
6367 /*
6368 * vm_map_inherit:
6369 *
6370 * Sets the inheritance of the specified address
6371 * range in the target map. Inheritance
6372 * affects how the map will be shared with
6373 * child maps at the time of vm_map_fork.
6374 */
6375 kern_return_t
6376 vm_map_inherit(
6377 vm_map_t map,
6378 vm_map_offset_t start,
6379 vm_map_offset_t end,
6380 vm_inherit_t new_inheritance)
6381 {
6382 vm_map_entry_t entry;
6383 vm_map_entry_t temp_entry;
6384
6385 vm_map_lock(map);
6386
6387 VM_MAP_RANGE_CHECK(map, start, end);
6388
6389 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6390 entry = temp_entry;
6391 } else {
6392 temp_entry = temp_entry->vme_next;
6393 entry = temp_entry;
6394 }
6395
6396 /* first check entire range for submaps which can't support the */
6397 /* given inheritance. */
6398 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6399 if (entry->is_sub_map) {
6400 if (new_inheritance == VM_INHERIT_COPY) {
6401 vm_map_unlock(map);
6402 return KERN_INVALID_ARGUMENT;
6403 }
6404 }
6405
6406 entry = entry->vme_next;
6407 }
6408
6409 entry = temp_entry;
6410 if (entry != vm_map_to_entry(map)) {
6411 /* clip and unnest if necessary */
6412 vm_map_clip_start(map, entry, start);
6413 }
6414
6415 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6416 vm_map_clip_end(map, entry, end);
6417 if (entry->is_sub_map) {
6418 /* clip did unnest if needed */
6419 assert(!entry->use_pmap);
6420 }
6421
6422 entry->inheritance = new_inheritance;
6423
6424 entry = entry->vme_next;
6425 }
6426
6427 vm_map_unlock(map);
6428 return KERN_SUCCESS;
6429 }
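/*
 * Editorial sketch (not part of the original source): marking a range so
 * that a child map created by vm_map_fork() shares it with the parent
 * rather than copying it.
 *
 *     kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *     // VM_INHERIT_COPY and VM_INHERIT_NONE are the other common choices;
 *     // note that VM_INHERIT_COPY is rejected above for submap entries.
 */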
6430
6431 /*
6432 * Update the accounting for the amount of wired memory in this map. If the user has
6433 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6434 */
6435
6436 static kern_return_t
6437 add_wire_counts(
6438 vm_map_t map,
6439 vm_map_entry_t entry,
6440 boolean_t user_wire)
6441 {
6442 vm_map_size_t size;
6443
6444 if (user_wire) {
6445 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6446
6447 /*
6448 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6449 * this map entry.
6450 */
6451
6452 if (entry->user_wired_count == 0) {
6453 size = entry->vme_end - entry->vme_start;
6454
6455 /*
6456 * Since this is the first time the user is wiring this map entry, check to see if we're
6457 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6458 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
6459 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6460 * limit, then we fail.
6461 */
6462
6463 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6464 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6465 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6466 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6467 } else {
6468 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6469 }
6470 return KERN_RESOURCE_SHORTAGE;
6471 }
6472
6473 /*
6474 * The first time the user wires an entry, we also increment the wired_count and add this to
6475 * the total that has been wired in the map.
6476 */
6477
6478 if (entry->wired_count >= MAX_WIRE_COUNT) {
6479 return KERN_FAILURE;
6480 }
6481
6482 entry->wired_count++;
6483 map->user_wire_size += size;
6484 }
6485
6486 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6487 return KERN_FAILURE;
6488 }
6489
6490 entry->user_wired_count++;
6491 } else {
6492 /*
6493 * The kernel is wiring the memory. Just bump the count and continue.
6494 */
6495
6496 if (entry->wired_count >= MAX_WIRE_COUNT) {
6497 panic("vm_map_wire: too many wirings");
6498 }
6499
6500 entry->wired_count++;
6501 }
6502
6503 return KERN_SUCCESS;
6504 }
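/*
 * Editorial note (not part of the original source): the user-wire admission
 * check above can be read as two limits, both of which must hold for the
 * first wire of an entry of "size" bytes:
 *
 *     size + map->user_wire_size       <= MIN(map->user_wire_limit,
 *                                             vm_per_task_user_wire_limit)
 *     size + ptoa_64(total_wire_count) <= vm_global_user_wire_limit
 *
 * where total_wire_count is vm_page_wire_count + vm_lopage_free_count.
 * Exceeding either limit makes the wire attempt fail with
 * KERN_RESOURCE_SHORTAGE.
 */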
6505
6506 /*
6507 * Update the memory wiring accounting now that the given map entry is being unwired.
6508 */
6509
6510 static void
6511 subtract_wire_counts(
6512 vm_map_t map,
6513 vm_map_entry_t entry,
6514 boolean_t user_wire)
6515 {
6516 if (user_wire) {
6517 /*
6518 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6519 */
6520
6521 if (entry->user_wired_count == 1) {
6522 /*
6523 * We're removing the last user wire reference. Decrement the wired_count and the total
6524 * user wired memory for this map.
6525 */
6526
6527 assert(entry->wired_count >= 1);
6528 entry->wired_count--;
6529 map->user_wire_size -= entry->vme_end - entry->vme_start;
6530 }
6531
6532 assert(entry->user_wired_count >= 1);
6533 entry->user_wired_count--;
6534 } else {
6535 /*
6536 * The kernel is unwiring the memory. Just update the count.
6537 */
6538
6539 assert(entry->wired_count >= 1);
6540 entry->wired_count--;
6541 }
6542 }
6543
6544 int cs_executable_wire = 0;
6545
6546 /*
6547 * vm_map_wire:
6548 *
6549 * Sets the pageability of the specified address range in the
6550 * target map as wired. Regions specified as not pageable require
6551 * locked-down physical memory and physical page maps. The
6552 * access_type variable indicates types of accesses that must not
6553 * generate page faults. This is checked against protection of
6554 * memory being locked-down.
6555 *
6556 * The map must not be locked, but a reference must remain to the
6557 * map throughout the call.
6558 */
6559 static kern_return_t
6560 vm_map_wire_nested(
6561 vm_map_t map,
6562 vm_map_offset_t start,
6563 vm_map_offset_t end,
6564 vm_prot_t caller_prot,
6565 vm_tag_t tag,
6566 boolean_t user_wire,
6567 pmap_t map_pmap,
6568 vm_map_offset_t pmap_addr,
6569 ppnum_t *physpage_p)
6570 {
6571 vm_map_entry_t entry;
6572 vm_prot_t access_type;
6573 struct vm_map_entry *first_entry, tmp_entry;
6574 vm_map_t real_map;
6575 vm_map_offset_t s, e;
6576 kern_return_t rc;
6577 boolean_t need_wakeup;
6578 boolean_t main_map = FALSE;
6579 wait_interrupt_t interruptible_state;
6580 thread_t cur_thread;
6581 unsigned int last_timestamp;
6582 vm_map_size_t size;
6583 boolean_t wire_and_extract;
6584 vm_prot_t extra_prots;
6585
6586 extra_prots = VM_PROT_COPY;
6587 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6588 #if XNU_TARGET_OS_OSX
6589 if (map->pmap == kernel_pmap ||
6590 !vm_map_cs_enforcement(map)) {
6591 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6592 }
6593 #endif /* XNU_TARGET_OS_OSX */
6594
6595 access_type = (caller_prot & VM_PROT_ALL);
6596
6597 wire_and_extract = FALSE;
6598 if (physpage_p != NULL) {
6599 /*
6600 * The caller wants the physical page number of the
6601 * wired page. We return only one physical page number
6602 * so this works for only one page at a time.
6603 */
6604 if ((end - start) != PAGE_SIZE) {
6605 return KERN_INVALID_ARGUMENT;
6606 }
6607 wire_and_extract = TRUE;
6608 *physpage_p = 0;
6609 }
6610
6611 vm_map_lock(map);
6612 if (map_pmap == NULL) {
6613 main_map = TRUE;
6614 }
6615 last_timestamp = map->timestamp;
6616
6617 VM_MAP_RANGE_CHECK(map, start, end);
6618 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6619 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6620
6621 if (start == end) {
6622 /* We wired what the caller asked for, zero pages */
6623 vm_map_unlock(map);
6624 return KERN_SUCCESS;
6625 }
6626
6627 need_wakeup = FALSE;
6628 cur_thread = current_thread();
6629
6630 s = start;
6631 rc = KERN_SUCCESS;
6632
6633 if (vm_map_lookup_entry(map, s, &first_entry)) {
6634 entry = first_entry;
6635 /*
6636 * vm_map_clip_start will be done later.
6637 * We don't want to unnest any nested submaps here !
6638 */
6639 } else {
6640 /* Start address is not in map */
6641 rc = KERN_INVALID_ADDRESS;
6642 goto done;
6643 }
6644
6645 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6646 /*
6647 * At this point, we have wired from "start" to "s".
6648 * We still need to wire from "s" to "end".
6649 *
6650 * "entry" hasn't been clipped, so it could start before "s"
6651 * and/or end after "end".
6652 */
6653
6654 /* "e" is how far we want to wire in this entry */
6655 e = entry->vme_end;
6656 if (e > end) {
6657 e = end;
6658 }
6659
6660 /*
6661 * If another thread is wiring/unwiring this entry then
6662 * block after informing other thread to wake us up.
6663 */
6664 if (entry->in_transition) {
6665 wait_result_t wait_result;
6666
6667 /*
6668 * We have not clipped the entry. Make sure that
6669 * the start address is in range so that the lookup
6670 * below will succeed.
6671 * "s" is the current starting point: we've already
6672 * wired from "start" to "s" and we still have
6673 * to wire from "s" to "end".
6674 */
6675
6676 entry->needs_wakeup = TRUE;
6677
6678 /*
6679 * wake up anybody waiting on entries that we have
6680 * already wired.
6681 */
6682 if (need_wakeup) {
6683 vm_map_entry_wakeup(map);
6684 need_wakeup = FALSE;
6685 }
6686 /*
6687 * User wiring is interruptible
6688 */
6689 wait_result = vm_map_entry_wait(map,
6690 (user_wire) ? THREAD_ABORTSAFE :
6691 THREAD_UNINT);
6692 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6693 /*
6694 * undo the wirings we have done so far
6695 * We do not clear the needs_wakeup flag,
6696 * because we cannot tell if we were the
6697 * only one waiting.
6698 */
6699 rc = KERN_FAILURE;
6700 goto done;
6701 }
6702
6703 /*
6704 * Cannot avoid a lookup here. Reset the timestamp.
6705 */
6706 last_timestamp = map->timestamp;
6707
6708 /*
6709 * The entry could have been clipped, look it up again.
6710 * The worst that can happen is that it may not exist anymore.
6711 */
6712 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6713 /*
6714 * User: undo everything up to the previous
6715 * entry. let vm_map_unwire worry about
6716 * checking the validity of the range.
6717 */
6718 rc = KERN_FAILURE;
6719 goto done;
6720 }
6721 entry = first_entry;
6722 continue;
6723 }
6724
6725 if (entry->is_sub_map) {
6726 vm_map_offset_t sub_start;
6727 vm_map_offset_t sub_end;
6728 vm_map_offset_t local_start;
6729 vm_map_offset_t local_end;
6730 pmap_t pmap;
6731
6732 if (wire_and_extract) {
6733 /*
6734 * Wiring would result in copy-on-write
6735 * which would not be compatible with
6736 * the sharing we have with the original
6737 * provider of this memory.
6738 */
6739 rc = KERN_INVALID_ARGUMENT;
6740 goto done;
6741 }
6742
6743 vm_map_clip_start(map, entry, s);
6744 vm_map_clip_end(map, entry, end);
6745
6746 sub_start = VME_OFFSET(entry);
6747 sub_end = entry->vme_end;
6748 sub_end += VME_OFFSET(entry) - entry->vme_start;
6749
6750 local_end = entry->vme_end;
6751 if (map_pmap == NULL) {
6752 vm_object_t object;
6753 vm_object_offset_t offset;
6754 vm_prot_t prot;
6755 boolean_t wired;
6756 vm_map_entry_t local_entry;
6757 vm_map_version_t version;
6758 vm_map_t lookup_map;
6759
6760 if (entry->use_pmap) {
6761 pmap = VME_SUBMAP(entry)->pmap;
6762 /* ppc implementation requires that */
6763 /* submap's pmap address ranges line */
6764 /* up with parent map */
6765 #ifdef notdef
6766 pmap_addr = sub_start;
6767 #endif
6768 pmap_addr = s;
6769 } else {
6770 pmap = map->pmap;
6771 pmap_addr = s;
6772 }
6773
6774 if (entry->wired_count) {
6775 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6776 goto done;
6777 }
6778
6779 /*
6780 * The map was not unlocked:
6781 * no need to goto re-lookup.
6782 * Just go directly to next entry.
6783 */
6784 entry = entry->vme_next;
6785 s = entry->vme_start;
6786 continue;
6787 }
6788
6789 /* call vm_map_lookup_locked to */
6790 /* cause any needs copy to be */
6791 /* evaluated */
6792 local_start = entry->vme_start;
6793 lookup_map = map;
6794 vm_map_lock_write_to_read(map);
6795 rc = vm_map_lookup_locked(
6796 &lookup_map, local_start,
6797 (access_type | extra_prots),
6798 OBJECT_LOCK_EXCLUSIVE,
6799 &version, &object,
6800 &offset, &prot, &wired,
6801 NULL,
6802 &real_map, NULL);
6803 if (rc != KERN_SUCCESS) {
6804 vm_map_unlock_read(lookup_map);
6805 assert(map_pmap == NULL);
6806 vm_map_unwire(map, start,
6807 s, user_wire);
6808 return rc;
6809 }
6810 vm_object_unlock(object);
6811 if (real_map != lookup_map) {
6812 vm_map_unlock(real_map);
6813 }
6814 vm_map_unlock_read(lookup_map);
6815 vm_map_lock(map);
6816
6817 /* we unlocked, so must re-lookup */
6818 if (!vm_map_lookup_entry(map,
6819 local_start,
6820 &local_entry)) {
6821 rc = KERN_FAILURE;
6822 goto done;
6823 }
6824
6825 /*
6826 * entry could have been "simplified",
6827 * so re-clip
6828 */
6829 entry = local_entry;
6830 assert(s == local_start);
6831 vm_map_clip_start(map, entry, s);
6832 vm_map_clip_end(map, entry, end);
6833 /* re-compute "e" */
6834 e = entry->vme_end;
6835 if (e > end) {
6836 e = end;
6837 }
6838
6839 /* did we have a change of type? */
6840 if (!entry->is_sub_map) {
6841 last_timestamp = map->timestamp;
6842 continue;
6843 }
6844 } else {
6845 local_start = entry->vme_start;
6846 pmap = map_pmap;
6847 }
6848
6849 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6850 goto done;
6851 }
6852
6853 entry->in_transition = TRUE;
6854
6855 vm_map_unlock(map);
6856 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6857 sub_start, sub_end,
6858 caller_prot, tag,
6859 user_wire, pmap, pmap_addr,
6860 NULL);
6861 vm_map_lock(map);
6862
6863 /*
6864 * Find the entry again. It could have been clipped
6865 * after we unlocked the map.
6866 */
6867 if (!vm_map_lookup_entry(map, local_start,
6868 &first_entry)) {
6869 panic("vm_map_wire: re-lookup failed");
6870 }
6871 entry = first_entry;
6872
6873 assert(local_start == s);
6874 /* re-compute "e" */
6875 e = entry->vme_end;
6876 if (e > end) {
6877 e = end;
6878 }
6879
6880 last_timestamp = map->timestamp;
6881 while ((entry != vm_map_to_entry(map)) &&
6882 (entry->vme_start < e)) {
6883 assert(entry->in_transition);
6884 entry->in_transition = FALSE;
6885 if (entry->needs_wakeup) {
6886 entry->needs_wakeup = FALSE;
6887 need_wakeup = TRUE;
6888 }
6889 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6890 subtract_wire_counts(map, entry, user_wire);
6891 }
6892 entry = entry->vme_next;
6893 }
6894 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6895 goto done;
6896 }
6897
6898 /* no need to relookup again */
6899 s = entry->vme_start;
6900 continue;
6901 }
6902
6903 /*
6904 * If this entry is already wired then increment
6905 * the appropriate wire reference count.
6906 */
6907 if (entry->wired_count) {
6908 if ((entry->protection & access_type) != access_type) {
6909 /* found a protection problem */
6910
6911 /*
6912 * XXX FBDP
6913 * We should always return an error
6914 * in this case but since we didn't
6915 * enforce it before, let's do
6916 * it only for the new "wire_and_extract"
6917 * code path for now...
6918 */
6919 if (wire_and_extract) {
6920 rc = KERN_PROTECTION_FAILURE;
6921 goto done;
6922 }
6923 }
6924
6925 /*
6926 * entry is already wired down, get our reference
6927 * after clipping to our range.
6928 */
6929 vm_map_clip_start(map, entry, s);
6930 vm_map_clip_end(map, entry, end);
6931
6932 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6933 goto done;
6934 }
6935
6936 if (wire_and_extract) {
6937 vm_object_t object;
6938 vm_object_offset_t offset;
6939 vm_page_t m;
6940
6941 /*
6942 * We don't have to "wire" the page again
6943 * but we still have to "extract" its
6944 * physical page number, after some sanity
6945 * checks.
6946 */
6947 assert((entry->vme_end - entry->vme_start)
6948 == PAGE_SIZE);
6949 assert(!entry->needs_copy);
6950 assert(!entry->is_sub_map);
6951 assert(VME_OBJECT(entry));
6952 if (((entry->vme_end - entry->vme_start)
6953 != PAGE_SIZE) ||
6954 entry->needs_copy ||
6955 entry->is_sub_map ||
6956 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6957 rc = KERN_INVALID_ARGUMENT;
6958 goto done;
6959 }
6960
6961 object = VME_OBJECT(entry);
6962 offset = VME_OFFSET(entry);
6963 /* need exclusive lock to update m->dirty */
6964 if (entry->protection & VM_PROT_WRITE) {
6965 vm_object_lock(object);
6966 } else {
6967 vm_object_lock_shared(object);
6968 }
6969 m = vm_page_lookup(object, offset);
6970 assert(m != VM_PAGE_NULL);
6971 assert(VM_PAGE_WIRED(m));
6972 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6973 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6974 if (entry->protection & VM_PROT_WRITE) {
6975 vm_object_lock_assert_exclusive(
6976 object);
6977 m->vmp_dirty = TRUE;
6978 }
6979 } else {
6980 /* not already wired !? */
6981 *physpage_p = 0;
6982 }
6983 vm_object_unlock(object);
6984 }
6985
6986 /* map was not unlocked: no need to relookup */
6987 entry = entry->vme_next;
6988 s = entry->vme_start;
6989 continue;
6990 }
6991
6992 /*
6993 * Unwired entry or wire request transmitted via submap
6994 */
6995
6996 /*
6997 * Wiring would copy the pages to the shadow object.
6998 * The shadow object would not be code-signed so
6999 * attempting to execute code from these copied pages
7000 * would trigger a code-signing violation.
7001 */
7002
7003 if ((entry->protection & VM_PROT_EXECUTE)
7004 #if XNU_TARGET_OS_OSX
7005 &&
7006 map->pmap != kernel_pmap &&
7007 (vm_map_cs_enforcement(map)
7008 #if __arm64__
7009 || !VM_MAP_IS_EXOTIC(map)
7010 #endif /* __arm64__ */
7011 )
7012 #endif /* XNU_TARGET_OS_OSX */
7013 ) {
7014 #if MACH_ASSERT
7015 printf("pid %d[%s] wiring executable range from "
7016 "0x%llx to 0x%llx: rejected to preserve "
7017 "code-signing\n",
7018 proc_selfpid(),
7019 (current_task()->bsd_info
7020 ? proc_name_address(current_task()->bsd_info)
7021 : "?"),
7022 (uint64_t) entry->vme_start,
7023 (uint64_t) entry->vme_end);
7024 #endif /* MACH_ASSERT */
7025 DTRACE_VM2(cs_executable_wire,
7026 uint64_t, (uint64_t)entry->vme_start,
7027 uint64_t, (uint64_t)entry->vme_end);
7028 cs_executable_wire++;
7029 rc = KERN_PROTECTION_FAILURE;
7030 goto done;
7031 }
7032
7033 /*
7034 * Perform actions of vm_map_lookup that need the write
7035 * lock on the map: create a shadow object for a
7036 * copy-on-write region, or an object for a zero-fill
7037 * region.
7038 */
7039 size = entry->vme_end - entry->vme_start;
7040 /*
7041 * If wiring a copy-on-write page, we need to copy it now
7042 * even if we're only (currently) requesting read access.
7043 * This is aggressive, but once it's wired we can't move it.
7044 */
7045 if (entry->needs_copy) {
7046 if (wire_and_extract) {
7047 /*
7048 * We're supposed to share with the original
7049 * provider so should not be "needs_copy"
7050 */
7051 rc = KERN_INVALID_ARGUMENT;
7052 goto done;
7053 }
7054
7055 VME_OBJECT_SHADOW(entry, size);
7056 entry->needs_copy = FALSE;
7057 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
7058 if (wire_and_extract) {
7059 /*
7060 * We're supposed to share with the original
7061 * provider so should already have an object.
7062 */
7063 rc = KERN_INVALID_ARGUMENT;
7064 goto done;
7065 }
7066 VME_OBJECT_SET(entry, vm_object_allocate(size));
7067 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
7068 assert(entry->use_pmap);
7069 }
7070
7071 vm_map_clip_start(map, entry, s);
7072 vm_map_clip_end(map, entry, end);
7073
7074 /* re-compute "e" */
7075 e = entry->vme_end;
7076 if (e > end) {
7077 e = end;
7078 }
7079
7080 /*
7081 * Check for holes and protection mismatch.
7082 * Holes: Next entry should be contiguous unless this
7083 * is the end of the region.
7084 * Protection: Access requested must be allowed, unless
7085 * wiring is by protection class
7086 */
7087 if ((entry->vme_end < end) &&
7088 ((entry->vme_next == vm_map_to_entry(map)) ||
7089 (entry->vme_next->vme_start > entry->vme_end))) {
7090 /* found a hole */
7091 rc = KERN_INVALID_ADDRESS;
7092 goto done;
7093 }
7094 if ((entry->protection & access_type) != access_type) {
7095 /* found a protection problem */
7096 rc = KERN_PROTECTION_FAILURE;
7097 goto done;
7098 }
7099
7100 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
7101
7102 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
7103 goto done;
7104 }
7105
7106 entry->in_transition = TRUE;
7107
7108 /*
7109 * This entry might get split once we unlock the map.
7110 * In vm_fault_wire(), we need the current range as
7111 * defined by this entry. In order for this to work
7112 * along with a simultaneous clip operation, we make a
7113 * temporary copy of this entry and use that for the
7114 * wiring. Note that the underlying objects do not
7115 * change during a clip.
7116 */
7117 tmp_entry = *entry;
7118
7119 /*
7120 * The in_transition state guarantees that the entry
7121 * (or entries for this range, if a split occurred) will be
7122 * there when the map lock is acquired for the second time.
7123 */
7124 vm_map_unlock(map);
7125
7126 if (!user_wire && cur_thread != THREAD_NULL) {
7127 interruptible_state = thread_interrupt_level(THREAD_UNINT);
7128 } else {
7129 interruptible_state = THREAD_UNINT;
7130 }
7131
7132 if (map_pmap) {
7133 rc = vm_fault_wire(map,
7134 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
7135 physpage_p);
7136 } else {
7137 rc = vm_fault_wire(map,
7138 &tmp_entry, caller_prot, tag, map->pmap,
7139 tmp_entry.vme_start,
7140 physpage_p);
7141 }
7142
7143 if (!user_wire && cur_thread != THREAD_NULL) {
7144 thread_interrupt_level(interruptible_state);
7145 }
7146
7147 vm_map_lock(map);
7148
7149 if (last_timestamp + 1 != map->timestamp) {
7150 /*
7151 * Find the entry again. It could have been clipped
7152 * after we unlocked the map.
7153 */
7154 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7155 &first_entry)) {
7156 panic("vm_map_wire: re-lookup failed");
7157 }
7158
7159 entry = first_entry;
7160 }
7161
7162 last_timestamp = map->timestamp;
7163
7164 while ((entry != vm_map_to_entry(map)) &&
7165 (entry->vme_start < tmp_entry.vme_end)) {
7166 assert(entry->in_transition);
7167 entry->in_transition = FALSE;
7168 if (entry->needs_wakeup) {
7169 entry->needs_wakeup = FALSE;
7170 need_wakeup = TRUE;
7171 }
7172 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7173 subtract_wire_counts(map, entry, user_wire);
7174 }
7175 entry = entry->vme_next;
7176 }
7177
7178 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7179 goto done;
7180 }
7181
7182 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7183 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
7184 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7185 /* found a "new" hole */
7186 s = tmp_entry.vme_end;
7187 rc = KERN_INVALID_ADDRESS;
7188 goto done;
7189 }
7190
7191 s = entry->vme_start;
7192 } /* end while loop through map entries */
7193
7194 done:
7195 if (rc == KERN_SUCCESS) {
7196 /* repair any damage we may have made to the VM map */
7197 vm_map_simplify_range(map, start, end);
7198 }
7199
7200 vm_map_unlock(map);
7201
7202 /*
7203 * wake up anybody waiting on entries we wired.
7204 */
7205 if (need_wakeup) {
7206 vm_map_entry_wakeup(map);
7207 }
7208
7209 if (rc != KERN_SUCCESS) {
7210 /* undo what has been wired so far */
7211 vm_map_unwire_nested(map, start, s, user_wire,
7212 map_pmap, pmap_addr);
7213 if (physpage_p) {
7214 *physpage_p = 0;
7215 }
7216 }
7217
7218 return rc;
7219 }
7220
7221 kern_return_t
7222 vm_map_wire_external(
7223 vm_map_t map,
7224 vm_map_offset_t start,
7225 vm_map_offset_t end,
7226 vm_prot_t caller_prot,
7227 boolean_t user_wire)
7228 {
7229 kern_return_t kret;
7230
7231 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7232 user_wire, (pmap_t)NULL, 0, NULL);
7233 return kret;
7234 }
7235
7236 kern_return_t
7237 vm_map_wire_kernel(
7238 vm_map_t map,
7239 vm_map_offset_t start,
7240 vm_map_offset_t end,
7241 vm_prot_t caller_prot,
7242 vm_tag_t tag,
7243 boolean_t user_wire)
7244 {
7245 kern_return_t kret;
7246
7247 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7248 user_wire, (pmap_t)NULL, 0, NULL);
7249 return kret;
7250 }
7251
7252 kern_return_t
7253 vm_map_wire_and_extract_external(
7254 vm_map_t map,
7255 vm_map_offset_t start,
7256 vm_prot_t caller_prot,
7257 boolean_t user_wire,
7258 ppnum_t *physpage_p)
7259 {
7260 kern_return_t kret;
7261
7262 kret = vm_map_wire_nested(map,
7263 start,
7264 start + VM_MAP_PAGE_SIZE(map),
7265 caller_prot,
7266 vm_tag_bt(),
7267 user_wire,
7268 (pmap_t)NULL,
7269 0,
7270 physpage_p);
7271 if (kret != KERN_SUCCESS &&
7272 physpage_p != NULL) {
7273 *physpage_p = 0;
7274 }
7275 return kret;
7276 }
7277
7278 kern_return_t
7279 vm_map_wire_and_extract_kernel(
7280 vm_map_t map,
7281 vm_map_offset_t start,
7282 vm_prot_t caller_prot,
7283 vm_tag_t tag,
7284 boolean_t user_wire,
7285 ppnum_t *physpage_p)
7286 {
7287 kern_return_t kret;
7288
7289 kret = vm_map_wire_nested(map,
7290 start,
7291 start + VM_MAP_PAGE_SIZE(map),
7292 caller_prot,
7293 tag,
7294 user_wire,
7295 (pmap_t)NULL,
7296 0,
7297 physpage_p);
7298 if (kret != KERN_SUCCESS &&
7299 physpage_p != NULL) {
7300 *physpage_p = 0;
7301 }
7302 return kret;
7303 }
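/*
 * Editorial sketch (not part of the original source): wiring a single page
 * and retrieving its physical page number with the routine above.  The tag
 * value is illustrative.
 *
 *     ppnum_t phys_page = 0;
 *     kern_return_t kr;
 *
 *     kr = vm_map_wire_and_extract_kernel(map,
 *         addr,                           // exactly one map page is wired
 *         VM_PROT_READ | VM_PROT_WRITE,
 *         VM_KERN_MEMORY_OSFMK,           // illustrative vm_tag_t
 *         FALSE,                          // kernel (not user) wiring
 *         &phys_page);
 *     if (kr == KERN_SUCCESS) {
 *             // phys_page holds the wired page's physical page number
 *     }
 */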
7304
7305 /*
7306 * vm_map_unwire:
7307 *
7308 * Sets the pageability of the specified address range in the target
7309 * as pageable. Regions specified must have been wired previously.
7310 *
7311 * The map must not be locked, but a reference must remain to the map
7312 * throughout the call.
7313 *
7314 * Kernel will panic on failures. User unwire ignores holes and
7315 * unwired and in-transition entries to avoid losing memory by leaving
7316 * it unwired.
7317 */
7318 static kern_return_t
7319 vm_map_unwire_nested(
7320 vm_map_t map,
7321 vm_map_offset_t start,
7322 vm_map_offset_t end,
7323 boolean_t user_wire,
7324 pmap_t map_pmap,
7325 vm_map_offset_t pmap_addr)
7326 {
7327 vm_map_entry_t entry;
7328 struct vm_map_entry *first_entry, tmp_entry;
7329 boolean_t need_wakeup;
7330 boolean_t main_map = FALSE;
7331 unsigned int last_timestamp;
7332
7333 vm_map_lock(map);
7334 if (map_pmap == NULL) {
7335 main_map = TRUE;
7336 }
7337 last_timestamp = map->timestamp;
7338
7339 VM_MAP_RANGE_CHECK(map, start, end);
7340 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7341 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7342
7343 if (start == end) {
7344 /* We unwired what the caller asked for: zero pages */
7345 vm_map_unlock(map);
7346 return KERN_SUCCESS;
7347 }
7348
7349 if (vm_map_lookup_entry(map, start, &first_entry)) {
7350 entry = first_entry;
7351 /*
7352 * vm_map_clip_start will be done later.
7353 * We don't want to unnest any nested sub maps here !
7354 */
7355 } else {
7356 if (!user_wire) {
7357 panic("vm_map_unwire: start not found");
7358 }
7359 /* Start address is not in map. */
7360 vm_map_unlock(map);
7361 return KERN_INVALID_ADDRESS;
7362 }
7363
7364 if (entry->superpage_size) {
7365 /* superpages are always wired */
7366 vm_map_unlock(map);
7367 return KERN_INVALID_ADDRESS;
7368 }
7369
7370 need_wakeup = FALSE;
7371 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7372 if (entry->in_transition) {
7373 /*
7374 * 1)
7375 * Another thread is wiring down this entry. Note
7376 * that if it is not for the other thread we would
7377 * be unwiring an unwired entry. This is not
7378 * permitted. If we wait, we will be unwiring memory
7379 * we did not wire.
7380 *
7381 * 2)
7382 * Another thread is unwiring this entry. We did not
7383 * have a reference to it, because if we did, this
7384 * entry will not be getting unwired now.
7385 */
7386 if (!user_wire) {
7387 /*
7388 * XXX FBDP
7389 * This could happen: there could be some
7390 * overlapping vslock/vsunlock operations
7391 * going on.
7392 * We should probably just wait and retry,
7393 * but then we have to be careful that this
7394 * entry could get "simplified" after
7395 * "in_transition" gets unset and before
7396 * we re-lookup the entry, so we would
7397 * have to re-clip the entry to avoid
7398 * re-unwiring what we have already unwired...
7399 * See vm_map_wire_nested().
7400 *
7401 * Or we could just ignore "in_transition"
7402 * here and proceed to decrement the wired
7403 * count(s) on this entry. That should be fine
7404 * as long as "wired_count" doesn't drop all
7405 * the way to 0 (and we should panic if THAT
7406 * happens).
7407 */
7408 panic("vm_map_unwire: in_transition entry");
7409 }
7410
7411 entry = entry->vme_next;
7412 continue;
7413 }
7414
7415 if (entry->is_sub_map) {
7416 vm_map_offset_t sub_start;
7417 vm_map_offset_t sub_end;
7418 vm_map_offset_t local_end;
7419 pmap_t pmap;
7420
7421 vm_map_clip_start(map, entry, start);
7422 vm_map_clip_end(map, entry, end);
7423
7424 sub_start = VME_OFFSET(entry);
7425 sub_end = entry->vme_end - entry->vme_start;
7426 sub_end += VME_OFFSET(entry);
7427 local_end = entry->vme_end;
7428 if (map_pmap == NULL) {
7429 if (entry->use_pmap) {
7430 pmap = VME_SUBMAP(entry)->pmap;
7431 pmap_addr = sub_start;
7432 } else {
7433 pmap = map->pmap;
7434 pmap_addr = start;
7435 }
7436 if (entry->wired_count == 0 ||
7437 (user_wire && entry->user_wired_count == 0)) {
7438 if (!user_wire) {
7439 panic("vm_map_unwire: entry is unwired");
7440 }
7441 entry = entry->vme_next;
7442 continue;
7443 }
7444
7445 /*
7446 * Check for holes
7447 * Holes: Next entry should be contiguous unless
7448 * this is the end of the region.
7449 */
7450 if (((entry->vme_end < end) &&
7451 ((entry->vme_next == vm_map_to_entry(map)) ||
7452 (entry->vme_next->vme_start
7453 > entry->vme_end)))) {
7454 if (!user_wire) {
7455 panic("vm_map_unwire: non-contiguous region");
7456 }
7457 /*
7458 * entry = entry->vme_next;
7459 * continue;
7460 */
7461 }
7462
7463 subtract_wire_counts(map, entry, user_wire);
7464
7465 if (entry->wired_count != 0) {
7466 entry = entry->vme_next;
7467 continue;
7468 }
7469
7470 entry->in_transition = TRUE;
7471 tmp_entry = *entry;/* see comment in vm_map_wire() */
7472
7473 /*
7474 * We can unlock the map now. The in_transition state
7475 * guarantees the existence of the entry.
7476 */
7477 vm_map_unlock(map);
7478 vm_map_unwire_nested(VME_SUBMAP(entry),
7479 sub_start, sub_end, user_wire, pmap, pmap_addr);
7480 vm_map_lock(map);
7481
7482 if (last_timestamp + 1 != map->timestamp) {
7483 /*
7484 * Find the entry again. It could have been
7485 * clipped or deleted after we unlocked the map.
7486 */
7487 if (!vm_map_lookup_entry(map,
7488 tmp_entry.vme_start,
7489 &first_entry)) {
7490 if (!user_wire) {
7491 panic("vm_map_unwire: re-lookup failed");
7492 }
7493 entry = first_entry->vme_next;
7494 } else {
7495 entry = first_entry;
7496 }
7497 }
7498 last_timestamp = map->timestamp;
7499
7500 /*
7501 * clear transition bit for all constituent entries
7502 * that were in the original entry (saved in
7503 * tmp_entry). Also check for waiters.
7504 */
7505 while ((entry != vm_map_to_entry(map)) &&
7506 (entry->vme_start < tmp_entry.vme_end)) {
7507 assert(entry->in_transition);
7508 entry->in_transition = FALSE;
7509 if (entry->needs_wakeup) {
7510 entry->needs_wakeup = FALSE;
7511 need_wakeup = TRUE;
7512 }
7513 entry = entry->vme_next;
7514 }
7515 continue;
7516 } else {
7517 vm_map_unlock(map);
7518 vm_map_unwire_nested(VME_SUBMAP(entry),
7519 sub_start, sub_end, user_wire, map_pmap,
7520 pmap_addr);
7521 vm_map_lock(map);
7522
7523 if (last_timestamp + 1 != map->timestamp) {
7524 /*
7525 * Find the entry again. It could have been
7526 * clipped or deleted after we unlocked the map.
7527 */
7528 if (!vm_map_lookup_entry(map,
7529 tmp_entry.vme_start,
7530 &first_entry)) {
7531 if (!user_wire) {
7532 panic("vm_map_unwire: re-lookup failed");
7533 }
7534 entry = first_entry->vme_next;
7535 } else {
7536 entry = first_entry;
7537 }
7538 }
7539 last_timestamp = map->timestamp;
7540 }
7541 }
7542
7543
7544 if ((entry->wired_count == 0) ||
7545 (user_wire && entry->user_wired_count == 0)) {
7546 if (!user_wire) {
7547 panic("vm_map_unwire: entry is unwired");
7548 }
7549
7550 entry = entry->vme_next;
7551 continue;
7552 }
7553
7554 assert(entry->wired_count > 0 &&
7555 (!user_wire || entry->user_wired_count > 0));
7556
7557 vm_map_clip_start(map, entry, start);
7558 vm_map_clip_end(map, entry, end);
7559
7560 /*
7561 * Check for holes
7562 * Holes: Next entry should be contiguous unless
7563 * this is the end of the region.
7564 */
7565 if (((entry->vme_end < end) &&
7566 ((entry->vme_next == vm_map_to_entry(map)) ||
7567 (entry->vme_next->vme_start > entry->vme_end)))) {
7568 if (!user_wire) {
7569 panic("vm_map_unwire: non-contiguous region");
7570 }
7571 entry = entry->vme_next;
7572 continue;
7573 }
7574
7575 subtract_wire_counts(map, entry, user_wire);
7576
7577 if (entry->wired_count != 0) {
7578 entry = entry->vme_next;
7579 continue;
7580 }
7581
7582 if (entry->zero_wired_pages) {
7583 entry->zero_wired_pages = FALSE;
7584 }
7585
7586 entry->in_transition = TRUE;
7587 tmp_entry = *entry; /* see comment in vm_map_wire() */
7588
7589 /*
7590 * We can unlock the map now. The in_transition state
7591 * guarantees the existence of the entry.
7592 */
7593 vm_map_unlock(map);
7594 if (map_pmap) {
7595 vm_fault_unwire(map,
7596 &tmp_entry, FALSE, map_pmap, pmap_addr);
7597 } else {
7598 vm_fault_unwire(map,
7599 &tmp_entry, FALSE, map->pmap,
7600 tmp_entry.vme_start);
7601 }
7602 vm_map_lock(map);
7603
7604 if (last_timestamp + 1 != map->timestamp) {
7605 /*
7606 * Find the entry again. It could have been clipped
7607 * or deleted after we unlocked the map.
7608 */
7609 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7610 &first_entry)) {
7611 if (!user_wire) {
7612 panic("vm_map_unwire: re-lookup failed");
7613 }
7614 entry = first_entry->vme_next;
7615 } else {
7616 entry = first_entry;
7617 }
7618 }
7619 last_timestamp = map->timestamp;
7620
7621 /*
7622 * clear transition bit for all constituent entries that
7623 * were in the original entry (saved in tmp_entry). Also
7624 * check for waiters.
7625 */
7626 while ((entry != vm_map_to_entry(map)) &&
7627 (entry->vme_start < tmp_entry.vme_end)) {
7628 assert(entry->in_transition);
7629 entry->in_transition = FALSE;
7630 if (entry->needs_wakeup) {
7631 entry->needs_wakeup = FALSE;
7632 need_wakeup = TRUE;
7633 }
7634 entry = entry->vme_next;
7635 }
7636 }
7637
7638 /*
7639 * We might have fragmented the address space when we wired this
7640 * range of addresses. Attempt to re-coalesce these VM map entries
7641 * with their neighbors now that they're no longer wired.
7642 * Under some circumstances, address space fragmentation can
7643 * prevent VM object shadow chain collapsing, which can cause
7644 * swap space leaks.
7645 */
7646 vm_map_simplify_range(map, start, end);
7647
7648 vm_map_unlock(map);
7649 /*
7650 * wake up anybody waiting on entries that we have unwired.
7651 */
7652 if (need_wakeup) {
7653 vm_map_entry_wakeup(map);
7654 }
7655 return KERN_SUCCESS;
7656 }
7657
7658 kern_return_t
7659 vm_map_unwire(
7660 vm_map_t map,
7661 vm_map_offset_t start,
7662 vm_map_offset_t end,
7663 boolean_t user_wire)
7664 {
7665 return vm_map_unwire_nested(map, start, end,
7666 user_wire, (pmap_t)NULL, 0);
7667 }
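/*
 * Editorial sketch (not part of the original source): a kernel wiring
 * paired with the corresponding unwire.  The tag is illustrative, and
 * user_wire is FALSE in both calls so only the kernel wire count is
 * adjusted.
 *
 *     kr = vm_map_wire_kernel(map, start, end,
 *         VM_PROT_READ, VM_KERN_MEMORY_OSFMK, FALSE);
 *     if (kr == KERN_SUCCESS) {
 *             // ... access the wired range ...
 *             kr = vm_map_unwire(map, start, end, FALSE);
 *     }
 */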
7668
7669
7670 /*
7671 * vm_map_entry_delete: [ internal use only ]
7672 *
7673 * Deallocate the given entry from the target map.
7674 */
7675 static void
7676 vm_map_entry_delete(
7677 vm_map_t map,
7678 vm_map_entry_t entry)
7679 {
7680 vm_map_offset_t s, e;
7681 vm_object_t object;
7682 vm_map_t submap;
7683
7684 s = entry->vme_start;
7685 e = entry->vme_end;
7686 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7687 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7688 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7689 assert(page_aligned(s));
7690 assert(page_aligned(e));
7691 }
7692 if (entry->map_aligned == TRUE) {
7693 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7694 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7695 }
7696 assert(entry->wired_count == 0);
7697 assert(entry->user_wired_count == 0);
7698 assert(!entry->permanent);
7699
7700 if (entry->is_sub_map) {
7701 object = NULL;
7702 submap = VME_SUBMAP(entry);
7703 } else {
7704 submap = NULL;
7705 object = VME_OBJECT(entry);
7706 }
7707
7708 vm_map_store_entry_unlink(map, entry);
7709 map->size -= e - s;
7710
7711 vm_map_entry_dispose(map, entry);
7712
7713 vm_map_unlock(map);
7714 /*
7715 * Deallocate the object only after removing all
7716 * pmap entries pointing to its pages.
7717 */
7718 if (submap) {
7719 vm_map_deallocate(submap);
7720 } else {
7721 vm_object_deallocate(object);
7722 }
7723 }
7724
7725 void
7726 vm_map_submap_pmap_clean(
7727 vm_map_t map,
7728 vm_map_offset_t start,
7729 vm_map_offset_t end,
7730 vm_map_t sub_map,
7731 vm_map_offset_t offset)
7732 {
7733 vm_map_offset_t submap_start;
7734 vm_map_offset_t submap_end;
7735 vm_map_size_t remove_size;
7736 vm_map_entry_t entry;
7737
7738 submap_end = offset + (end - start);
7739 submap_start = offset;
7740
7741 vm_map_lock_read(sub_map);
7742 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7743 remove_size = (entry->vme_end - entry->vme_start);
7744 if (offset > entry->vme_start) {
7745 remove_size -= offset - entry->vme_start;
7746 }
7747
7748
7749 if (submap_end < entry->vme_end) {
7750 remove_size -=
7751 entry->vme_end - submap_end;
7752 }
7753 if (entry->is_sub_map) {
7754 vm_map_submap_pmap_clean(
7755 sub_map,
7756 start,
7757 start + remove_size,
7758 VME_SUBMAP(entry),
7759 VME_OFFSET(entry));
7760 } else {
7761 if (map->mapped_in_other_pmaps &&
7762 os_ref_get_count(&map->map_refcnt) != 0 &&
7763 VME_OBJECT(entry) != NULL) {
7764 vm_object_pmap_protect_options(
7765 VME_OBJECT(entry),
7766 (VME_OFFSET(entry) +
7767 offset -
7768 entry->vme_start),
7769 remove_size,
7770 PMAP_NULL,
7771 PAGE_SIZE,
7772 entry->vme_start,
7773 VM_PROT_NONE,
7774 PMAP_OPTIONS_REMOVE);
7775 } else {
7776 pmap_remove(map->pmap,
7777 (addr64_t)start,
7778 (addr64_t)(start + remove_size));
7779 }
7780 }
7781 }
7782
7783 entry = entry->vme_next;
7784
7785 while ((entry != vm_map_to_entry(sub_map))
7786 && (entry->vme_start < submap_end)) {
7787 remove_size = (entry->vme_end - entry->vme_start);
7788 if (submap_end < entry->vme_end) {
7789 remove_size -= entry->vme_end - submap_end;
7790 }
7791 if (entry->is_sub_map) {
7792 vm_map_submap_pmap_clean(
7793 sub_map,
7794 (start + entry->vme_start) - offset,
7795 ((start + entry->vme_start) - offset) + remove_size,
7796 VME_SUBMAP(entry),
7797 VME_OFFSET(entry));
7798 } else {
7799 if (map->mapped_in_other_pmaps &&
7800 os_ref_get_count(&map->map_refcnt) != 0 &&
7801 VME_OBJECT(entry) != NULL) {
7802 vm_object_pmap_protect_options(
7803 VME_OBJECT(entry),
7804 VME_OFFSET(entry),
7805 remove_size,
7806 PMAP_NULL,
7807 PAGE_SIZE,
7808 entry->vme_start,
7809 VM_PROT_NONE,
7810 PMAP_OPTIONS_REMOVE);
7811 } else {
7812 pmap_remove(map->pmap,
7813 (addr64_t)((start + entry->vme_start)
7814 - offset),
7815 (addr64_t)(((start + entry->vme_start)
7816 - offset) + remove_size));
7817 }
7818 }
7819 entry = entry->vme_next;
7820 }
7821 vm_map_unlock_read(sub_map);
7822 return;
7823 }
7824
7825 /*
7826 * virt_memory_guard_ast:
7827 *
7828 * Handle the AST callout for a virtual memory guard.
7829 * raise an EXC_GUARD exception and terminate the task
7830 * if configured to do so.
7831 */
7832 void
7833 virt_memory_guard_ast(
7834 thread_t thread,
7835 mach_exception_data_type_t code,
7836 mach_exception_data_type_t subcode)
7837 {
7838 task_t task = thread->task;
7839 assert(task != kernel_task);
7840 assert(task == current_task());
7841 uint32_t behavior;
7842
7843 behavior = task->task_exc_guard;
7844
7845 /* Is delivery enabled */
7846 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7847 return;
7848 }
7849
7850 /* If only once, make sure we're that once */
7851 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7852 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7853
7854 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7855 break;
7856 }
7857 behavior = task->task_exc_guard;
7858 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7859 return;
7860 }
7861 }
7862
7863 /* Raise exception via corpse fork or synchronously */
7864 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7865 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7866 task_violated_guard(code, subcode, NULL);
7867 } else {
7868 task_exception_notify(EXC_GUARD, code, subcode);
7869 }
7870
7871 /* Terminate the task if desired */
7872 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7873 task_bsdtask_kill(current_task());
7874 }
7875 }
7876
7877 /*
7878 * vm_map_guard_exception:
7879 *
7880 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7881 *
7882 * Right now, we do this when we find nothing mapped, or a
7883 * gap in the mapping when a user address space deallocate
7884 * was requested. We report the address of the first gap found.
7885 */
7886 static void
7887 vm_map_guard_exception(
7888 vm_map_offset_t gap_start,
7889 unsigned reason)
7890 {
7891 mach_exception_code_t code = 0;
7892 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7893 unsigned int target = 0; /* should we pass in pid associated with map? */
7894 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7895 boolean_t fatal = FALSE;
7896
7897 task_t task = current_task();
7898
7899 /* Can't deliver exceptions to kernel task */
7900 if (task == kernel_task) {
7901 return;
7902 }
7903
7904 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7905 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7906 EXC_GUARD_ENCODE_TARGET(code, target);
7907
7908 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7909 fatal = TRUE;
7910 }
7911 thread_guard_violation(current_thread(), code, subcode, fatal);
7912 }
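/*
 * For reference, the call pattern used later in this file (from
 * vm_map_delete(), when a gap is found in a range being deallocated)
 * looks roughly like this:
 *
 *	if (gap_start != FIND_GAP && gap_start != GAPS_OK &&
 *	    !(flags & VM_MAP_REMOVE_GAPS_OK)) {
 *		vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
 *	}
 */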
7913
7914 /*
7915 * vm_map_delete: [ internal use only ]
7916 *
7917 * Deallocates the given address range from the target map.
7918 * Removes all user wirings. Unwires one kernel wiring if
7919 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7920 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7921 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7922 *
7923 * This routine is called with map locked and leaves map locked.
7924 */
7925 static kern_return_t
7926 vm_map_delete(
7927 vm_map_t map,
7928 vm_map_offset_t start,
7929 vm_map_offset_t end,
7930 int flags,
7931 vm_map_t zap_map)
7932 {
7933 vm_map_entry_t entry, next;
7934 struct vm_map_entry *first_entry, tmp_entry;
7935 vm_map_offset_t s;
7936 vm_object_t object;
7937 boolean_t need_wakeup;
7938 unsigned int last_timestamp = ~0; /* unlikely value */
7939 int interruptible;
7940 vm_map_offset_t gap_start;
7941 __unused vm_map_offset_t save_start = start;
7942 __unused vm_map_offset_t save_end = end;
7943 const vm_map_offset_t FIND_GAP = 1; /* a non-page-aligned value */
7944 const vm_map_offset_t GAPS_OK = 2; /* a different non-page-aligned value */
7945
7946 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7947 gap_start = FIND_GAP;
7948 } else {
7949 gap_start = GAPS_OK;
7950 }
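/*
 * gap_start acts as a small state machine: it starts as one of the two
 * non-page-aligned sentinels above, is overwritten with the address of
 * the first gap found (if any), and is compared against both sentinels
 * at the end of this function before raising kGUARD_EXC_DEALLOC_GAP.
 */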
7951
7952 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7953 THREAD_ABORTSAFE : THREAD_UNINT;
7954
7955 /*
7956 * All our DMA I/O operations in IOKit are currently done by
7957 * wiring through the map entries of the task requesting the I/O.
7958 * Because of this, we must always wait for kernel wirings
7959 * to go away on the entries before deleting them.
7960 *
7961 * Any caller who wants to actually remove a kernel wiring
7962 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7963 * properly remove one wiring instead of blasting through
7964 * them all.
7965 */
7966 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7967
7968 while (1) {
7969 /*
7970 * Find the start of the region, and clip it
7971 */
7972 if (vm_map_lookup_entry(map, start, &first_entry)) {
7973 entry = first_entry;
7974 if (map == kalloc_map &&
7975 (entry->vme_start != start ||
7976 entry->vme_end != end)) {
7977 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7978 "mismatched entry %p [0x%llx:0x%llx]\n",
7979 map,
7980 (uint64_t)start,
7981 (uint64_t)end,
7982 entry,
7983 (uint64_t)entry->vme_start,
7984 (uint64_t)entry->vme_end);
7985 }
7986
7987 /*
7988 * If in a superpage, extend the range to include the start of the mapping.
7989 */
7990 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7991 start = SUPERPAGE_ROUND_DOWN(start);
7992 continue;
7993 }
7994
7995 if (start == entry->vme_start) {
7996 /*
7997 * No need to clip. We don't want to cause
7998 * any unnecessary unnesting in this case...
7999 */
8000 } else {
8001 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8002 entry->map_aligned &&
8003 !VM_MAP_PAGE_ALIGNED(
8004 start,
8005 VM_MAP_PAGE_MASK(map))) {
8006 /*
8007 * The entry will no longer be
8008 * map-aligned after clipping
8009 * and the caller said it's OK.
8010 */
8011 entry->map_aligned = FALSE;
8012 }
8013 if (map == kalloc_map) {
8014 panic("vm_map_delete(%p,0x%llx,0x%llx):"
8015 " clipping %p at 0x%llx\n",
8016 map,
8017 (uint64_t)start,
8018 (uint64_t)end,
8019 entry,
8020 (uint64_t)start);
8021 }
8022 vm_map_clip_start(map, entry, start);
8023 }
8024
8025 /*
8026 * Fix the lookup hint now, rather than each
8027 * time through the loop.
8028 */
8029 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8030 } else {
8031 if (map->pmap == kernel_pmap &&
8032 os_ref_get_count(&map->map_refcnt) != 0) {
8033 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8034 "no map entry at 0x%llx\n",
8035 map,
8036 (uint64_t)start,
8037 (uint64_t)end,
8038 (uint64_t)start);
8039 }
8040 entry = first_entry->vme_next;
8041 if (gap_start == FIND_GAP) {
8042 gap_start = start;
8043 }
8044 }
8045 break;
8046 }
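/*
 * The surrounding while (1) exists only so that the superpage case above
 * can restart the lookup after rounding "start" down; every other path
 * falls through to the break.
 */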
8047 if (entry->superpage_size) {
8048 end = SUPERPAGE_ROUND_UP(end);
8049 }
8050
8051 need_wakeup = FALSE;
8052 /*
8053 * Step through all entries in this region
8054 */
8055 s = entry->vme_start;
8056 while ((entry != vm_map_to_entry(map)) && (s < end)) {
8057 /*
8058 * At this point, we have deleted all the memory entries
8059 * between "start" and "s". We still need to delete
8060 * all memory entries between "s" and "end".
8061 * While we were blocked and the map was unlocked, some
8062 * new memory entries could have been re-allocated between
8063 * "start" and "s" and we don't want to mess with those.
8064 * Some of those entries could even have been re-assembled
8065 * with an entry after "s" (in vm_map_simplify_entry()), so
8066 * we may have to vm_map_clip_start() again.
8067 */
8068
8069 if (entry->vme_start >= s) {
8070 /*
8071 * This entry starts on or after "s"
8072 * so no need to clip its start.
8073 */
8074 } else {
8075 /*
8076 * This entry has been re-assembled by a
8077 * vm_map_simplify_entry(). We need to
8078 * re-clip its start.
8079 */
8080 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8081 entry->map_aligned &&
8082 !VM_MAP_PAGE_ALIGNED(s,
8083 VM_MAP_PAGE_MASK(map))) {
8084 /*
8085 * The entry will no longer be map-aligned
8086 * after clipping and the caller said it's OK.
8087 */
8088 entry->map_aligned = FALSE;
8089 }
8090 if (map == kalloc_map) {
8091 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8092 "clipping %p at 0x%llx\n",
8093 map,
8094 (uint64_t)start,
8095 (uint64_t)end,
8096 entry,
8097 (uint64_t)s);
8098 }
8099 vm_map_clip_start(map, entry, s);
8100 }
8101 if (entry->vme_end <= end) {
8102 /*
8103 * This entry is going away completely, so no need
8104 * to clip and possibly cause an unnecessary unnesting.
8105 */
8106 } else {
8107 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8108 entry->map_aligned &&
8109 !VM_MAP_PAGE_ALIGNED(end,
8110 VM_MAP_PAGE_MASK(map))) {
8111 /*
8112 * The entry will no longer be map-aligned
8113 * after clipping and the caller said it's OK.
8114 */
8115 entry->map_aligned = FALSE;
8116 }
8117 if (map == kalloc_map) {
8118 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8119 "clipping %p at 0x%llx\n",
8120 map,
8121 (uint64_t)start,
8122 (uint64_t)end,
8123 entry,
8124 (uint64_t)end);
8125 }
8126 vm_map_clip_end(map, entry, end);
8127 }
8128
8129 if (entry->permanent) {
8130 if (map->pmap == kernel_pmap) {
8131 panic("%s(%p,0x%llx,0x%llx): "
8132 "attempt to remove permanent "
8133 "VM map entry "
8134 "%p [0x%llx:0x%llx]\n",
8135 __FUNCTION__,
8136 map,
8137 (uint64_t) start,
8138 (uint64_t) end,
8139 entry,
8140 (uint64_t) entry->vme_start,
8141 (uint64_t) entry->vme_end);
8142 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8143 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8144 entry->permanent = FALSE;
8145 } else {
8146 if (vm_map_executable_immutable_verbose) {
8147 printf("%d[%s] %s(0x%llx,0x%llx): "
8148 "permanent entry [0x%llx:0x%llx] "
8149 "prot 0x%x/0x%x\n",
8150 proc_selfpid(),
8151 (current_task()->bsd_info
8152 ? proc_name_address(current_task()->bsd_info)
8153 : "?"),
8154 __FUNCTION__,
8155 (uint64_t) start,
8156 (uint64_t) end,
8157 (uint64_t)entry->vme_start,
8158 (uint64_t)entry->vme_end,
8159 entry->protection,
8160 entry->max_protection);
8161 }
8162 /*
8163 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8164 */
8165 DTRACE_VM5(vm_map_delete_permanent,
8166 vm_map_offset_t, entry->vme_start,
8167 vm_map_offset_t, entry->vme_end,
8168 vm_prot_t, entry->protection,
8169 vm_prot_t, entry->max_protection,
8170 int, VME_ALIAS(entry));
8171 }
8172 }
8173
8174
8175 if (entry->in_transition) {
8176 wait_result_t wait_result;
8177
8178 /*
8179 * Another thread is wiring/unwiring this entry.
8180 * Let the other thread know we are waiting.
8181 */
8182 assert(s == entry->vme_start);
8183 entry->needs_wakeup = TRUE;
8184
8185 /*
8186 * wake up anybody waiting on entries that we have
8187 * already unwired/deleted.
8188 */
8189 if (need_wakeup) {
8190 vm_map_entry_wakeup(map);
8191 need_wakeup = FALSE;
8192 }
8193
8194 wait_result = vm_map_entry_wait(map, interruptible);
8195
8196 if (interruptible &&
8197 wait_result == THREAD_INTERRUPTED) {
8198 /*
8199 * We do not clear the needs_wakeup flag,
8200 * since we cannot tell if we were the only one.
8201 */
8202 return KERN_ABORTED;
8203 }
8204
8205 /*
8206 * The entry could have been clipped or it
8207 * may not exist anymore. Look it up again.
8208 */
8209 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8210 /*
8211 * User: use the next entry
8212 */
8213 if (gap_start == FIND_GAP) {
8214 gap_start = s;
8215 }
8216 entry = first_entry->vme_next;
8217 s = entry->vme_start;
8218 } else {
8219 entry = first_entry;
8220 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8221 }
8222 last_timestamp = map->timestamp;
8223 continue;
8224 } /* end in_transition */
8225
8226 if (entry->wired_count) {
8227 boolean_t user_wire;
8228
8229 user_wire = entry->user_wired_count > 0;
8230
8231 /*
8232 * Remove a kernel wiring if requested
8233 */
8234 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8235 entry->wired_count--;
8236 }
8237
8238 /*
8239 * Remove all user wirings for proper accounting
8240 */
8241 if (entry->user_wired_count > 0) {
8242 while (entry->user_wired_count) {
8243 subtract_wire_counts(map, entry, user_wire);
8244 }
8245 }
8246
8247 if (entry->wired_count != 0) {
8248 assert(map != kernel_map);
8249 /*
8250 * Cannot continue. Typical case is when
8251 * a user thread has physical io pending
8252 * on this page. Either wait for the
8253 * kernel wiring to go away or return an
8254 * error.
8255 */
8256 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8257 wait_result_t wait_result;
8258
8259 assert(s == entry->vme_start);
8260 entry->needs_wakeup = TRUE;
8261 wait_result = vm_map_entry_wait(map,
8262 interruptible);
8263
8264 if (interruptible &&
8265 wait_result == THREAD_INTERRUPTED) {
8266 /*
8267 * We do not clear the
8268 * needs_wakeup flag, since we
8269 * cannot tell if we were the
8270 * only one.
8271 */
8272 return KERN_ABORTED;
8273 }
8274
8275 /*
8276 * The entry could have been clipped or
8277 * it may not exist anymore. Look it
8278 * up again.
8279 */
8280 if (!vm_map_lookup_entry(map, s,
8281 &first_entry)) {
8282 assert(map != kernel_map);
8283 /*
8284 * User: use the next entry
8285 */
8286 if (gap_start == FIND_GAP) {
8287 gap_start = s;
8288 }
8289 entry = first_entry->vme_next;
8290 s = entry->vme_start;
8291 } else {
8292 entry = first_entry;
8293 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8294 }
8295 last_timestamp = map->timestamp;
8296 continue;
8297 } else {
8298 return KERN_FAILURE;
8299 }
8300 }
8301
8302 entry->in_transition = TRUE;
8303 /*
8304 * copy current entry. see comment in vm_map_wire()
8305 */
8306 tmp_entry = *entry;
8307 assert(s == entry->vme_start);
8308
8309 /*
8310 * We can unlock the map now. The in_transition
8311 * state guarantees existence of the entry.
8312 */
8313 vm_map_unlock(map);
8314
8315 if (tmp_entry.is_sub_map) {
8316 vm_map_t sub_map;
8317 vm_map_offset_t sub_start, sub_end;
8318 pmap_t pmap;
8319 vm_map_offset_t pmap_addr;
8320
8321
8322 sub_map = VME_SUBMAP(&tmp_entry);
8323 sub_start = VME_OFFSET(&tmp_entry);
8324 sub_end = sub_start + (tmp_entry.vme_end -
8325 tmp_entry.vme_start);
8326 if (tmp_entry.use_pmap) {
8327 pmap = sub_map->pmap;
8328 pmap_addr = tmp_entry.vme_start;
8329 } else {
8330 pmap = map->pmap;
8331 pmap_addr = tmp_entry.vme_start;
8332 }
8333 (void) vm_map_unwire_nested(sub_map,
8334 sub_start, sub_end,
8335 user_wire,
8336 pmap, pmap_addr);
8337 } else {
8338 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8339 pmap_protect_options(
8340 map->pmap,
8341 tmp_entry.vme_start,
8342 tmp_entry.vme_end,
8343 VM_PROT_NONE,
8344 PMAP_OPTIONS_REMOVE,
8345 NULL);
8346 }
8347 vm_fault_unwire(map, &tmp_entry,
8348 VME_OBJECT(&tmp_entry) == kernel_object,
8349 map->pmap, tmp_entry.vme_start);
8350 }
8351
8352 vm_map_lock(map);
8353
8354 if (last_timestamp + 1 != map->timestamp) {
8355 /*
8356 * Find the entry again. It could have
8357 * been clipped after we unlocked the map.
8358 */
8359 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8360 assert((map != kernel_map) &&
8361 (!entry->is_sub_map));
8362 if (gap_start == FIND_GAP) {
8363 gap_start = s;
8364 }
8365 first_entry = first_entry->vme_next;
8366 s = first_entry->vme_start;
8367 } else {
8368 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8369 }
8370 } else {
8371 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8372 first_entry = entry;
8373 }
8374
8375 last_timestamp = map->timestamp;
8376
8377 entry = first_entry;
8378 while ((entry != vm_map_to_entry(map)) &&
8379 (entry->vme_start < tmp_entry.vme_end)) {
8380 assert(entry->in_transition);
8381 entry->in_transition = FALSE;
8382 if (entry->needs_wakeup) {
8383 entry->needs_wakeup = FALSE;
8384 need_wakeup = TRUE;
8385 }
8386 entry = entry->vme_next;
8387 }
8388 /*
8389 * We have unwired the entry(s). Go back and
8390 * delete them.
8391 */
8392 entry = first_entry;
8393 continue;
8394 }
8395
8396 /* entry is unwired */
8397 assert(entry->wired_count == 0);
8398 assert(entry->user_wired_count == 0);
8399
8400 assert(s == entry->vme_start);
8401
8402 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8403 /*
8404 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8405 * vm_map_delete(), some map entries might have been
8406 * transferred to a "zap_map", which doesn't have a
8407 * pmap. The original pmap has already been flushed
8408 * in the vm_map_delete() call targeting the original
8409 * map, but when we get to destroying the "zap_map",
8410 * we don't have any pmap to flush, so let's just skip
8411 * all this.
8412 */
8413 } else if (entry->is_sub_map) {
8414 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8415 "map %p (%d) entry %p submap %p (%d)\n",
8416 map, VM_MAP_PAGE_SHIFT(map), entry,
8417 VME_SUBMAP(entry),
8418 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8419 if (entry->use_pmap) {
8420 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8421 "map %p (%d) entry %p submap %p (%d)\n",
8422 map, VM_MAP_PAGE_SHIFT(map), entry,
8423 VME_SUBMAP(entry),
8424 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8425 #ifndef NO_NESTED_PMAP
8426 int pmap_flags;
8427
8428 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8429 /*
8430 * This is the final cleanup of the
8431 * address space being terminated.
8432 * No new mappings are expected and
8433 * we don't really need to unnest the
8434 * shared region (and lose the "global"
8435 * pmap mappings, if applicable).
8436 *
8437 * Tell the pmap layer that we're
8438 * "clean" wrt nesting.
8439 */
8440 pmap_flags = PMAP_UNNEST_CLEAN;
8441 } else {
8442 /*
8443 * We're unmapping part of the nested
8444 * shared region, so we can't keep the
8445 * nested pmap.
8446 */
8447 pmap_flags = 0;
8448 }
8449 pmap_unnest_options(
8450 map->pmap,
8451 (addr64_t)entry->vme_start,
8452 entry->vme_end - entry->vme_start,
8453 pmap_flags);
8454 #endif /* NO_NESTED_PMAP */
8455 if (map->mapped_in_other_pmaps &&
8456 os_ref_get_count(&map->map_refcnt) != 0) {
8457 /* clean up parent map/maps */
8458 vm_map_submap_pmap_clean(
8459 map, entry->vme_start,
8460 entry->vme_end,
8461 VME_SUBMAP(entry),
8462 VME_OFFSET(entry));
8463 }
8464 } else {
8465 vm_map_submap_pmap_clean(
8466 map, entry->vme_start, entry->vme_end,
8467 VME_SUBMAP(entry),
8468 VME_OFFSET(entry));
8469 }
8470 } else if (VME_OBJECT(entry) != kernel_object &&
8471 VME_OBJECT(entry) != compressor_object) {
8472 object = VME_OBJECT(entry);
8473 if (map->mapped_in_other_pmaps &&
8474 os_ref_get_count(&map->map_refcnt) != 0) {
8475 vm_object_pmap_protect_options(
8476 object, VME_OFFSET(entry),
8477 entry->vme_end - entry->vme_start,
8478 PMAP_NULL,
8479 PAGE_SIZE,
8480 entry->vme_start,
8481 VM_PROT_NONE,
8482 PMAP_OPTIONS_REMOVE);
8483 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8484 (map->pmap == kernel_pmap)) {
8485 /* Remove the translations associated
8486 * with this range unless the entry
8487 * has no object. For the kernel map
8488 * (or a descendant), remove them even
8489 * without an object, since the platform
8490 * could create "backdoor" mappings
8491 * invisible to the VM. It is expected
8492 * that objectless, non-kernel ranges
8493 * do not have such VM-invisible
8494 * translations.
8495 */
8496 pmap_remove_options(map->pmap,
8497 (addr64_t)entry->vme_start,
8498 (addr64_t)entry->vme_end,
8499 PMAP_OPTIONS_REMOVE);
8500 }
8501 }
8502
8503 if (entry->iokit_acct) {
8504 /* alternate accounting */
8505 DTRACE_VM4(vm_map_iokit_unmapped_region,
8506 vm_map_t, map,
8507 vm_map_offset_t, entry->vme_start,
8508 vm_map_offset_t, entry->vme_end,
8509 int, VME_ALIAS(entry));
8510 vm_map_iokit_unmapped_region(map,
8511 (entry->vme_end -
8512 entry->vme_start));
8513 entry->iokit_acct = FALSE;
8514 entry->use_pmap = FALSE;
8515 }
8516
8517 /*
8518 * All pmap mappings for this map entry must have been
8519 * cleared by now.
8520 */
8521 #if DEBUG
8522 assert(vm_map_pmap_is_empty(map,
8523 entry->vme_start,
8524 entry->vme_end));
8525 #endif /* DEBUG */
8526
8527 next = entry->vme_next;
8528
8529 if (map->pmap == kernel_pmap &&
8530 os_ref_get_count(&map->map_refcnt) != 0 &&
8531 entry->vme_end < end &&
8532 (next == vm_map_to_entry(map) ||
8533 next->vme_start != entry->vme_end)) {
8534 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8535 "hole after %p at 0x%llx\n",
8536 map,
8537 (uint64_t)start,
8538 (uint64_t)end,
8539 entry,
8540 (uint64_t)entry->vme_end);
8541 }
8542
8543 /*
8544 * If the desired range didn't end with "entry", then there is a gap if
8545 * we wrapped around to the start of the map or if "entry" and "next"
8546 * aren't contiguous.
8547 *
8548 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8549 * For example, on devices with 4K hardware pages, map entry sizes are now all 16K.
8550 */
8551 if (gap_start == FIND_GAP &&
8552 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8553 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8554 gap_start = entry->vme_end;
8555 }
8556 s = next->vme_start;
8557 last_timestamp = map->timestamp;
8558
8559 if (entry->permanent) {
8560 /*
8561 * A permanent entry cannot be removed, so leave it
8562 * in place but remove all access permissions.
8563 */
8564 entry->protection = VM_PROT_NONE;
8565 entry->max_protection = VM_PROT_NONE;
8566 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8567 zap_map != VM_MAP_NULL) {
8568 vm_map_size_t entry_size;
8569 /*
8570 * The caller wants to save the affected VM map entries
8571 * into the "zap_map". The caller will take care of
8572 * these entries.
8573 */
8574 /* unlink the entry from "map" ... */
8575 vm_map_store_entry_unlink(map, entry);
8576 /* ... and add it to the end of the "zap_map" */
8577 vm_map_store_entry_link(zap_map,
8578 vm_map_last_entry(zap_map),
8579 entry,
8580 VM_MAP_KERNEL_FLAGS_NONE);
8581 entry_size = entry->vme_end - entry->vme_start;
8582 map->size -= entry_size;
8583 zap_map->size += entry_size;
8584 /* we didn't unlock the map, so no timestamp increase */
8585 last_timestamp--;
8586 } else {
8587 vm_map_entry_delete(map, entry);
8588 /* vm_map_entry_delete unlocks the map */
8589 vm_map_lock(map);
8590 }
8591
8592 entry = next;
8593
8594 if (entry == vm_map_to_entry(map)) {
8595 break;
8596 }
8597 if (last_timestamp + 1 != map->timestamp) {
8598 /*
8599 * We are responsible for deleting everything
8600 * from the given space. If someone has interfered,
8601 * we pick up where we left off. Back fills should
8602 * be all right for anyone, except map_delete, and
8603 * we have to assume that the task has been fully
8604 * disabled before we get here.
8605 */
8606 if (!vm_map_lookup_entry(map, s, &entry)) {
8607 entry = entry->vme_next;
8608
8609 /*
8610 * Nothing found for s. If we weren't already done, then there is a gap.
8611 */
8612 if (gap_start == FIND_GAP && s < end) {
8613 gap_start = s;
8614 }
8615 s = entry->vme_start;
8616 } else {
8617 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8618 }
8619 /*
8620 * others can not only allocate behind us, we can
8621 * also see entries coalesce while we don't hold the map lock
8622 */
8623 if (entry == vm_map_to_entry(map)) {
8624 break;
8625 }
8626 }
8627 last_timestamp = map->timestamp;
8628 }
8629
8630 if (map->wait_for_space) {
8631 thread_wakeup((event_t) map);
8632 }
8633 /*
8634 * wake up anybody waiting on entries that we have already deleted.
8635 */
8636 if (need_wakeup) {
8637 vm_map_entry_wakeup(map);
8638 }
8639
8640 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8641 DTRACE_VM3(kern_vm_deallocate_gap,
8642 vm_map_offset_t, gap_start,
8643 vm_map_offset_t, save_start,
8644 vm_map_offset_t, save_end);
8645 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8646 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8647 }
8648 }
8649
8650 return KERN_SUCCESS;
8651 }
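/*
 * Quick reference, derived from the body of vm_map_delete() above, of the
 * flags it reacts to:
 *
 *	VM_MAP_REMOVE_KUNWIRE		drop one kernel wiring per entry
 *	VM_MAP_REMOVE_INTERRUPTIBLE	sleep THREAD_ABORTSAFE instead of THREAD_UNINT
 *	VM_MAP_REMOVE_WAIT_FOR_KWIRE	wait for remaining kernel wirings (always set here)
 *	VM_MAP_REMOVE_NO_MAP_ALIGN	allow clipping to break map alignment
 *	VM_MAP_REMOVE_IMMUTABLE		allow removal of "permanent" entries
 *	VM_MAP_REMOVE_NO_UNNESTING	unnest with PMAP_UNNEST_CLEAN (final teardown)
 *	VM_MAP_REMOVE_NO_PMAP_CLEANUP	skip pmap flushing (e.g. for "zap maps")
 *	VM_MAP_REMOVE_SAVE_ENTRIES	move entries to "zap_map" instead of freeing them
 *	VM_MAP_REMOVE_GAPS_OK		don't raise EXC_GUARD on gaps in the range
 */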
8652
8653
8654 /*
8655 * vm_map_terminate:
8656 *
8657 * Clean out a task's map.
8658 */
8659 kern_return_t
8660 vm_map_terminate(
8661 vm_map_t map)
8662 {
8663 vm_map_lock(map);
8664 map->terminated = TRUE;
8665 vm_map_unlock(map);
8666
8667 return vm_map_remove(map,
8668 map->min_offset,
8669 map->max_offset,
8670 /*
8671 * Final cleanup:
8672 * + no unnesting
8673 * + remove immutable mappings
8674 * + allow gaps in range
8675 */
8676 (VM_MAP_REMOVE_NO_UNNESTING |
8677 VM_MAP_REMOVE_IMMUTABLE |
8678 VM_MAP_REMOVE_GAPS_OK));
8679 }
8680
8681 /*
8682 * vm_map_remove:
8683 *
8684 * Remove the given address range from the target map.
8685 * This is the exported form of vm_map_delete.
8686 */
8687 kern_return_t
8688 vm_map_remove(
8689 vm_map_t map,
8690 vm_map_offset_t start,
8691 vm_map_offset_t end,
8692 boolean_t flags)
8693 {
8694 kern_return_t result;
8695
8696 vm_map_lock(map);
8697 VM_MAP_RANGE_CHECK(map, start, end);
8698 /*
8699 * For the zone maps, the kernel controls the allocation/freeing of memory.
8700 * Any free to the zone maps should be within the bounds of the map and
8701 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8702 * free to the zone maps into a no-op, there is a problem and we should
8703 * panic.
8704 */
8705 if ((start == end) && zone_maps_owned(start, 1)) {
8706 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start);
8707 }
8708 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8709 vm_map_unlock(map);
8710
8711 return result;
8712 }
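/*
 * A minimal usage sketch (illustrative only; "map", "addr" and "size" are
 * placeholders, and VM_MAP_REMOVE_NO_FLAGS is assumed to be the "no
 * flags" value from vm_map.h): a kernel caller tearing down a range it
 * mapped earlier would typically do
 *
 *	kr = vm_map_remove(map, addr, addr + size, VM_MAP_REMOVE_NO_FLAGS);
 *
 * adding VM_MAP_REMOVE_KUNWIRE if it still holds a kernel wiring on the
 * range.
 */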
8713
8714 /*
8715 * vm_map_remove_locked:
8716 *
8717 * Remove the given address range from the target locked map.
8718 * This is the exported form of vm_map_delete.
8719 */
8720 kern_return_t
8721 vm_map_remove_locked(
8722 vm_map_t map,
8723 vm_map_offset_t start,
8724 vm_map_offset_t end,
8725 boolean_t flags)
8726 {
8727 kern_return_t result;
8728
8729 VM_MAP_RANGE_CHECK(map, start, end);
8730 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8731 return result;
8732 }
8733
8734
8735 /*
8736 * Routine: vm_map_copy_allocate
8737 *
8738 * Description:
8739 * Allocates and initializes a map copy object.
8740 */
8741 static vm_map_copy_t
8742 vm_map_copy_allocate(void)
8743 {
8744 vm_map_copy_t new_copy;
8745
8746 new_copy = zalloc(vm_map_copy_zone);
8747 bzero(new_copy, sizeof(*new_copy));
8748 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8749 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8750 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8751 return new_copy;
8752 }
8753
8754 /*
8755 * Routine: vm_map_copy_discard
8756 *
8757 * Description:
8758 * Dispose of a map copy object (returned by
8759 * vm_map_copyin).
8760 */
8761 void
8762 vm_map_copy_discard(
8763 vm_map_copy_t copy)
8764 {
8765 if (copy == VM_MAP_COPY_NULL) {
8766 return;
8767 }
8768
8769 switch (copy->type) {
8770 case VM_MAP_COPY_ENTRY_LIST:
8771 while (vm_map_copy_first_entry(copy) !=
8772 vm_map_copy_to_entry(copy)) {
8773 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8774
8775 vm_map_copy_entry_unlink(copy, entry);
8776 if (entry->is_sub_map) {
8777 vm_map_deallocate(VME_SUBMAP(entry));
8778 } else {
8779 vm_object_deallocate(VME_OBJECT(entry));
8780 }
8781 vm_map_copy_entry_dispose(copy, entry);
8782 }
8783 break;
8784 case VM_MAP_COPY_OBJECT:
8785 vm_object_deallocate(copy->cpy_object);
8786 break;
8787 case VM_MAP_COPY_KERNEL_BUFFER:
8788
8789 /*
8790 * The data buffer for this copy was allocated separately from
8791 * KHEAP_DATA_BUFFERS; free it here. The vm_map_copy_t itself
8792 * comes from vm_map_copy_zone and is freed below.
8793 */
8794 if (copy->size > msg_ool_size_small || copy->offset) {
8795 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8796 (long long)copy->size, (long long)copy->offset);
8797 }
8798 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy->size);
8799 }
8800 zfree(vm_map_copy_zone, copy);
8801 }
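/*
 * Ownership note, a sketch of the usual pattern rather than a new rule:
 * a vm_map_copy_t produced by vm_map_copyin() is consumed exactly once,
 * either by a successful vm_map_copyout()/vm_map_copy_overwrite() or by
 * an explicit discard on the error path, e.g.
 *
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);
 *	}
 */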
8802
8803 /*
8804 * Routine: vm_map_copy_copy
8805 *
8806 * Description:
8807 * Move the information in a map copy object to
8808 * a new map copy object, leaving the old one
8809 * empty.
8810 *
8811 * This is used by kernel routines that need
8812 * to look at out-of-line data (in copyin form)
8813 * before deciding whether to return SUCCESS.
8814 * If the routine returns FAILURE, the original
8815 * copy object will be deallocated; therefore,
8816 * these routines must make a copy of the copy
8817 * object and leave the original empty so that
8818 * deallocation will not fail.
8819 */
8820 vm_map_copy_t
8821 vm_map_copy_copy(
8822 vm_map_copy_t copy)
8823 {
8824 vm_map_copy_t new_copy;
8825
8826 if (copy == VM_MAP_COPY_NULL) {
8827 return VM_MAP_COPY_NULL;
8828 }
8829
8830 /*
8831 * Allocate a new copy object, and copy the information
8832 * from the old one into it.
8833 */
8834
8835 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8836 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8837 #if __has_feature(ptrauth_calls)
8838 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8839 new_copy->cpy_kdata = copy->cpy_kdata;
8840 }
8841 #endif
8842
8843 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8844 /*
8845 * The links in the entry chain must be
8846 * changed to point to the new copy object.
8847 */
8848 vm_map_copy_first_entry(copy)->vme_prev
8849 = vm_map_copy_to_entry(new_copy);
8850 vm_map_copy_last_entry(copy)->vme_next
8851 = vm_map_copy_to_entry(new_copy);
8852 }
8853
8854 /*
8855 * Change the old copy object into one that contains
8856 * nothing to be deallocated.
8857 */
8858 copy->type = VM_MAP_COPY_OBJECT;
8859 copy->cpy_object = VM_OBJECT_NULL;
8860
8861 /*
8862 * Return the new object.
8863 */
8864 return new_copy;
8865 }
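/*
 * A sketch of the pattern described in the comment above ("saved" and
 * "failed" are illustrative, not names from this file): a routine that
 * must examine copyin data before deciding its return value would do
 * roughly
 *
 *	vm_map_copy_t saved = vm_map_copy_copy(copy);
 *	(... inspect the out-of-line data via "saved" ...)
 *	if (failed) {
 *		vm_map_copy_discard(saved);
 *		return KERN_FAILURE;
 *	}
 *
 * The original "copy" is left empty, so the caller's deallocation on the
 * failure path cannot free the data twice.
 */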
8866
8867 static kern_return_t
8868 vm_map_overwrite_submap_recurse(
8869 vm_map_t dst_map,
8870 vm_map_offset_t dst_addr,
8871 vm_map_size_t dst_size)
8872 {
8873 vm_map_offset_t dst_end;
8874 vm_map_entry_t tmp_entry;
8875 vm_map_entry_t entry;
8876 kern_return_t result;
8877 boolean_t encountered_sub_map = FALSE;
8878
8879
8880
8881 /*
8882 * Verify that the destination is all writeable
8883 * initially. We have to trunc the destination
8884 * address and round the copy size or we'll end up
8885 * splitting entries in strange ways.
8886 */
8887
8888 dst_end = vm_map_round_page(dst_addr + dst_size,
8889 VM_MAP_PAGE_MASK(dst_map));
8890 vm_map_lock(dst_map);
8891
8892 start_pass_1:
8893 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8894 vm_map_unlock(dst_map);
8895 return KERN_INVALID_ADDRESS;
8896 }
8897
8898 vm_map_clip_start(dst_map,
8899 tmp_entry,
8900 vm_map_trunc_page(dst_addr,
8901 VM_MAP_PAGE_MASK(dst_map)));
8902 if (tmp_entry->is_sub_map) {
8903 /* clipping did unnest if needed */
8904 assert(!tmp_entry->use_pmap);
8905 }
8906
8907 for (entry = tmp_entry;;) {
8908 vm_map_entry_t next;
8909
8910 next = entry->vme_next;
8911 while (entry->is_sub_map) {
8912 vm_map_offset_t sub_start;
8913 vm_map_offset_t sub_end;
8914 vm_map_offset_t local_end;
8915
8916 if (entry->in_transition) {
8917 /*
8918 * Say that we are waiting, and wait for entry.
8919 */
8920 entry->needs_wakeup = TRUE;
8921 vm_map_entry_wait(dst_map, THREAD_UNINT);
8922
8923 goto start_pass_1;
8924 }
8925
8926 encountered_sub_map = TRUE;
8927 sub_start = VME_OFFSET(entry);
8928
8929 if (entry->vme_end < dst_end) {
8930 sub_end = entry->vme_end;
8931 } else {
8932 sub_end = dst_end;
8933 }
8934 sub_end -= entry->vme_start;
8935 sub_end += VME_OFFSET(entry);
8936 local_end = entry->vme_end;
8937 vm_map_unlock(dst_map);
8938
8939 result = vm_map_overwrite_submap_recurse(
8940 VME_SUBMAP(entry),
8941 sub_start,
8942 sub_end - sub_start);
8943
8944 if (result != KERN_SUCCESS) {
8945 return result;
8946 }
8947 if (dst_end <= entry->vme_end) {
8948 return KERN_SUCCESS;
8949 }
8950 vm_map_lock(dst_map);
8951 if (!vm_map_lookup_entry(dst_map, local_end,
8952 &tmp_entry)) {
8953 vm_map_unlock(dst_map);
8954 return KERN_INVALID_ADDRESS;
8955 }
8956 entry = tmp_entry;
8957 next = entry->vme_next;
8958 }
8959
8960 if (!(entry->protection & VM_PROT_WRITE)) {
8961 vm_map_unlock(dst_map);
8962 return KERN_PROTECTION_FAILURE;
8963 }
8964
8965 /*
8966 * If the entry is in transition, we must wait
8967 * for it to exit that state. Anything could happen
8968 * when we unlock the map, so start over.
8969 */
8970 if (entry->in_transition) {
8971 /*
8972 * Say that we are waiting, and wait for entry.
8973 */
8974 entry->needs_wakeup = TRUE;
8975 vm_map_entry_wait(dst_map, THREAD_UNINT);
8976
8977 goto start_pass_1;
8978 }
8979
8980 /*
8981 * our range is contained completely within this map entry
8982 */
8983 if (dst_end <= entry->vme_end) {
8984 vm_map_unlock(dst_map);
8985 return KERN_SUCCESS;
8986 }
8987 /*
8988 * check that range specified is contiguous region
8989 */
8990 if ((next == vm_map_to_entry(dst_map)) ||
8991 (next->vme_start != entry->vme_end)) {
8992 vm_map_unlock(dst_map);
8993 return KERN_INVALID_ADDRESS;
8994 }
8995
8996 /*
8997 * Check for permanent objects in the destination.
8998 */
8999 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9000 ((!VME_OBJECT(entry)->internal) ||
9001 (VME_OBJECT(entry)->true_share))) {
9002 if (encountered_sub_map) {
9003 vm_map_unlock(dst_map);
9004 return KERN_FAILURE;
9005 }
9006 }
9007
9008
9009 entry = next;
9010 }/* for */
9011 vm_map_unlock(dst_map);
9012 return KERN_SUCCESS;
9013 }
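/*
 * Note, summarizing the loop above: this recursion only checks that the
 * destination range is writable and contiguous, and fails if a permanent
 * object is found once a submap has been encountered; the overwrite
 * itself is performed by vm_map_copy_overwrite_nested() below.
 */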
9014
9015 /*
9016 * Routine: vm_map_copy_overwrite
9017 *
9018 * Description:
9019 * Copy the memory described by the map copy
9020 * object (copy; returned by vm_map_copyin) onto
9021 * the specified destination region (dst_map, dst_addr).
9022 * The destination must be writeable.
9023 *
9024 * Unlike vm_map_copyout, this routine actually
9025 * writes over previously-mapped memory. If the
9026 * previous mapping was to a permanent (user-supplied)
9027 * memory object, it is preserved.
9028 *
9029 * The attributes (protection and inheritance) of the
9030 * destination region are preserved.
9031 *
9032 * If successful, consumes the copy object.
9033 * Otherwise, the caller is responsible for it.
9034 *
9035 * Implementation notes:
9036 * To overwrite aligned temporary virtual memory, it is
9037 * sufficient to remove the previous mapping and insert
9038 * the new copy. This replacement is done either on
9039 * the whole region (if no permanent virtual memory
9040 * objects are embedded in the destination region) or
9041 * in individual map entries.
9042 *
9043 * To overwrite permanent virtual memory, it is necessary
9044 * to copy each page, as the external memory management
9045 * interface currently does not provide any optimizations.
9046 *
9047 * Unaligned memory also has to be copied. It is possible
9048 * to use 'vm_trickery' to copy the aligned data. This is
9049 * not done but not hard to implement.
9050 *
9051 * Once a page of permanent memory has been overwritten,
9052 * it is impossible to interrupt this function; otherwise,
9053 * the call would be neither atomic nor location-independent.
9054 * The kernel-state portion of a user thread must be
9055 * interruptible.
9056 *
9057 * It may be expensive to forward all requests that might
9058 * overwrite permanent memory (vm_write, vm_copy) to
9059 * uninterruptible kernel threads. This routine may be
9060 * called by interruptible threads; however, success is
9061 * not guaranteed -- if the request cannot be performed
9062 * atomically and interruptibly, an error indication is
9063 * returned.
9064 */
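/*
 * A minimal caller-side sketch (illustrative; "dst_map", "dst_addr" and
 * "copy" are placeholders): after vm_map_copyin() of the source range,
 * the public entry point further below is used as
 *
 *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
 *	    copy->size, FALSE);
 *
 * On KERN_SUCCESS the copy object has been consumed; on failure the
 * caller still owns it and must vm_map_copy_discard() it.
 */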
9065
9066 static kern_return_t
9067 vm_map_copy_overwrite_nested(
9068 vm_map_t dst_map,
9069 vm_map_address_t dst_addr,
9070 vm_map_copy_t copy,
9071 boolean_t interruptible,
9072 pmap_t pmap,
9073 boolean_t discard_on_success)
9074 {
9075 vm_map_offset_t dst_end;
9076 vm_map_entry_t tmp_entry;
9077 vm_map_entry_t entry;
9078 kern_return_t kr;
9079 boolean_t aligned = TRUE;
9080 boolean_t contains_permanent_objects = FALSE;
9081 boolean_t encountered_sub_map = FALSE;
9082 vm_map_offset_t base_addr;
9083 vm_map_size_t copy_size;
9084 vm_map_size_t total_size;
9085 int copy_page_shift;
9086
9087
9088 /*
9089 * Check for null copy object.
9090 */
9091
9092 if (copy == VM_MAP_COPY_NULL) {
9093 return KERN_SUCCESS;
9094 }
9095
9096 /*
9097 * Assert that the vm_map_copy is coming from the right
9098 * zone and hasn't been forged
9099 */
9100 vm_map_copy_require(copy);
9101
9102 /*
9103 * Check for special kernel buffer allocated
9104 * by new_ipc_kmsg_copyin.
9105 */
9106
9107 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9108 return vm_map_copyout_kernel_buffer(
9109 dst_map, &dst_addr,
9110 copy, copy->size, TRUE, discard_on_success);
9111 }
9112
9113 /*
9114 * Only works for entry lists at the moment. Will
9115 * support page lists later.
9116 */
9117
9118 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9119
9120 if (copy->size == 0) {
9121 if (discard_on_success) {
9122 vm_map_copy_discard(copy);
9123 }
9124 return KERN_SUCCESS;
9125 }
9126
9127 copy_page_shift = copy->cpy_hdr.page_shift;
9128
9129 /*
9130 * Verify that the destination is all writeable
9131 * initially. We have to trunc the destination
9132 * address and round the copy size or we'll end up
9133 * splitting entries in strange ways.
9134 */
9135
9136 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9137 VM_MAP_PAGE_MASK(dst_map)) ||
9138 !VM_MAP_PAGE_ALIGNED(copy->offset,
9139 VM_MAP_PAGE_MASK(dst_map)) ||
9140 !VM_MAP_PAGE_ALIGNED(dst_addr,
9141 VM_MAP_PAGE_MASK(dst_map)) ||
9142 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9143 aligned = FALSE;
9144 dst_end = vm_map_round_page(dst_addr + copy->size,
9145 VM_MAP_PAGE_MASK(dst_map));
9146 } else {
9147 dst_end = dst_addr + copy->size;
9148 }
9149
9150 vm_map_lock(dst_map);
9151
9152 /* LP64todo - remove this check when vm_map_commpage64()
9153 * no longer has to stuff in a map_entry for the commpage
9154 * above the map's max_offset.
9155 */
9156 if (dst_addr >= dst_map->max_offset) {
9157 vm_map_unlock(dst_map);
9158 return KERN_INVALID_ADDRESS;
9159 }
9160
9161 start_pass_1:
9162 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9163 vm_map_unlock(dst_map);
9164 return KERN_INVALID_ADDRESS;
9165 }
9166 vm_map_clip_start(dst_map,
9167 tmp_entry,
9168 vm_map_trunc_page(dst_addr,
9169 VM_MAP_PAGE_MASK(dst_map)));
9170 for (entry = tmp_entry;;) {
9171 vm_map_entry_t next = entry->vme_next;
9172
9173 while (entry->is_sub_map) {
9174 vm_map_offset_t sub_start;
9175 vm_map_offset_t sub_end;
9176 vm_map_offset_t local_end;
9177
9178 if (entry->in_transition) {
9179 /*
9180 * Say that we are waiting, and wait for entry.
9181 */
9182 entry->needs_wakeup = TRUE;
9183 vm_map_entry_wait(dst_map, THREAD_UNINT);
9184
9185 goto start_pass_1;
9186 }
9187
9188 local_end = entry->vme_end;
9189 if (!(entry->needs_copy)) {
9190 /* if needs_copy we are a COW submap */
9191 /* in such a case we just replace so */
9192 /* there is no need for the follow- */
9193 /* ing check. */
9194 encountered_sub_map = TRUE;
9195 sub_start = VME_OFFSET(entry);
9196
9197 if (entry->vme_end < dst_end) {
9198 sub_end = entry->vme_end;
9199 } else {
9200 sub_end = dst_end;
9201 }
9202 sub_end -= entry->vme_start;
9203 sub_end += VME_OFFSET(entry);
9204 vm_map_unlock(dst_map);
9205
9206 kr = vm_map_overwrite_submap_recurse(
9207 VME_SUBMAP(entry),
9208 sub_start,
9209 sub_end - sub_start);
9210 if (kr != KERN_SUCCESS) {
9211 return kr;
9212 }
9213 vm_map_lock(dst_map);
9214 }
9215
9216 if (dst_end <= entry->vme_end) {
9217 goto start_overwrite;
9218 }
9219 if (!vm_map_lookup_entry(dst_map, local_end,
9220 &entry)) {
9221 vm_map_unlock(dst_map);
9222 return KERN_INVALID_ADDRESS;
9223 }
9224 next = entry->vme_next;
9225 }
9226
9227 if (!(entry->protection & VM_PROT_WRITE)) {
9228 vm_map_unlock(dst_map);
9229 return KERN_PROTECTION_FAILURE;
9230 }
9231
9232 /*
9233 * If the entry is in transition, we must wait
9234 * for it to exit that state. Anything could happen
9235 * when we unlock the map, so start over.
9236 */
9237 if (entry->in_transition) {
9238 /*
9239 * Say that we are waiting, and wait for entry.
9240 */
9241 entry->needs_wakeup = TRUE;
9242 vm_map_entry_wait(dst_map, THREAD_UNINT);
9243
9244 goto start_pass_1;
9245 }
9246
9247 /*
9248 * our range is contained completely within this map entry
9249 */
9250 if (dst_end <= entry->vme_end) {
9251 break;
9252 }
9253 /*
9254 * check that range specified is contiguous region
9255 */
9256 if ((next == vm_map_to_entry(dst_map)) ||
9257 (next->vme_start != entry->vme_end)) {
9258 vm_map_unlock(dst_map);
9259 return KERN_INVALID_ADDRESS;
9260 }
9261
9262
9263 /*
9264 * Check for permanent objects in the destination.
9265 */
9266 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9267 ((!VME_OBJECT(entry)->internal) ||
9268 (VME_OBJECT(entry)->true_share))) {
9269 contains_permanent_objects = TRUE;
9270 }
9271
9272 entry = next;
9273 }/* for */
9274
9275 start_overwrite:
9276 /*
9277 * If there are permanent objects in the destination, then
9278 * the copy cannot be interrupted.
9279 */
9280
9281 if (interruptible && contains_permanent_objects) {
9282 vm_map_unlock(dst_map);
9283 return KERN_FAILURE; /* XXX */
9284 }
9285
9286 /*
9287 *
9288 * Make a second pass, overwriting the data
9289 * At the beginning of each loop iteration,
9290 * the next entry to be overwritten is "tmp_entry"
9291 * (initially, the value returned from the lookup above),
9292 * and the starting address expected in that entry
9293 * is "start".
9294 */
9295
9296 total_size = copy->size;
9297 if (encountered_sub_map) {
9298 copy_size = 0;
9299 /* re-calculate tmp_entry since we've had the map */
9300 /* unlocked */
9301 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9302 vm_map_unlock(dst_map);
9303 return KERN_INVALID_ADDRESS;
9304 }
9305 } else {
9306 copy_size = copy->size;
9307 }
9308
9309 base_addr = dst_addr;
9310 while (TRUE) {
9311 /* deconstruct the copy object and do in parts */
9312 /* only in sub_map, interruptible case */
9313 vm_map_entry_t copy_entry;
9314 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9315 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9316 int nentries;
9317 int remaining_entries = 0;
9318 vm_map_offset_t new_offset = 0;
9319
9320 for (entry = tmp_entry; copy_size == 0;) {
9321 vm_map_entry_t next;
9322
9323 next = entry->vme_next;
9324
9325 /* tmp_entry and base address are moved along */
9326 /* each time we encounter a sub-map. Otherwise */
9327 /* entry can outpace tmp_entry, and the copy_size */
9328 /* may reflect the distance between them */
9329 /* if the current entry is found to be in transition */
9330 /* we will start over at the beginning or the last */
9331 /* encounter of a submap as dictated by base_addr */
9332 /* we will zero copy_size accordingly. */
9333 if (entry->in_transition) {
9334 /*
9335 * Say that we are waiting, and wait for entry.
9336 */
9337 entry->needs_wakeup = TRUE;
9338 vm_map_entry_wait(dst_map, THREAD_UNINT);
9339
9340 if (!vm_map_lookup_entry(dst_map, base_addr,
9341 &tmp_entry)) {
9342 vm_map_unlock(dst_map);
9343 return KERN_INVALID_ADDRESS;
9344 }
9345 copy_size = 0;
9346 entry = tmp_entry;
9347 continue;
9348 }
9349 if (entry->is_sub_map) {
9350 vm_map_offset_t sub_start;
9351 vm_map_offset_t sub_end;
9352 vm_map_offset_t local_end;
9353
9354 if (entry->needs_copy) {
9355 /* if this is a COW submap */
9356 /* just back the range with an */
9357 /* anonymous entry */
9358 if (entry->vme_end < dst_end) {
9359 sub_end = entry->vme_end;
9360 } else {
9361 sub_end = dst_end;
9362 }
9363 if (entry->vme_start < base_addr) {
9364 sub_start = base_addr;
9365 } else {
9366 sub_start = entry->vme_start;
9367 }
9368 vm_map_clip_end(
9369 dst_map, entry, sub_end);
9370 vm_map_clip_start(
9371 dst_map, entry, sub_start);
9372 assert(!entry->use_pmap);
9373 assert(!entry->iokit_acct);
9374 entry->use_pmap = TRUE;
9375 entry->is_sub_map = FALSE;
9376 vm_map_deallocate(
9377 VME_SUBMAP(entry));
9378 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9379 VME_OFFSET_SET(entry, 0);
9380 entry->is_shared = FALSE;
9381 entry->needs_copy = FALSE;
9382 entry->protection = VM_PROT_DEFAULT;
9383 entry->max_protection = VM_PROT_ALL;
9384 entry->wired_count = 0;
9385 entry->user_wired_count = 0;
9386 if (entry->inheritance
9387 == VM_INHERIT_SHARE) {
9388 entry->inheritance = VM_INHERIT_COPY;
9389 }
9390 continue;
9391 }
9392 /* first take care of any non-sub_map */
9393 /* entries to send */
9394 if (base_addr < entry->vme_start) {
9395 /* stuff to send */
9396 copy_size =
9397 entry->vme_start - base_addr;
9398 break;
9399 }
9400 sub_start = VME_OFFSET(entry);
9401
9402 if (entry->vme_end < dst_end) {
9403 sub_end = entry->vme_end;
9404 } else {
9405 sub_end = dst_end;
9406 }
9407 sub_end -= entry->vme_start;
9408 sub_end += VME_OFFSET(entry);
9409 local_end = entry->vme_end;
9410 vm_map_unlock(dst_map);
9411 copy_size = sub_end - sub_start;
9412
9413 /* adjust the copy object */
9414 if (total_size > copy_size) {
9415 vm_map_size_t local_size = 0;
9416 vm_map_size_t entry_size;
9417
9418 nentries = 1;
9419 new_offset = copy->offset;
9420 copy_entry = vm_map_copy_first_entry(copy);
9421 while (copy_entry !=
9422 vm_map_copy_to_entry(copy)) {
9423 entry_size = copy_entry->vme_end -
9424 copy_entry->vme_start;
9425 if ((local_size < copy_size) &&
9426 ((local_size + entry_size)
9427 >= copy_size)) {
9428 vm_map_copy_clip_end(copy,
9429 copy_entry,
9430 copy_entry->vme_start +
9431 (copy_size - local_size));
9432 entry_size = copy_entry->vme_end -
9433 copy_entry->vme_start;
9434 local_size += entry_size;
9435 new_offset += entry_size;
9436 }
9437 if (local_size >= copy_size) {
9438 next_copy = copy_entry->vme_next;
9439 copy_entry->vme_next =
9440 vm_map_copy_to_entry(copy);
9441 previous_prev =
9442 copy->cpy_hdr.links.prev;
9443 copy->cpy_hdr.links.prev = copy_entry;
9444 copy->size = copy_size;
9445 remaining_entries =
9446 copy->cpy_hdr.nentries;
9447 remaining_entries -= nentries;
9448 copy->cpy_hdr.nentries = nentries;
9449 break;
9450 } else {
9451 local_size += entry_size;
9452 new_offset += entry_size;
9453 nentries++;
9454 }
9455 copy_entry = copy_entry->vme_next;
9456 }
9457 }
9458
9459 if ((entry->use_pmap) && (pmap == NULL)) {
9460 kr = vm_map_copy_overwrite_nested(
9461 VME_SUBMAP(entry),
9462 sub_start,
9463 copy,
9464 interruptible,
9465 VME_SUBMAP(entry)->pmap,
9466 TRUE);
9467 } else if (pmap != NULL) {
9468 kr = vm_map_copy_overwrite_nested(
9469 VME_SUBMAP(entry),
9470 sub_start,
9471 copy,
9472 interruptible, pmap,
9473 TRUE);
9474 } else {
9475 kr = vm_map_copy_overwrite_nested(
9476 VME_SUBMAP(entry),
9477 sub_start,
9478 copy,
9479 interruptible,
9480 dst_map->pmap,
9481 TRUE);
9482 }
9483 if (kr != KERN_SUCCESS) {
9484 if (next_copy != NULL) {
9485 copy->cpy_hdr.nentries +=
9486 remaining_entries;
9487 copy->cpy_hdr.links.prev->vme_next =
9488 next_copy;
9489 copy->cpy_hdr.links.prev
9490 = previous_prev;
9491 copy->size = total_size;
9492 }
9493 return kr;
9494 }
9495 if (dst_end <= local_end) {
9496 return KERN_SUCCESS;
9497 }
9498 /* otherwise copy no longer exists, it was */
9499 /* destroyed after successful copy_overwrite */
9500 copy = vm_map_copy_allocate();
9501 copy->type = VM_MAP_COPY_ENTRY_LIST;
9502 copy->offset = new_offset;
9503 copy->cpy_hdr.page_shift = copy_page_shift;
9504
9505 /*
9506 * XXX FBDP
9507 * this does not seem to deal with
9508 * the VM map store (R&B tree)
9509 */
9510
9511 total_size -= copy_size;
9512 copy_size = 0;
9513 /* put back remainder of copy in container */
9514 if (next_copy != NULL) {
9515 copy->cpy_hdr.nentries = remaining_entries;
9516 copy->cpy_hdr.links.next = next_copy;
9517 copy->cpy_hdr.links.prev = previous_prev;
9518 copy->size = total_size;
9519 next_copy->vme_prev =
9520 vm_map_copy_to_entry(copy);
9521 next_copy = NULL;
9522 }
9523 base_addr = local_end;
9524 vm_map_lock(dst_map);
9525 if (!vm_map_lookup_entry(dst_map,
9526 local_end, &tmp_entry)) {
9527 vm_map_unlock(dst_map);
9528 return KERN_INVALID_ADDRESS;
9529 }
9530 entry = tmp_entry;
9531 continue;
9532 }
9533 if (dst_end <= entry->vme_end) {
9534 copy_size = dst_end - base_addr;
9535 break;
9536 }
9537
9538 if ((next == vm_map_to_entry(dst_map)) ||
9539 (next->vme_start != entry->vme_end)) {
9540 vm_map_unlock(dst_map);
9541 return KERN_INVALID_ADDRESS;
9542 }
9543
9544 entry = next;
9545 }/* for */
9546
9547 next_copy = NULL;
9548 nentries = 1;
9549
9550 /* adjust the copy object */
9551 if (total_size > copy_size) {
9552 vm_map_size_t local_size = 0;
9553 vm_map_size_t entry_size;
9554
9555 new_offset = copy->offset;
9556 copy_entry = vm_map_copy_first_entry(copy);
9557 while (copy_entry != vm_map_copy_to_entry(copy)) {
9558 entry_size = copy_entry->vme_end -
9559 copy_entry->vme_start;
9560 if ((local_size < copy_size) &&
9561 ((local_size + entry_size)
9562 >= copy_size)) {
9563 vm_map_copy_clip_end(copy, copy_entry,
9564 copy_entry->vme_start +
9565 (copy_size - local_size));
9566 entry_size = copy_entry->vme_end -
9567 copy_entry->vme_start;
9568 local_size += entry_size;
9569 new_offset += entry_size;
9570 }
9571 if (local_size >= copy_size) {
9572 next_copy = copy_entry->vme_next;
9573 copy_entry->vme_next =
9574 vm_map_copy_to_entry(copy);
9575 previous_prev =
9576 copy->cpy_hdr.links.prev;
9577 copy->cpy_hdr.links.prev = copy_entry;
9578 copy->size = copy_size;
9579 remaining_entries =
9580 copy->cpy_hdr.nentries;
9581 remaining_entries -= nentries;
9582 copy->cpy_hdr.nentries = nentries;
9583 break;
9584 } else {
9585 local_size += entry_size;
9586 new_offset += entry_size;
9587 nentries++;
9588 }
9589 copy_entry = copy_entry->vme_next;
9590 }
9591 }
9592
9593 if (aligned) {
9594 pmap_t local_pmap;
9595
9596 if (pmap) {
9597 local_pmap = pmap;
9598 } else {
9599 local_pmap = dst_map->pmap;
9600 }
9601
9602 if ((kr = vm_map_copy_overwrite_aligned(
9603 dst_map, tmp_entry, copy,
9604 base_addr, local_pmap)) != KERN_SUCCESS) {
9605 if (next_copy != NULL) {
9606 copy->cpy_hdr.nentries +=
9607 remaining_entries;
9608 copy->cpy_hdr.links.prev->vme_next =
9609 next_copy;
9610 copy->cpy_hdr.links.prev =
9611 previous_prev;
9612 copy->size += copy_size;
9613 }
9614 return kr;
9615 }
9616 vm_map_unlock(dst_map);
9617 } else {
9618 /*
9619 * Performance gain:
9620 *
9621 * if the copy and dst address are misaligned but the same
9622 * offset within the page we can copy_not_aligned the
9623 * misaligned parts and copy aligned the rest. If they are
9624 * aligned but len is unaligned we simply need to copy
9625 * the end bit unaligned. We'll need to split the misaligned
9626 * bits of the region in this case!
9627 */
9628 /* ALWAYS UNLOCKS THE dst_map MAP */
9629 kr = vm_map_copy_overwrite_unaligned(
9630 dst_map,
9631 tmp_entry,
9632 copy,
9633 base_addr,
9634 discard_on_success);
9635 if (kr != KERN_SUCCESS) {
9636 if (next_copy != NULL) {
9637 copy->cpy_hdr.nentries +=
9638 remaining_entries;
9639 copy->cpy_hdr.links.prev->vme_next =
9640 next_copy;
9641 copy->cpy_hdr.links.prev =
9642 previous_prev;
9643 copy->size += copy_size;
9644 }
9645 return kr;
9646 }
9647 }
9648 total_size -= copy_size;
9649 if (total_size == 0) {
9650 break;
9651 }
9652 base_addr += copy_size;
9653 copy_size = 0;
9654 copy->offset = new_offset;
9655 if (next_copy != NULL) {
9656 copy->cpy_hdr.nentries = remaining_entries;
9657 copy->cpy_hdr.links.next = next_copy;
9658 copy->cpy_hdr.links.prev = previous_prev;
9659 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9660 copy->size = total_size;
9661 }
9662 vm_map_lock(dst_map);
9663 while (TRUE) {
9664 if (!vm_map_lookup_entry(dst_map,
9665 base_addr, &tmp_entry)) {
9666 vm_map_unlock(dst_map);
9667 return KERN_INVALID_ADDRESS;
9668 }
9669 if (tmp_entry->in_transition) {
9670 entry->needs_wakeup = TRUE;
9671 vm_map_entry_wait(dst_map, THREAD_UNINT);
9672 } else {
9673 break;
9674 }
9675 }
9676 vm_map_clip_start(dst_map,
9677 tmp_entry,
9678 vm_map_trunc_page(base_addr,
9679 VM_MAP_PAGE_MASK(dst_map)));
9680
9681 entry = tmp_entry;
9682 } /* while */
9683
9684 /*
9685 * Throw away the vm_map_copy object
9686 */
9687 if (discard_on_success) {
9688 vm_map_copy_discard(copy);
9689 }
9690
9691 return KERN_SUCCESS;
9692 }/* vm_map_copy_overwrite */
9693
9694 kern_return_t
9695 vm_map_copy_overwrite(
9696 vm_map_t dst_map,
9697 vm_map_offset_t dst_addr,
9698 vm_map_copy_t copy,
9699 vm_map_size_t copy_size,
9700 boolean_t interruptible)
9701 {
9702 vm_map_size_t head_size, tail_size;
9703 vm_map_copy_t head_copy, tail_copy;
9704 vm_map_offset_t head_addr, tail_addr;
9705 vm_map_entry_t entry;
9706 kern_return_t kr;
9707 vm_map_offset_t effective_page_mask, effective_page_size;
9708 int copy_page_shift;
9709
9710 head_size = 0;
9711 tail_size = 0;
9712 head_copy = NULL;
9713 tail_copy = NULL;
9714 head_addr = 0;
9715 tail_addr = 0;
9716
9717 if (interruptible ||
9718 copy == VM_MAP_COPY_NULL ||
9719 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9720 /*
9721 * We can't split the "copy" map if we're interruptible
9722 * or if we don't have a "copy" map...
9723 */
9724 blunt_copy:
9725 return vm_map_copy_overwrite_nested(dst_map,
9726 dst_addr,
9727 copy,
9728 interruptible,
9729 (pmap_t) NULL,
9730 TRUE);
9731 }
9732
9733 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9734 if (copy_page_shift < PAGE_SHIFT ||
9735 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9736 goto blunt_copy;
9737 }
9738
9739 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9740 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9741 } else {
9742 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9743 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9744 effective_page_mask);
9745 }
9746 effective_page_size = effective_page_mask + 1;
9747
9748 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9749 /*
9750 * Too small to bother with optimizing...
9751 */
9752 goto blunt_copy;
9753 }
9754
9755 if ((dst_addr & effective_page_mask) !=
9756 (copy->offset & effective_page_mask)) {
9757 /*
9758 * Incompatible mis-alignment of source and destination...
9759 */
9760 goto blunt_copy;
9761 }
9762
9763 /*
9764 * Proper alignment or identical mis-alignment at the beginning.
9765 * Let's try and do a small unaligned copy first (if needed)
9766 * and then an aligned copy for the rest.
9767 */
9768 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9769 head_addr = dst_addr;
9770 head_size = (effective_page_size -
9771 (copy->offset & effective_page_mask));
9772 head_size = MIN(head_size, copy_size);
9773 }
9774 if (!vm_map_page_aligned(copy->offset + copy_size,
9775 effective_page_mask)) {
9776 /*
9777 * Mis-alignment at the end.
9778 * Do an aligned copy up to the last page and
9779 * then an unaligned copy for the remaining bytes.
9780 */
9781 tail_size = ((copy->offset + copy_size) &
9782 effective_page_mask);
9783 tail_size = MIN(tail_size, copy_size);
9784 tail_addr = dst_addr + copy_size - tail_size;
9785 assert(tail_addr >= head_addr + head_size);
9786 }
9787 assert(head_size + tail_size <= copy_size);
9788
9789 if (head_size + tail_size == copy_size) {
9790 /*
9791 * It's all unaligned, no optimization possible...
9792 */
9793 goto blunt_copy;
9794 }
9795
9796 /*
9797 * Can't optimize if there are any submaps in the
9798 * destination due to the way we free the "copy" map
9799 * progressively in vm_map_copy_overwrite_nested()
9800 * in that case.
9801 */
9802 vm_map_lock_read(dst_map);
9803 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9804 vm_map_unlock_read(dst_map);
9805 goto blunt_copy;
9806 }
9807 for (;
9808 (entry != vm_map_copy_to_entry(copy) &&
9809 entry->vme_start < dst_addr + copy_size);
9810 entry = entry->vme_next) {
9811 if (entry->is_sub_map) {
9812 vm_map_unlock_read(dst_map);
9813 goto blunt_copy;
9814 }
9815 }
9816 vm_map_unlock_read(dst_map);
9817
9818 if (head_size) {
9819 /*
9820 * Unaligned copy of the first "head_size" bytes, to reach
9821 * a page boundary.
9822 */
9823
9824 /*
9825 * Extract "head_copy" out of "copy".
9826 */
9827 head_copy = vm_map_copy_allocate();
9828 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9829 head_copy->cpy_hdr.entries_pageable =
9830 copy->cpy_hdr.entries_pageable;
9831 vm_map_store_init(&head_copy->cpy_hdr);
9832 head_copy->cpy_hdr.page_shift = copy_page_shift;
9833
9834 entry = vm_map_copy_first_entry(copy);
9835 if (entry->vme_end < copy->offset + head_size) {
9836 head_size = entry->vme_end - copy->offset;
9837 }
9838
9839 head_copy->offset = copy->offset;
9840 head_copy->size = head_size;
9841 copy->offset += head_size;
9842 copy->size -= head_size;
9843 copy_size -= head_size;
9844 assert(copy_size > 0);
9845
9846 vm_map_copy_clip_end(copy, entry, copy->offset);
9847 vm_map_copy_entry_unlink(copy, entry);
9848 vm_map_copy_entry_link(head_copy,
9849 vm_map_copy_to_entry(head_copy),
9850 entry);
9851
9852 /*
9853 * Do the unaligned copy.
9854 */
9855 kr = vm_map_copy_overwrite_nested(dst_map,
9856 head_addr,
9857 head_copy,
9858 interruptible,
9859 (pmap_t) NULL,
9860 FALSE);
9861 if (kr != KERN_SUCCESS) {
9862 goto done;
9863 }
9864 }
9865
9866 if (tail_size) {
9867 /*
9868 * Extract "tail_copy" out of "copy".
9869 */
9870 tail_copy = vm_map_copy_allocate();
9871 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9872 tail_copy->cpy_hdr.entries_pageable =
9873 copy->cpy_hdr.entries_pageable;
9874 vm_map_store_init(&tail_copy->cpy_hdr);
9875 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9876
9877 tail_copy->offset = copy->offset + copy_size - tail_size;
9878 tail_copy->size = tail_size;
9879
9880 copy->size -= tail_size;
9881 copy_size -= tail_size;
9882 assert(copy_size > 0);
9883
9884 entry = vm_map_copy_last_entry(copy);
9885 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9886 entry = vm_map_copy_last_entry(copy);
9887 vm_map_copy_entry_unlink(copy, entry);
9888 vm_map_copy_entry_link(tail_copy,
9889 vm_map_copy_last_entry(tail_copy),
9890 entry);
9891 }
9892
9893 /*
9894 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9895 * we want to avoid TOCTOU issues w.r.t copy->size but
9896 * we don't need to change vm_map_copy_overwrite_nested()
9897 * and all other vm_map_copy_overwrite variants.
9898 *
9899 * So we assign the original copy_size that was passed into
9900 * this routine back to copy.
9901 *
9902 * This use of local 'copy_size' passed into this routine is
9903 * to try and protect against TOCTOU attacks where the kernel
9904 * has been exploited. We don't expect this to be an issue
9905 * during normal system operation.
9906 */
9907 assertf(copy->size == copy_size,
9908 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9909 copy->size = copy_size;
9910
9911 /*
9912 * Copy most (or possibly all) of the data.
9913 */
9914 kr = vm_map_copy_overwrite_nested(dst_map,
9915 dst_addr + head_size,
9916 copy,
9917 interruptible,
9918 (pmap_t) NULL,
9919 FALSE);
9920 if (kr != KERN_SUCCESS) {
9921 goto done;
9922 }
9923
9924 if (tail_size) {
9925 kr = vm_map_copy_overwrite_nested(dst_map,
9926 tail_addr,
9927 tail_copy,
9928 interruptible,
9929 (pmap_t) NULL,
9930 FALSE);
9931 }
9932
9933 done:
9934 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9935 if (kr == KERN_SUCCESS) {
9936 /*
9937 * Discard all the copy maps.
9938 */
9939 if (head_copy) {
9940 vm_map_copy_discard(head_copy);
9941 head_copy = NULL;
9942 }
9943 vm_map_copy_discard(copy);
9944 if (tail_copy) {
9945 vm_map_copy_discard(tail_copy);
9946 tail_copy = NULL;
9947 }
9948 } else {
9949 /*
9950 * Re-assemble the original copy map.
9951 */
9952 if (head_copy) {
9953 entry = vm_map_copy_first_entry(head_copy);
9954 vm_map_copy_entry_unlink(head_copy, entry);
9955 vm_map_copy_entry_link(copy,
9956 vm_map_copy_to_entry(copy),
9957 entry);
9958 copy->offset -= head_size;
9959 copy->size += head_size;
9960 vm_map_copy_discard(head_copy);
9961 head_copy = NULL;
9962 }
9963 if (tail_copy) {
9964 entry = vm_map_copy_last_entry(tail_copy);
9965 vm_map_copy_entry_unlink(tail_copy, entry);
9966 vm_map_copy_entry_link(copy,
9967 vm_map_copy_last_entry(copy),
9968 entry);
9969 copy->size += tail_size;
9970 vm_map_copy_discard(tail_copy);
9971 tail_copy = NULL;
9972 }
9973 }
9974 return kr;
9975 }
9976
9977
9978 /*
9979 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9980 *
9981 * Description:
9982 * Physically copy unaligned data
9983 *
9984 * Implementation:
9985 * Unaligned parts of pages have to be physically copied. We use
9986 * a modified form of vm_fault_copy (which understands non-aligned
9987 * page offsets and sizes) to do the copy. We attempt to copy as
9988 * much memory in one go as possible; however, vm_fault_copy copies
9989 * within a single memory object, so we have to find the smallest of
9990 * "amount left", "source object data size" and "target object data
9991 * size". With unaligned data we don't need to split regions, so the
9992 * source (copy) object should be a single map entry; the target range
9993 * may be split over multiple map entries, however. In any event we
9994 * are pessimistic about these assumptions.
9995 *
9996 * Assumptions:
9997 * dst_map is locked on entry and is returned locked on success,
9998 * unlocked on error.
9999 */
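/*
 * Illustrative sketch (not part of the build): how the routine below picks
 * the size of each physical-copy chunk -- the smallest of the bytes left
 * overall, the bytes left in the current source copy entry, and the bytes
 * left in the current destination entry. The helper name is hypothetical;
 * the real routine computes this inline.
 */
#if 0
static inline vm_map_size_t
vm_map_copy_overwrite_unaligned_chunk_size(
	vm_map_size_t amount_left,
	vm_map_size_t src_size,
	vm_map_size_t dst_size)
{
	vm_map_size_t chunk;

	/* bounded by both the current source and destination entries */
	chunk = (dst_size < src_size) ? dst_size : src_size;
	/* and never more than what remains to be copied overall */
	if (chunk > amount_left) {
		chunk = amount_left;
	}
	return chunk;
}
#endif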
10000
10001 static kern_return_t
10002 vm_map_copy_overwrite_unaligned(
10003 vm_map_t dst_map,
10004 vm_map_entry_t entry,
10005 vm_map_copy_t copy,
10006 vm_map_offset_t start,
10007 boolean_t discard_on_success)
10008 {
10009 vm_map_entry_t copy_entry;
10010 vm_map_entry_t copy_entry_next;
10011 vm_map_version_t version;
10012 vm_object_t dst_object;
10013 vm_object_offset_t dst_offset;
10014 vm_object_offset_t src_offset;
10015 vm_object_offset_t entry_offset;
10016 vm_map_offset_t entry_end;
10017 vm_map_size_t src_size,
10018 dst_size,
10019 copy_size,
10020 amount_left;
10021 kern_return_t kr = KERN_SUCCESS;
10022
10023
10024 copy_entry = vm_map_copy_first_entry(copy);
10025
10026 vm_map_lock_write_to_read(dst_map);
10027
10028 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10029 amount_left = copy->size;
10030 /*
10031 * Unaligned, so we never clipped this entry; we need the offset into
10032 * the vm_object, not just into the data.
10033 */
10034 while (amount_left > 0) {
10035 if (entry == vm_map_to_entry(dst_map)) {
10036 vm_map_unlock_read(dst_map);
10037 return KERN_INVALID_ADDRESS;
10038 }
10039
10040 /* "start" must be within the current map entry */
10041 assert((start >= entry->vme_start) && (start < entry->vme_end));
10042
10043 dst_offset = start - entry->vme_start;
10044
10045 dst_size = entry->vme_end - start;
10046
10047 src_size = copy_entry->vme_end -
10048 (copy_entry->vme_start + src_offset);
10049
10050 if (dst_size < src_size) {
10051 /*
10052 * we can only copy dst_size bytes before
10053 * we have to get the next destination entry
10054 */
10055 copy_size = dst_size;
10056 } else {
10057 /*
10058 * we can only copy src_size bytes before
10059 * we have to get the next source copy entry
10060 */
10061 copy_size = src_size;
10062 }
10063
10064 if (copy_size > amount_left) {
10065 copy_size = amount_left;
10066 }
10067 /*
10068 * Entry needs copy: create a shadow object for the
10069 * copy-on-write region.
10070 */
10071 if (entry->needs_copy &&
10072 ((entry->protection & VM_PROT_WRITE) != 0)) {
10073 if (vm_map_lock_read_to_write(dst_map)) {
10074 vm_map_lock_read(dst_map);
10075 goto RetryLookup;
10076 }
10077 VME_OBJECT_SHADOW(entry,
10078 (vm_map_size_t)(entry->vme_end
10079 - entry->vme_start));
10080 entry->needs_copy = FALSE;
10081 vm_map_lock_write_to_read(dst_map);
10082 }
10083 dst_object = VME_OBJECT(entry);
10084 /*
10085 * Unlike with the virtual (aligned) copy, we're going
10086 * to fault on it, therefore we need a target object.
10087 */
10088 if (dst_object == VM_OBJECT_NULL) {
10089 if (vm_map_lock_read_to_write(dst_map)) {
10090 vm_map_lock_read(dst_map);
10091 goto RetryLookup;
10092 }
10093 dst_object = vm_object_allocate((vm_map_size_t)
10094 entry->vme_end - entry->vme_start);
10095 VME_OBJECT_SET(entry, dst_object);
10096 VME_OFFSET_SET(entry, 0);
10097 assert(entry->use_pmap);
10098 vm_map_lock_write_to_read(dst_map);
10099 }
10100 /*
10101 * Take an object reference and unlock map. The "entry" may
10102 * disappear or change when the map is unlocked.
10103 */
10104 vm_object_reference(dst_object);
10105 version.main_timestamp = dst_map->timestamp;
10106 entry_offset = VME_OFFSET(entry);
10107 entry_end = entry->vme_end;
10108 vm_map_unlock_read(dst_map);
10109 /*
10110 * Copy as much as possible in one pass
10111 */
10112 kr = vm_fault_copy(
10113 VME_OBJECT(copy_entry),
10114 VME_OFFSET(copy_entry) + src_offset,
10115 &copy_size,
10116 dst_object,
10117 entry_offset + dst_offset,
10118 dst_map,
10119 &version,
10120 THREAD_UNINT );
10121
10122 start += copy_size;
10123 src_offset += copy_size;
10124 amount_left -= copy_size;
10125 /*
10126 * Release the object reference
10127 */
10128 vm_object_deallocate(dst_object);
10129 /*
10130 * If a hard error occurred, return it now
10131 */
10132 if (kr != KERN_SUCCESS) {
10133 return kr;
10134 }
10135
10136 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10137 || amount_left == 0) {
10138 /*
10139 * all done with this copy entry, dispose.
10140 */
10141 copy_entry_next = copy_entry->vme_next;
10142
10143 if (discard_on_success) {
10144 vm_map_copy_entry_unlink(copy, copy_entry);
10145 assert(!copy_entry->is_sub_map);
10146 vm_object_deallocate(VME_OBJECT(copy_entry));
10147 vm_map_copy_entry_dispose(copy, copy_entry);
10148 }
10149
10150 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10151 amount_left) {
10152 /*
10153 * not finished copying but ran out of source
10154 */
10155 return KERN_INVALID_ADDRESS;
10156 }
10157
10158 copy_entry = copy_entry_next;
10159
10160 src_offset = 0;
10161 }
10162
10163 if (amount_left == 0) {
10164 return KERN_SUCCESS;
10165 }
10166
10167 vm_map_lock_read(dst_map);
10168 if (version.main_timestamp == dst_map->timestamp) {
10169 if (start == entry_end) {
10170 /*
10171 * destination region is split. Use the version
10172 * information to avoid a lookup in the normal
10173 * case.
10174 */
10175 entry = entry->vme_next;
10176 /*
10177 * should be contiguous. Fail if we encounter
10178 * a hole in the destination.
10179 */
10180 if (start != entry->vme_start) {
10181 vm_map_unlock_read(dst_map);
10182 return KERN_INVALID_ADDRESS;
10183 }
10184 }
10185 } else {
10186 /*
10187 * Map version check failed.
10188 * we must look up the entry because somebody
10189 * might have changed the map behind our backs.
10190 */
10191 RetryLookup:
10192 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10193 vm_map_unlock_read(dst_map);
10194 return KERN_INVALID_ADDRESS;
10195 }
10196 }
10197 }/* while */
10198
10199 return KERN_SUCCESS;
10200 }/* vm_map_copy_overwrite_unaligned */
10201
10202 /*
10203 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10204 *
10205 * Description:
10206 * Does all the vm_trickery possible for whole pages.
10207 *
10208 * Implementation:
10209 *
10210 * If there are no permanent objects in the destination,
10211 * and the source and destination map entry zones match,
10212 * and the destination map entry is not shared,
10213 * then the map entries can be deleted and replaced
10214 * with those from the copy. The following code is the
10215 * basic idea of what to do, but there are lots of annoying
10216 * little details about getting protection and inheritance
10217 * right. Should add protection, inheritance, and sharing checks
10218 * to the above pass and make sure that no wiring is involved.
10219 */
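/*
 * Illustrative sketch (not part of the build): the test the routine below
 * applies to decide whether the destination entry's memory is "temporary
 * unshared" and can simply be replaced by the copy entry's object instead
 * of being physically copied. The helper name is hypothetical; the real
 * routine evaluates this condition inline.
 */
#if 0
static inline boolean_t
vm_map_copy_overwrite_aligned_can_replace(
	vm_map_entry_t entry,
	vm_object_t object)
{
	return (!entry->is_shared &&
	       ((object == VM_OBJECT_NULL) ||
	       (object->internal && !object->true_share))) ||
	       entry->needs_copy;
}
#endif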
10220
10221 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10222 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10223 int vm_map_copy_overwrite_aligned_src_large = 0;
10224
10225 static kern_return_t
10226 vm_map_copy_overwrite_aligned(
10227 vm_map_t dst_map,
10228 vm_map_entry_t tmp_entry,
10229 vm_map_copy_t copy,
10230 vm_map_offset_t start,
10231 __unused pmap_t pmap)
10232 {
10233 vm_object_t object;
10234 vm_map_entry_t copy_entry;
10235 vm_map_size_t copy_size;
10236 vm_map_size_t size;
10237 vm_map_entry_t entry;
10238
10239 while ((copy_entry = vm_map_copy_first_entry(copy))
10240 != vm_map_copy_to_entry(copy)) {
10241 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10242
10243 entry = tmp_entry;
10244 if (entry->is_sub_map) {
10245 /* unnested when clipped earlier */
10246 assert(!entry->use_pmap);
10247 }
10248 if (entry == vm_map_to_entry(dst_map)) {
10249 vm_map_unlock(dst_map);
10250 return KERN_INVALID_ADDRESS;
10251 }
10252 size = (entry->vme_end - entry->vme_start);
10253 /*
10254 * Make sure that no holes popped up in the
10255 * address map, and that the protection is
10256 * still valid, in case the map was unlocked
10257 * earlier.
10258 */
10259
10260 if ((entry->vme_start != start) || ((entry->is_sub_map)
10261 && !entry->needs_copy)) {
10262 vm_map_unlock(dst_map);
10263 return KERN_INVALID_ADDRESS;
10264 }
10265 assert(entry != vm_map_to_entry(dst_map));
10266
10267 /*
10268 * Check protection again
10269 */
10270
10271 if (!(entry->protection & VM_PROT_WRITE)) {
10272 vm_map_unlock(dst_map);
10273 return KERN_PROTECTION_FAILURE;
10274 }
10275
10276 /*
10277 * Adjust to source size first
10278 */
10279
10280 if (copy_size < size) {
10281 if (entry->map_aligned &&
10282 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10283 VM_MAP_PAGE_MASK(dst_map))) {
10284 /* no longer map-aligned */
10285 entry->map_aligned = FALSE;
10286 }
10287 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10288 size = copy_size;
10289 }
10290
10291 /*
10292 * Adjust to destination size
10293 */
10294
10295 if (size < copy_size) {
10296 vm_map_copy_clip_end(copy, copy_entry,
10297 copy_entry->vme_start + size);
10298 copy_size = size;
10299 }
10300
10301 assert((entry->vme_end - entry->vme_start) == size);
10302 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10303 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10304
10305 /*
10306 * If the destination contains temporary unshared memory,
10307 * we can perform the copy by throwing it away and
10308 * installing the source data.
10309 */
10310
10311 object = VME_OBJECT(entry);
10312 if ((!entry->is_shared &&
10313 ((object == VM_OBJECT_NULL) ||
10314 (object->internal && !object->true_share))) ||
10315 entry->needs_copy) {
10316 vm_object_t old_object = VME_OBJECT(entry);
10317 vm_object_offset_t old_offset = VME_OFFSET(entry);
10318 vm_object_offset_t offset;
10319
10320 /*
10321 * Ensure that the source and destination aren't
10322 * identical
10323 */
10324 if (old_object == VME_OBJECT(copy_entry) &&
10325 old_offset == VME_OFFSET(copy_entry)) {
10326 vm_map_copy_entry_unlink(copy, copy_entry);
10327 vm_map_copy_entry_dispose(copy, copy_entry);
10328
10329 if (old_object != VM_OBJECT_NULL) {
10330 vm_object_deallocate(old_object);
10331 }
10332
10333 start = tmp_entry->vme_end;
10334 tmp_entry = tmp_entry->vme_next;
10335 continue;
10336 }
10337
10338 #if XNU_TARGET_OS_OSX
10339 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10340 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10341 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10342 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10343 copy_size <= __TRADEOFF1_COPY_SIZE) {
10344 /*
10345 * Virtual vs. Physical copy tradeoff #1.
10346 *
10347 * Copying only a few pages out of a large
10348 * object: do a physical copy instead of
10349 * a virtual copy, to avoid possibly keeping
10350 * the entire large object alive because of
10351 * those few copy-on-write pages.
10352 */
10353 vm_map_copy_overwrite_aligned_src_large++;
10354 goto slow_copy;
10355 }
10356 #endif /* XNU_TARGET_OS_OSX */
10357
10358 if ((dst_map->pmap != kernel_pmap) &&
10359 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10360 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10361 vm_object_t new_object, new_shadow;
10362
10363 /*
10364 * We're about to map something over a mapping
10365 * established by malloc()...
10366 */
10367 new_object = VME_OBJECT(copy_entry);
10368 if (new_object != VM_OBJECT_NULL) {
10369 vm_object_lock_shared(new_object);
10370 }
10371 while (new_object != VM_OBJECT_NULL &&
10372 #if XNU_TARGET_OS_OSX
10373 !new_object->true_share &&
10374 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10375 #endif /* XNU_TARGET_OS_OSX */
10376 new_object->internal) {
10377 new_shadow = new_object->shadow;
10378 if (new_shadow == VM_OBJECT_NULL) {
10379 break;
10380 }
10381 vm_object_lock_shared(new_shadow);
10382 vm_object_unlock(new_object);
10383 new_object = new_shadow;
10384 }
10385 if (new_object != VM_OBJECT_NULL) {
10386 if (!new_object->internal) {
10387 /*
10388 * The new mapping is backed
10389 * by an external object. We
10390 * don't want malloc'ed memory
10391 * to be replaced with such a
10392 * non-anonymous mapping, so
10393 * let's go off the optimized
10394 * path...
10395 */
10396 vm_map_copy_overwrite_aligned_src_not_internal++;
10397 vm_object_unlock(new_object);
10398 goto slow_copy;
10399 }
10400 #if XNU_TARGET_OS_OSX
10401 if (new_object->true_share ||
10402 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10403 /*
10404 * Same if there's a "true_share"
10405 * object in the shadow chain, or
10406 * an object with a non-default
10407 * (SYMMETRIC) copy strategy.
10408 */
10409 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10410 vm_object_unlock(new_object);
10411 goto slow_copy;
10412 }
10413 #endif /* XNU_TARGET_OS_OSX */
10414 vm_object_unlock(new_object);
10415 }
10416 /*
10417 * The new mapping is still backed by
10418 * anonymous (internal) memory, so it's
10419 * OK to substitute it for the original
10420 * malloc() mapping.
10421 */
10422 }
10423
10424 if (old_object != VM_OBJECT_NULL) {
10425 if (entry->is_sub_map) {
10426 if (entry->use_pmap) {
10427 #ifndef NO_NESTED_PMAP
10428 pmap_unnest(dst_map->pmap,
10429 (addr64_t)entry->vme_start,
10430 entry->vme_end - entry->vme_start);
10431 #endif /* NO_NESTED_PMAP */
10432 if (dst_map->mapped_in_other_pmaps) {
10433 /* clean up parent */
10434 /* map/maps */
10435 vm_map_submap_pmap_clean(
10436 dst_map, entry->vme_start,
10437 entry->vme_end,
10438 VME_SUBMAP(entry),
10439 VME_OFFSET(entry));
10440 }
10441 } else {
10442 vm_map_submap_pmap_clean(
10443 dst_map, entry->vme_start,
10444 entry->vme_end,
10445 VME_SUBMAP(entry),
10446 VME_OFFSET(entry));
10447 }
10448 vm_map_deallocate(VME_SUBMAP(entry));
10449 } else {
10450 if (dst_map->mapped_in_other_pmaps) {
10451 vm_object_pmap_protect_options(
10452 VME_OBJECT(entry),
10453 VME_OFFSET(entry),
10454 entry->vme_end
10455 - entry->vme_start,
10456 PMAP_NULL,
10457 PAGE_SIZE,
10458 entry->vme_start,
10459 VM_PROT_NONE,
10460 PMAP_OPTIONS_REMOVE);
10461 } else {
10462 pmap_remove_options(
10463 dst_map->pmap,
10464 (addr64_t)(entry->vme_start),
10465 (addr64_t)(entry->vme_end),
10466 PMAP_OPTIONS_REMOVE);
10467 }
10468 vm_object_deallocate(old_object);
10469 }
10470 }
10471
10472 if (entry->iokit_acct) {
10473 /* keep using iokit accounting */
10474 entry->use_pmap = FALSE;
10475 } else {
10476 /* use pmap accounting */
10477 entry->use_pmap = TRUE;
10478 }
10479 entry->is_sub_map = FALSE;
10480 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10481 object = VME_OBJECT(entry);
10482 entry->needs_copy = copy_entry->needs_copy;
10483 entry->wired_count = 0;
10484 entry->user_wired_count = 0;
10485 offset = VME_OFFSET(copy_entry);
10486 VME_OFFSET_SET(entry, offset);
10487
10488 vm_map_copy_entry_unlink(copy, copy_entry);
10489 vm_map_copy_entry_dispose(copy, copy_entry);
10490
10491 /*
10492 * We could try to push pages into the pmap at this point, BUT
10493 * this optimization only saved on average 2 us per page if ALL
10494 * the pages in the source were currently mapped
10495 * and ALL the pages in the dest were touched; if fewer than 2/3
10496 * of the pages were touched, this optimization actually cost more
10497 * cycles. It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10498 */
10499
10500 /*
10501 * Set up for the next iteration. The map
10502 * has not been unlocked, so the next
10503 * address should be at the end of this
10504 * entry, and the next map entry should be
10505 * the one following it.
10506 */
10507
10508 start = tmp_entry->vme_end;
10509 tmp_entry = tmp_entry->vme_next;
10510 } else {
10511 vm_map_version_t version;
10512 vm_object_t dst_object;
10513 vm_object_offset_t dst_offset;
10514 kern_return_t r;
10515
10516 slow_copy:
10517 if (entry->needs_copy) {
10518 VME_OBJECT_SHADOW(entry,
10519 (entry->vme_end -
10520 entry->vme_start));
10521 entry->needs_copy = FALSE;
10522 }
10523
10524 dst_object = VME_OBJECT(entry);
10525 dst_offset = VME_OFFSET(entry);
10526
10527 /*
10528 * Take an object reference, and record
10529 * the map version information so that the
10530 * map can be safely unlocked.
10531 */
10532
10533 if (dst_object == VM_OBJECT_NULL) {
10534 /*
10535 * We would usually have just taken the
10536 * optimized path above if the destination
10537 * object has not been allocated yet. But we
10538 * now disable that optimization if the copy
10539 * entry's object is not backed by anonymous
10540 * memory to avoid replacing malloc'ed
10541 * (i.e. re-usable) anonymous memory with a
10542 * not-so-anonymous mapping.
10543 * So we have to handle this case here and
10544 * allocate a new VM object for this map entry.
10545 */
10546 dst_object = vm_object_allocate(
10547 entry->vme_end - entry->vme_start);
10548 dst_offset = 0;
10549 VME_OBJECT_SET(entry, dst_object);
10550 VME_OFFSET_SET(entry, dst_offset);
10551 assert(entry->use_pmap);
10552 }
10553
10554 vm_object_reference(dst_object);
10555
10556 /* account for unlock bumping up timestamp */
10557 version.main_timestamp = dst_map->timestamp + 1;
10558
10559 vm_map_unlock(dst_map);
10560
10561 /*
10562 * Copy as much as possible in one pass
10563 */
10564
10565 copy_size = size;
10566 r = vm_fault_copy(
10567 VME_OBJECT(copy_entry),
10568 VME_OFFSET(copy_entry),
10569 &copy_size,
10570 dst_object,
10571 dst_offset,
10572 dst_map,
10573 &version,
10574 THREAD_UNINT );
10575
10576 /*
10577 * Release the object reference
10578 */
10579
10580 vm_object_deallocate(dst_object);
10581
10582 /*
10583 * If a hard error occurred, return it now
10584 */
10585
10586 if (r != KERN_SUCCESS) {
10587 return r;
10588 }
10589
10590 if (copy_size != 0) {
10591 /*
10592 * Dispose of the copied region
10593 */
10594
10595 vm_map_copy_clip_end(copy, copy_entry,
10596 copy_entry->vme_start + copy_size);
10597 vm_map_copy_entry_unlink(copy, copy_entry);
10598 vm_object_deallocate(VME_OBJECT(copy_entry));
10599 vm_map_copy_entry_dispose(copy, copy_entry);
10600 }
10601
10602 /*
10603 * Pick up in the destination map where we left off.
10604 *
10605 * Use the version information to avoid a lookup
10606 * in the normal case.
10607 */
10608
10609 start += copy_size;
10610 vm_map_lock(dst_map);
10611 if (version.main_timestamp == dst_map->timestamp &&
10612 copy_size != 0) {
10613 /* We can safely use saved tmp_entry value */
10614
10615 if (tmp_entry->map_aligned &&
10616 !VM_MAP_PAGE_ALIGNED(
10617 start,
10618 VM_MAP_PAGE_MASK(dst_map))) {
10619 /* no longer map-aligned */
10620 tmp_entry->map_aligned = FALSE;
10621 }
10622 vm_map_clip_end(dst_map, tmp_entry, start);
10623 tmp_entry = tmp_entry->vme_next;
10624 } else {
10625 /* Must do lookup of tmp_entry */
10626
10627 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10628 vm_map_unlock(dst_map);
10629 return KERN_INVALID_ADDRESS;
10630 }
10631 if (tmp_entry->map_aligned &&
10632 !VM_MAP_PAGE_ALIGNED(
10633 start,
10634 VM_MAP_PAGE_MASK(dst_map))) {
10635 /* no longer map-aligned */
10636 tmp_entry->map_aligned = FALSE;
10637 }
10638 vm_map_clip_start(dst_map, tmp_entry, start);
10639 }
10640 }
10641 }/* while */
10642
10643 return KERN_SUCCESS;
10644 }/* vm_map_copy_overwrite_aligned */
10645
10646 /*
10647 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10648 *
10649 * Description:
10650 * Copy in data to a kernel buffer from space in the
10651 * source map. The original space may be optionally
10652 * deallocated.
10653 *
10654 * If successful, returns a new copy object.
10655 */
10656 static kern_return_t
10657 vm_map_copyin_kernel_buffer(
10658 vm_map_t src_map,
10659 vm_map_offset_t src_addr,
10660 vm_map_size_t len,
10661 boolean_t src_destroy,
10662 vm_map_copy_t *copy_result)
10663 {
10664 kern_return_t kr;
10665 vm_map_copy_t copy;
10666
10667 if (len > msg_ool_size_small) {
10668 return KERN_INVALID_ARGUMENT;
10669 }
10670
10671 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
10672 if (copy == VM_MAP_COPY_NULL) {
10673 return KERN_RESOURCE_SHORTAGE;
10674 }
10675 copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
10676 if (copy->cpy_kdata == NULL) {
10677 zfree(vm_map_copy_zone, copy);
10678 return KERN_RESOURCE_SHORTAGE;
10679 }
10680
10681 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10682 copy->size = len;
10683 copy->offset = 0;
10684
10685 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10686 if (kr != KERN_SUCCESS) {
10687 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
10688 zfree(vm_map_copy_zone, copy);
10689 return kr;
10690 }
10691 if (src_destroy) {
10692 (void) vm_map_remove(
10693 src_map,
10694 vm_map_trunc_page(src_addr,
10695 VM_MAP_PAGE_MASK(src_map)),
10696 vm_map_round_page(src_addr + len,
10697 VM_MAP_PAGE_MASK(src_map)),
10698 (VM_MAP_REMOVE_INTERRUPTIBLE |
10699 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10700 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10701 }
10702 *copy_result = copy;
10703 return KERN_SUCCESS;
10704 }
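/*
 * Illustrative sketch (not part of the build): the invariants a kernel-buffer
 * copy object created above is expected to satisfy when it is later consumed
 * by vm_map_copyout_kernel_buffer(), which panics on a size/offset mismatch.
 * These checks would live in a hypothetical consumer; they are not actual
 * assertions in this file.
 */
#if 0
	assert(copy->type == VM_MAP_COPY_KERNEL_BUFFER);
	assert(copy->offset == 0);
	assert(copy->size <= msg_ool_size_small);
	assert(copy->cpy_kdata != NULL);
#endif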
10705
10706 /*
10707 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10708 *
10709 * Description:
10710 * Copy out data from a kernel buffer into space in the
10711 * destination map. The space may be optionally dynamically
10712 * allocated.
10713 *
10714 * If successful, consumes the copy object.
10715 * Otherwise, the caller is responsible for it.
10716 */
10717 static int vm_map_copyout_kernel_buffer_failures = 0;
10718 static kern_return_t
10719 vm_map_copyout_kernel_buffer(
10720 vm_map_t map,
10721 vm_map_address_t *addr, /* IN/OUT */
10722 vm_map_copy_t copy,
10723 vm_map_size_t copy_size,
10724 boolean_t overwrite,
10725 boolean_t consume_on_success)
10726 {
10727 kern_return_t kr = KERN_SUCCESS;
10728 thread_t thread = current_thread();
10729
10730 assert(copy->size == copy_size);
10731
10732 /*
10733 * check for corrupted vm_map_copy structure
10734 */
10735 if (copy_size > msg_ool_size_small || copy->offset) {
10736 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10737 (long long)copy->size, (long long)copy->offset);
10738 }
10739
10740 if (!overwrite) {
10741 /*
10742 * Allocate space in the target map for the data
10743 */
10744 *addr = 0;
10745 kr = vm_map_enter(map,
10746 addr,
10747 vm_map_round_page(copy_size,
10748 VM_MAP_PAGE_MASK(map)),
10749 (vm_map_offset_t) 0,
10750 VM_FLAGS_ANYWHERE,
10751 VM_MAP_KERNEL_FLAGS_NONE,
10752 VM_KERN_MEMORY_NONE,
10753 VM_OBJECT_NULL,
10754 (vm_object_offset_t) 0,
10755 FALSE,
10756 VM_PROT_DEFAULT,
10757 VM_PROT_ALL,
10758 VM_INHERIT_DEFAULT);
10759 if (kr != KERN_SUCCESS) {
10760 return kr;
10761 }
10762 #if KASAN
10763 if (map->pmap == kernel_pmap) {
10764 kasan_notify_address(*addr, copy->size);
10765 }
10766 #endif
10767 }
10768
10769 /*
10770 * Copyout the data from the kernel buffer to the target map.
10771 */
10772 if (thread->map == map) {
10773 /*
10774 * If the target map is the current map, just do
10775 * the copy.
10776 */
10777 assert((vm_size_t)copy_size == copy_size);
10778 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10779 kr = KERN_INVALID_ADDRESS;
10780 }
10781 } else {
10782 vm_map_t oldmap;
10783
10784 /*
10785 * If the target map is another map, assume the
10786 * target's address space identity for the duration
10787 * of the copy.
10788 */
10789 vm_map_reference(map);
10790 oldmap = vm_map_switch(map);
10791
10792 assert((vm_size_t)copy_size == copy_size);
10793 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10794 vm_map_copyout_kernel_buffer_failures++;
10795 kr = KERN_INVALID_ADDRESS;
10796 }
10797
10798 (void) vm_map_switch(oldmap);
10799 vm_map_deallocate(map);
10800 }
10801
10802 if (kr != KERN_SUCCESS) {
10803 /* the copy failed, clean up */
10804 if (!overwrite) {
10805 /*
10806 * Deallocate the space we allocated in the target map.
10807 */
10808 (void) vm_map_remove(
10809 map,
10810 vm_map_trunc_page(*addr,
10811 VM_MAP_PAGE_MASK(map)),
10812 vm_map_round_page((*addr +
10813 vm_map_round_page(copy_size,
10814 VM_MAP_PAGE_MASK(map))),
10815 VM_MAP_PAGE_MASK(map)),
10816 VM_MAP_REMOVE_NO_FLAGS);
10817 *addr = 0;
10818 }
10819 } else {
10820 /* copy was successful, discard the copy structure */
10821 if (consume_on_success) {
10822 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
10823 zfree(vm_map_copy_zone, copy);
10824 }
10825 }
10826
10827 return kr;
10828 }
10829
10830 /*
10831 * Routine: vm_map_copy_insert [internal use only]
10832 *
10833 * Description:
10834 * Link a copy chain ("copy") into a map at the
10835 * specified location (after "where").
10836 * Side effects:
10837 * The copy chain is destroyed.
10838 */
10839 static void
10840 vm_map_copy_insert(
10841 vm_map_t map,
10842 vm_map_entry_t after_where,
10843 vm_map_copy_t copy)
10844 {
10845 vm_map_entry_t entry;
10846
10847 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10848 entry = vm_map_copy_first_entry(copy);
10849 vm_map_copy_entry_unlink(copy, entry);
10850 vm_map_store_entry_link(map, after_where, entry,
10851 VM_MAP_KERNEL_FLAGS_NONE);
10852 after_where = entry;
10853 }
10854 zfree(vm_map_copy_zone, copy);
10855 }
10856
10857 void
10858 vm_map_copy_remap(
10859 vm_map_t map,
10860 vm_map_entry_t where,
10861 vm_map_copy_t copy,
10862 vm_map_offset_t adjustment,
10863 vm_prot_t cur_prot,
10864 vm_prot_t max_prot,
10865 vm_inherit_t inheritance)
10866 {
10867 vm_map_entry_t copy_entry, new_entry;
10868
10869 for (copy_entry = vm_map_copy_first_entry(copy);
10870 copy_entry != vm_map_copy_to_entry(copy);
10871 copy_entry = copy_entry->vme_next) {
10872 /* get a new VM map entry for the map */
10873 new_entry = vm_map_entry_create(map,
10874 !map->hdr.entries_pageable);
10875 /* copy the "copy entry" to the new entry */
10876 vm_map_entry_copy(map, new_entry, copy_entry);
10877 /* adjust "start" and "end" */
10878 new_entry->vme_start += adjustment;
10879 new_entry->vme_end += adjustment;
10880 /* clear some attributes */
10881 new_entry->inheritance = inheritance;
10882 new_entry->protection = cur_prot;
10883 new_entry->max_protection = max_prot;
10884 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10885 /* take an extra reference on the entry's "object" */
10886 if (new_entry->is_sub_map) {
10887 assert(!new_entry->use_pmap); /* not nested */
10888 vm_map_lock(VME_SUBMAP(new_entry));
10889 vm_map_reference(VME_SUBMAP(new_entry));
10890 vm_map_unlock(VME_SUBMAP(new_entry));
10891 } else {
10892 vm_object_reference(VME_OBJECT(new_entry));
10893 }
10894 /* insert the new entry in the map */
10895 vm_map_store_entry_link(map, where, new_entry,
10896 VM_MAP_KERNEL_FLAGS_NONE);
10897 /* continue inserting the "copy entries" after the new entry */
10898 where = new_entry;
10899 }
10900 }
10901
10902
10903 /*
10904 * Returns true if *size matches (or is in the range of) copy->size.
10905 * Upon returning true, the *size field is updated with the actual size of the
10906 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types).
10907 */
10908 boolean_t
10909 vm_map_copy_validate_size(
10910 vm_map_t dst_map,
10911 vm_map_copy_t copy,
10912 vm_map_size_t *size)
10913 {
10914 if (copy == VM_MAP_COPY_NULL) {
10915 return FALSE;
10916 }
10917 vm_map_size_t copy_sz = copy->size;
10918 vm_map_size_t sz = *size;
10919 switch (copy->type) {
10920 case VM_MAP_COPY_OBJECT:
10921 case VM_MAP_COPY_KERNEL_BUFFER:
10922 if (sz == copy_sz) {
10923 return TRUE;
10924 }
10925 break;
10926 case VM_MAP_COPY_ENTRY_LIST:
10927 /*
10928 * potential page-size rounding prevents us from exactly
10929 * validating this flavor of vm_map_copy, but we can at least
10930 * assert that it's within a range.
10931 */
10932 if (copy_sz >= sz &&
10933 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10934 *size = copy_sz;
10935 return TRUE;
10936 }
10937 break;
10938 default:
10939 break;
10940 }
10941 return FALSE;
10942 }
10943
10944 /*
10945 * Routine: vm_map_copyout_size
10946 *
10947 * Description:
10948 * Copy out a copy chain ("copy") into newly-allocated
10949 * space in the destination map. Uses a prevalidated
10950 * size for the copy object (vm_map_copy_validate_size).
10951 *
10952 * If successful, consumes the copy object.
10953 * Otherwise, the caller is responsible for it.
10954 */
10955 kern_return_t
10956 vm_map_copyout_size(
10957 vm_map_t dst_map,
10958 vm_map_address_t *dst_addr, /* OUT */
10959 vm_map_copy_t copy,
10960 vm_map_size_t copy_size)
10961 {
10962 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10963 TRUE, /* consume_on_success */
10964 VM_PROT_DEFAULT,
10965 VM_PROT_ALL,
10966 VM_INHERIT_DEFAULT);
10967 }
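/*
 * Illustrative sketch (not part of the build, hypothetical caller): the
 * expected pairing of vm_map_copy_validate_size() with vm_map_copyout_size(),
 * so that the size handed to the copyout is the prevalidated one.
 */
#if 0
static kern_return_t
example_copyout_validated(
	vm_map_t dst_map,
	vm_map_copy_t copy,
	vm_map_size_t claimed_size,
	vm_map_address_t *addr)
{
	vm_map_size_t size = claimed_size;

	if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
		return KERN_FAILURE;
	}
	/* "size" now reflects the copy object's actual size */
	return vm_map_copyout_size(dst_map, addr, copy, size);
}
#endif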
10968
10969 /*
10970 * Routine: vm_map_copyout
10971 *
10972 * Description:
10973 * Copy out a copy chain ("copy") into newly-allocated
10974 * space in the destination map.
10975 *
10976 * If successful, consumes the copy object.
10977 * Otherwise, the caller is responsible for it.
10978 */
10979 kern_return_t
10980 vm_map_copyout(
10981 vm_map_t dst_map,
10982 vm_map_address_t *dst_addr, /* OUT */
10983 vm_map_copy_t copy)
10984 {
10985 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10986 TRUE, /* consume_on_success */
10987 VM_PROT_DEFAULT,
10988 VM_PROT_ALL,
10989 VM_INHERIT_DEFAULT);
10990 }
10991
10992 kern_return_t
10993 vm_map_copyout_internal(
10994 vm_map_t dst_map,
10995 vm_map_address_t *dst_addr, /* OUT */
10996 vm_map_copy_t copy,
10997 vm_map_size_t copy_size,
10998 boolean_t consume_on_success,
10999 vm_prot_t cur_protection,
11000 vm_prot_t max_protection,
11001 vm_inherit_t inheritance)
11002 {
11003 vm_map_size_t size;
11004 vm_map_size_t adjustment;
11005 vm_map_offset_t start;
11006 vm_object_offset_t vm_copy_start;
11007 vm_map_entry_t last;
11008 vm_map_entry_t entry;
11009 vm_map_entry_t hole_entry;
11010 vm_map_copy_t original_copy;
11011
11012 /*
11013 * Check for null copy object.
11014 */
11015
11016 if (copy == VM_MAP_COPY_NULL) {
11017 *dst_addr = 0;
11018 return KERN_SUCCESS;
11019 }
11020
11021 /*
11022 * Assert that the vm_map_copy is coming from the right
11023 * zone and hasn't been forged
11024 */
11025 vm_map_copy_require(copy);
11026
11027 if (copy->size != copy_size) {
11028 *dst_addr = 0;
11029 return KERN_FAILURE;
11030 }
11031
11032 /*
11033 * Check for special copy object, created
11034 * by vm_map_copyin_object.
11035 */
11036
11037 if (copy->type == VM_MAP_COPY_OBJECT) {
11038 vm_object_t object = copy->cpy_object;
11039 kern_return_t kr;
11040 vm_object_offset_t offset;
11041
11042 offset = vm_object_trunc_page(copy->offset);
11043 size = vm_map_round_page((copy_size +
11044 (vm_map_size_t)(copy->offset -
11045 offset)),
11046 VM_MAP_PAGE_MASK(dst_map));
11047 *dst_addr = 0;
11048 kr = vm_map_enter(dst_map, dst_addr, size,
11049 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
11050 VM_MAP_KERNEL_FLAGS_NONE,
11051 VM_KERN_MEMORY_NONE,
11052 object, offset, FALSE,
11053 VM_PROT_DEFAULT, VM_PROT_ALL,
11054 VM_INHERIT_DEFAULT);
11055 if (kr != KERN_SUCCESS) {
11056 return kr;
11057 }
11058 /* Account for non-pagealigned copy object */
11059 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
11060 if (consume_on_success) {
11061 zfree(vm_map_copy_zone, copy);
11062 }
11063 return KERN_SUCCESS;
11064 }
11065
11066 /*
11067 * Check for special kernel buffer allocated
11068 * by new_ipc_kmsg_copyin.
11069 */
11070
11071 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11072 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11073 copy, copy_size, FALSE,
11074 consume_on_success);
11075 }
11076
11077 original_copy = copy;
11078 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11079 kern_return_t kr;
11080 vm_map_copy_t target_copy;
11081 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11082
11083 target_copy = VM_MAP_COPY_NULL;
11084 DEBUG4K_ADJUST("adjusting...\n");
11085 kr = vm_map_copy_adjust_to_target(
11086 copy,
11087 0, /* offset */
11088 copy->size, /* size */
11089 dst_map,
11090 TRUE, /* copy */
11091 &target_copy,
11092 &overmap_start,
11093 &overmap_end,
11094 &trimmed_start);
11095 if (kr != KERN_SUCCESS) {
11096 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11097 return kr;
11098 }
11099 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11100 if (target_copy != copy) {
11101 copy = target_copy;
11102 }
11103 copy_size = copy->size;
11104 }
11105
11106 /*
11107 * Find space for the data
11108 */
11109
11110 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11111 VM_MAP_COPY_PAGE_MASK(copy));
11112 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11113 VM_MAP_COPY_PAGE_MASK(copy))
11114 - vm_copy_start;
11115
11116
11117 StartAgain:;
11118
11119 vm_map_lock(dst_map);
11120 if (dst_map->disable_vmentry_reuse == TRUE) {
11121 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
11122 last = entry;
11123 } else {
11124 if (dst_map->holelistenabled) {
11125 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
11126
11127 if (hole_entry == NULL) {
11128 /*
11129 * No more space in the map?
11130 */
11131 vm_map_unlock(dst_map);
11132 return KERN_NO_SPACE;
11133 }
11134
11135 last = hole_entry;
11136 start = last->vme_start;
11137 } else {
11138 assert(first_free_is_valid(dst_map));
11139 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
11140 vm_map_min(dst_map) : last->vme_end;
11141 }
11142 start = vm_map_round_page(start,
11143 VM_MAP_PAGE_MASK(dst_map));
11144 }
11145
11146 while (TRUE) {
11147 vm_map_entry_t next = last->vme_next;
11148 vm_map_offset_t end = start + size;
11149
11150 if ((end > dst_map->max_offset) || (end < start)) {
11151 if (dst_map->wait_for_space) {
11152 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
11153 assert_wait((event_t) dst_map,
11154 THREAD_INTERRUPTIBLE);
11155 vm_map_unlock(dst_map);
11156 thread_block(THREAD_CONTINUE_NULL);
11157 goto StartAgain;
11158 }
11159 }
11160 vm_map_unlock(dst_map);
11161 return KERN_NO_SPACE;
11162 }
11163
11164 if (dst_map->holelistenabled) {
11165 if (last->vme_end >= end) {
11166 break;
11167 }
11168 } else {
11169 /*
11170 * If there are no more entries, we must win.
11171 *
11172 * OR
11173 *
11174 * If there is another entry, it must be
11175 * after the end of the potential new region.
11176 */
11177
11178 if (next == vm_map_to_entry(dst_map)) {
11179 break;
11180 }
11181
11182 if (next->vme_start >= end) {
11183 break;
11184 }
11185 }
11186
11187 last = next;
11188
11189 if (dst_map->holelistenabled) {
11190 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
11191 /*
11192 * Wrapped around
11193 */
11194 vm_map_unlock(dst_map);
11195 return KERN_NO_SPACE;
11196 }
11197 start = last->vme_start;
11198 } else {
11199 start = last->vme_end;
11200 }
11201 start = vm_map_round_page(start,
11202 VM_MAP_PAGE_MASK(dst_map));
11203 }
11204
11205 if (dst_map->holelistenabled) {
11206 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
11207 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
11208 }
11209 }
11210
11211
11212 adjustment = start - vm_copy_start;
11213 if (!consume_on_success) {
11214 /*
11215 * We're not allowed to consume "copy", so we'll have to
11216 * copy its map entries into the destination map below.
11217 * No need to re-allocate map entries from the correct
11218 * (pageable or not) zone, since we'll get new map entries
11219 * during the transfer.
11220 * We'll also adjust the map entries' "start" and "end"
11221 * during the transfer, to keep "copy"'s entries consistent
11222 * with its "offset".
11223 */
11224 goto after_adjustments;
11225 }
11226
11227 /*
11228 * Since we're going to just drop the map
11229 * entries from the copy into the destination
11230 * map, they must come from the same pool.
11231 */
11232
11233 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11234 /*
11235 * Mismatches occur when dealing with the default
11236 * pager.
11237 */
11238 zone_t old_zone;
11239 vm_map_entry_t next, new;
11240
11241 /*
11242 * Find the zone that the copies were allocated from
11243 */
11244
11245 entry = vm_map_copy_first_entry(copy);
11246
11247 /*
11248 * Reinitialize the copy so that vm_map_copy_entry_link
11249 * will work.
11250 */
11251 vm_map_store_copy_reset(copy, entry);
11252 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11253
11254 /*
11255 * Copy each entry.
11256 */
11257 while (entry != vm_map_copy_to_entry(copy)) {
11258 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11259 vm_map_entry_copy_full(new, entry);
11260 new->vme_no_copy_on_read = FALSE;
11261 assert(!new->iokit_acct);
11262 if (new->is_sub_map) {
11263 /* clr address space specifics */
11264 new->use_pmap = FALSE;
11265 }
11266 vm_map_copy_entry_link(copy,
11267 vm_map_copy_last_entry(copy),
11268 new);
11269 next = entry->vme_next;
11270 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
11271 zfree(old_zone, entry);
11272 entry = next;
11273 }
11274 }
11275
11276 /*
11277 * Adjust the addresses in the copy chain, and
11278 * reset the region attributes.
11279 */
11280
11281 for (entry = vm_map_copy_first_entry(copy);
11282 entry != vm_map_copy_to_entry(copy);
11283 entry = entry->vme_next) {
11284 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11285 /*
11286 * We're injecting this copy entry into a map that
11287 * has the standard page alignment, so clear
11288 * "map_aligned" (which might have been inherited
11289 * from the original map entry).
11290 */
11291 entry->map_aligned = FALSE;
11292 }
11293
11294 entry->vme_start += adjustment;
11295 entry->vme_end += adjustment;
11296
11297 if (entry->map_aligned) {
11298 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11299 VM_MAP_PAGE_MASK(dst_map)));
11300 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11301 VM_MAP_PAGE_MASK(dst_map)));
11302 }
11303
11304 entry->inheritance = VM_INHERIT_DEFAULT;
11305 entry->protection = VM_PROT_DEFAULT;
11306 entry->max_protection = VM_PROT_ALL;
11307 entry->behavior = VM_BEHAVIOR_DEFAULT;
11308
11309 /*
11310 * If the entry is now wired,
11311 * map the pages into the destination map.
11312 */
11313 if (entry->wired_count != 0) {
11314 vm_map_offset_t va;
11315 vm_object_offset_t offset;
11316 vm_object_t object;
11317 vm_prot_t prot;
11318 int type_of_fault;
11319
11320 /* TODO4K would need to use actual page size */
11321 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11322
11323 object = VME_OBJECT(entry);
11324 offset = VME_OFFSET(entry);
11325 va = entry->vme_start;
11326
11327 pmap_pageable(dst_map->pmap,
11328 entry->vme_start,
11329 entry->vme_end,
11330 TRUE);
11331
11332 while (va < entry->vme_end) {
11333 vm_page_t m;
11334 struct vm_object_fault_info fault_info = {};
11335
11336 /*
11337 * Look up the page in the object.
11338 * Assert that the page will be found in the
11339 * top object:
11340 * either
11341 * the object was newly created by
11342 * vm_object_copy_slowly, and has
11343 * copies of all of the pages from
11344 * the source object
11345 * or
11346 * the object was moved from the old
11347 * map entry; because the old map
11348 * entry was wired, all of the pages
11349 * were in the top-level object.
11350 * (XXX not true if we wire pages for
11351 * reading)
11352 */
11353 vm_object_lock(object);
11354
11355 m = vm_page_lookup(object, offset);
11356 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11357 m->vmp_absent) {
11358 panic("vm_map_copyout: wiring %p", m);
11359 }
11360
11361 prot = entry->protection;
11362
11363 if (override_nx(dst_map, VME_ALIAS(entry)) &&
11364 prot) {
11365 prot |= VM_PROT_EXECUTE;
11366 }
11367
11368 type_of_fault = DBG_CACHE_HIT_FAULT;
11369
11370 fault_info.user_tag = VME_ALIAS(entry);
11371 fault_info.pmap_options = 0;
11372 if (entry->iokit_acct ||
11373 (!entry->is_sub_map && !entry->use_pmap)) {
11374 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11375 }
11376
11377 vm_fault_enter(m,
11378 dst_map->pmap,
11379 va,
11380 PAGE_SIZE, 0,
11381 prot,
11382 prot,
11383 VM_PAGE_WIRED(m),
11384 FALSE, /* change_wiring */
11385 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11386 &fault_info,
11387 NULL, /* need_retry */
11388 &type_of_fault);
11389
11390 vm_object_unlock(object);
11391
11392 offset += PAGE_SIZE_64;
11393 va += PAGE_SIZE;
11394 }
11395 }
11396 }
11397
11398 after_adjustments:
11399
11400 /*
11401 * Correct the page alignment for the result
11402 */
11403
11404 *dst_addr = start + (copy->offset - vm_copy_start);
11405
11406 #if KASAN
11407 kasan_notify_address(*dst_addr, size);
11408 #endif
11409
11410 /*
11411 * Update the hints and the map size
11412 */
11413
11414 if (consume_on_success) {
11415 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11416 } else {
11417 SAVE_HINT_MAP_WRITE(dst_map, last);
11418 }
11419
11420 dst_map->size += size;
11421
11422 /*
11423 * Link in the copy
11424 */
11425
11426 if (consume_on_success) {
11427 vm_map_copy_insert(dst_map, last, copy);
11428 if (copy != original_copy) {
11429 vm_map_copy_discard(original_copy);
11430 original_copy = VM_MAP_COPY_NULL;
11431 }
11432 } else {
11433 vm_map_copy_remap(dst_map, last, copy, adjustment,
11434 cur_protection, max_protection,
11435 inheritance);
11436 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11437 vm_map_copy_discard(copy);
11438 copy = original_copy;
11439 }
11440 }
11441
11442
11443 vm_map_unlock(dst_map);
11444
11445 /*
11446 * XXX If wiring_required, call vm_map_pageable
11447 */
11448
11449 return KERN_SUCCESS;
11450 }
11451
11452 /*
11453 * Routine: vm_map_copyin
11454 *
11455 * Description:
11456 * see vm_map_copyin_common. Exported via Unsupported.exports.
11457 *
11458 */
11459
11460 #undef vm_map_copyin
11461
11462 kern_return_t
11463 vm_map_copyin(
11464 vm_map_t src_map,
11465 vm_map_address_t src_addr,
11466 vm_map_size_t len,
11467 boolean_t src_destroy,
11468 vm_map_copy_t *copy_result) /* OUT */
11469 {
11470 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11471 FALSE, copy_result, FALSE);
11472 }
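/*
 * Illustrative sketch (not part of the build, hypothetical helper): copying a
 * region between two maps with vm_map_copyin() and vm_map_copyout(). On
 * success the copy object is consumed by the copyout; on failure the caller
 * remains responsible for it and must discard it.
 */
#if 0
static kern_return_t
example_copy_region(
	vm_map_t src_map,
	vm_map_t dst_map,
	vm_map_address_t src_addr,
	vm_map_size_t len,
	vm_map_address_t *dst_addr)
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(src_map, src_addr, len,
	    FALSE, /* src_destroy */
	    &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* not consumed on failure: discard it ourselves */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif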
11473
11474 /*
11475 * Routine: vm_map_copyin_common
11476 *
11477 * Description:
11478 * Copy the specified region (src_addr, len) from the
11479 * source address space (src_map), possibly removing
11480 * the region from the source address space (src_destroy).
11481 *
11482 * Returns:
11483 * A vm_map_copy_t object (copy_result), suitable for
11484 * insertion into another address space (using vm_map_copyout),
11485 * copying over another address space region (using
11486 * vm_map_copy_overwrite). If the copy is unused, it
11487 * should be destroyed (using vm_map_copy_discard).
11488 *
11489 * In/out conditions:
11490 * The source map should not be locked on entry.
11491 */
11492
11493 typedef struct submap_map {
11494 vm_map_t parent_map;
11495 vm_map_offset_t base_start;
11496 vm_map_offset_t base_end;
11497 vm_map_size_t base_len;
11498 struct submap_map *next;
11499 } submap_map_t;
11500
11501 kern_return_t
11502 vm_map_copyin_common(
11503 vm_map_t src_map,
11504 vm_map_address_t src_addr,
11505 vm_map_size_t len,
11506 boolean_t src_destroy,
11507 __unused boolean_t src_volatile,
11508 vm_map_copy_t *copy_result, /* OUT */
11509 boolean_t use_maxprot)
11510 {
11511 int flags;
11512
11513 flags = 0;
11514 if (src_destroy) {
11515 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11516 }
11517 if (use_maxprot) {
11518 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11519 }
11520 return vm_map_copyin_internal(src_map,
11521 src_addr,
11522 len,
11523 flags,
11524 copy_result);
11525 }
11526 kern_return_t
11527 vm_map_copyin_internal(
11528 vm_map_t src_map,
11529 vm_map_address_t src_addr,
11530 vm_map_size_t len,
11531 int flags,
11532 vm_map_copy_t *copy_result) /* OUT */
11533 {
11534 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11535 * in multi-level lookup, this
11536 * entry contains the actual
11537 * vm_object/offset.
11538 */
11539 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11540
11541 vm_map_offset_t src_start; /* Start of current entry --
11542 * where copy is taking place now
11543 */
11544 vm_map_offset_t src_end; /* End of entire region to be
11545 * copied */
11546 vm_map_offset_t src_base;
11547 vm_map_t base_map = src_map;
11548 boolean_t map_share = FALSE;
11549 submap_map_t *parent_maps = NULL;
11550
11551 vm_map_copy_t copy; /* Resulting copy */
11552 vm_map_address_t copy_addr;
11553 vm_map_size_t copy_size;
11554 boolean_t src_destroy;
11555 boolean_t use_maxprot;
11556 boolean_t preserve_purgeable;
11557 boolean_t entry_was_shared;
11558 vm_map_entry_t saved_src_entry;
11559
11560 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11561 return KERN_INVALID_ARGUMENT;
11562 }
11563
11564 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11565 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11566 preserve_purgeable =
11567 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11568
11569 /*
11570 * Check for copies of zero bytes.
11571 */
11572
11573 if (len == 0) {
11574 *copy_result = VM_MAP_COPY_NULL;
11575 return KERN_SUCCESS;
11576 }
11577
11578 /*
11579 * Check that the end address doesn't overflow
11580 */
11581 src_end = src_addr + len;
11582 if (src_end < src_addr) {
11583 return KERN_INVALID_ADDRESS;
11584 }
11585
11586 /*
11587 * Compute (page aligned) start and end of region
11588 */
11589 src_start = vm_map_trunc_page(src_addr,
11590 VM_MAP_PAGE_MASK(src_map));
11591 src_end = vm_map_round_page(src_end,
11592 VM_MAP_PAGE_MASK(src_map));
11593
11594 /*
11595 * If the copy is sufficiently small, use a kernel buffer instead
11596 * of making a virtual copy. The theory being that the cost of
11597 * setting up VM (and taking C-O-W faults) dominates the copy costs
11598 * for small regions.
11599 */
11600 if ((len < msg_ool_size_small) &&
11601 !use_maxprot &&
11602 !preserve_purgeable &&
11603 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11604 /*
11605 * Since the "msg_ool_size_small" threshold was increased and
11606 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11607 * address space limits, we revert to doing a virtual copy if the
11608 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11609 * of the commpage would now fail when it used to work.
11610 */
11611 (src_start >= vm_map_min(src_map) &&
11612 src_start < vm_map_max(src_map) &&
11613 src_end >= vm_map_min(src_map) &&
11614 src_end < vm_map_max(src_map))) {
11615 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11616 src_destroy, copy_result);
11617 }
11618
11619 /*
11620 * Allocate a header element for the list.
11621 *
11622 * Use the start and end in the header to
11623 * remember the endpoints prior to rounding.
11624 */
11625
11626 copy = vm_map_copy_allocate();
11627 copy->type = VM_MAP_COPY_ENTRY_LIST;
11628 copy->cpy_hdr.entries_pageable = TRUE;
11629 copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
11630
11631 vm_map_store_init( &(copy->cpy_hdr));
11632
11633 copy->offset = src_addr;
11634 copy->size = len;
11635
11636 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11637
11638 #define RETURN(x) \
11639 MACRO_BEGIN \
11640 vm_map_unlock(src_map); \
11641 if(src_map != base_map) \
11642 vm_map_deallocate(src_map); \
11643 if (new_entry != VM_MAP_ENTRY_NULL) \
11644 vm_map_copy_entry_dispose(copy,new_entry); \
11645 vm_map_copy_discard(copy); \
11646 { \
11647 submap_map_t *_ptr; \
11648 \
11649 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11650 parent_maps=parent_maps->next; \
11651 if (_ptr->parent_map != base_map) \
11652 vm_map_deallocate(_ptr->parent_map); \
11653 kfree(_ptr, sizeof(submap_map_t)); \
11654 } \
11655 } \
11656 MACRO_RETURN(x); \
11657 MACRO_END
11658
11659 /*
11660 * Find the beginning of the region.
11661 */
11662
11663 vm_map_lock(src_map);
11664
11665 /*
11666 * Look up the original "src_addr" rather than the truncated
11667 * "src_start", in case "src_start" falls in a non-map-aligned
11668 * map entry *before* the map entry that contains "src_addr"...
11669 */
11670 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11671 RETURN(KERN_INVALID_ADDRESS);
11672 }
11673 if (!tmp_entry->is_sub_map) {
11674 /*
11675 * ... but clip to the map-rounded "src_start" rather than
11676 * "src_addr" to preserve map-alignment. We'll adjust the
11677 * first copy entry at the end, if needed.
11678 */
11679 vm_map_clip_start(src_map, tmp_entry, src_start);
11680 }
11681 if (src_start < tmp_entry->vme_start) {
11682 /*
11683 * Move "src_start" up to the start of the
11684 * first map entry to copy.
11685 */
11686 src_start = tmp_entry->vme_start;
11687 }
11688 /* set for later submap fix-up */
11689 copy_addr = src_start;
11690
11691 /*
11692 * Go through entries until we get to the end.
11693 */
11694
11695 while (TRUE) {
11696 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11697 vm_map_size_t src_size; /* Size of source
11698 * map entry (in both
11699 * maps)
11700 */
11701
11702 vm_object_t src_object; /* Object to copy */
11703 vm_object_offset_t src_offset;
11704
11705 boolean_t src_needs_copy; /* Should source map
11706 * be made read-only
11707 * for copy-on-write?
11708 */
11709
11710 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11711
11712 boolean_t was_wired; /* Was source wired? */
11713 vm_map_version_t version; /* Version before locks
11714 * dropped to make copy
11715 */
11716 kern_return_t result; /* Return value from
11717 * copy_strategically.
11718 */
11719 while (tmp_entry->is_sub_map) {
11720 vm_map_size_t submap_len;
11721 submap_map_t *ptr;
11722
11723 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11724 ptr->next = parent_maps;
11725 parent_maps = ptr;
11726 ptr->parent_map = src_map;
11727 ptr->base_start = src_start;
11728 ptr->base_end = src_end;
11729 submap_len = tmp_entry->vme_end - src_start;
11730 if (submap_len > (src_end - src_start)) {
11731 submap_len = src_end - src_start;
11732 }
11733 ptr->base_len = submap_len;
11734
11735 src_start -= tmp_entry->vme_start;
11736 src_start += VME_OFFSET(tmp_entry);
11737 src_end = src_start + submap_len;
11738 src_map = VME_SUBMAP(tmp_entry);
11739 vm_map_lock(src_map);
11740 /* keep an outstanding reference for all maps in */
11741 /* the parents tree except the base map */
11742 vm_map_reference(src_map);
11743 vm_map_unlock(ptr->parent_map);
11744 if (!vm_map_lookup_entry(
11745 src_map, src_start, &tmp_entry)) {
11746 RETURN(KERN_INVALID_ADDRESS);
11747 }
11748 map_share = TRUE;
11749 if (!tmp_entry->is_sub_map) {
11750 vm_map_clip_start(src_map, tmp_entry, src_start);
11751 }
11752 src_entry = tmp_entry;
11753 }
11754 /* we are now in the lowest level submap... */
11755
11756 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11757 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11758 /* This is not supported for now. In the future */
11759 /* we will need to detect the phys_contig */
11760 /* condition and then upgrade copy_slowly */
11761 /* to do a physical copy from the device-memory- */
11762 /* based object. We can piggy-back off of */
11763 /* the was_wired boolean to set up the */
11764 /* proper handling. */
11765 RETURN(KERN_PROTECTION_FAILURE);
11766 }
11767 /*
11768 * Create a new address map entry to hold the result.
11769 * Fill in the fields from the appropriate source entries.
11770 * We must unlock the source map to do this if we need
11771 * to allocate a map entry.
11772 */
11773 if (new_entry == VM_MAP_ENTRY_NULL) {
11774 version.main_timestamp = src_map->timestamp;
11775 vm_map_unlock(src_map);
11776
11777 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11778
11779 vm_map_lock(src_map);
11780 if ((version.main_timestamp + 1) != src_map->timestamp) {
11781 if (!vm_map_lookup_entry(src_map, src_start,
11782 &tmp_entry)) {
11783 RETURN(KERN_INVALID_ADDRESS);
11784 }
11785 if (!tmp_entry->is_sub_map) {
11786 vm_map_clip_start(src_map, tmp_entry, src_start);
11787 }
11788 continue; /* restart w/ new tmp_entry */
11789 }
11790 }
11791
11792 /*
11793 * Verify that the region can be read.
11794 */
11795 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11796 !use_maxprot) ||
11797 (src_entry->max_protection & VM_PROT_READ) == 0) {
11798 RETURN(KERN_PROTECTION_FAILURE);
11799 }
11800
11801 /*
11802 * Clip against the endpoints of the entire region.
11803 */
11804
11805 vm_map_clip_end(src_map, src_entry, src_end);
11806
11807 src_size = src_entry->vme_end - src_start;
11808 src_object = VME_OBJECT(src_entry);
11809 src_offset = VME_OFFSET(src_entry);
11810 was_wired = (src_entry->wired_count != 0);
11811
11812 vm_map_entry_copy(src_map, new_entry, src_entry);
11813 if (new_entry->is_sub_map) {
11814 /* clr address space specifics */
11815 new_entry->use_pmap = FALSE;
11816 } else {
11817 /*
11818 * We're dealing with a copy-on-write operation,
11819 * so the resulting mapping should not inherit the
11820 * original mapping's accounting settings.
11821 * "iokit_acct" should have been cleared in
11822 * vm_map_entry_copy().
11823 * "use_pmap" should be reset to its default (TRUE)
11824 * so that the new mapping gets accounted for in
11825 * the task's memory footprint.
11826 */
11827 assert(!new_entry->iokit_acct);
11828 new_entry->use_pmap = TRUE;
11829 }
11830
11831 /*
11832 * Attempt non-blocking copy-on-write optimizations.
11833 */
11834
11835 /*
11836 * If we are destroying the source, and the object
11837 * is internal, we could move the object reference
11838 * from the source to the copy. The copy is
11839 * copy-on-write only if the source is.
11840 * We make another reference to the object, because
11841 * destroying the source entry will deallocate it.
11842 *
11843 * This memory transfer has to be atomic (to prevent
11844 * the VM object from being shared or copied while
11845 * it's being moved here), so we could only do this
11846 * if we won't have to unlock the VM map until the
11847 * original mapping has been fully removed.
11848 */
11849
11850 RestartCopy:
11851 if ((src_object == VM_OBJECT_NULL ||
11852 (!was_wired && !map_share && !tmp_entry->is_shared
11853 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11854 vm_object_copy_quickly(
11855 VME_OBJECT_PTR(new_entry),
11856 src_offset,
11857 src_size,
11858 &src_needs_copy,
11859 &new_entry_needs_copy)) {
11860 new_entry->needs_copy = new_entry_needs_copy;
11861
11862 /*
11863 * Handle copy-on-write obligations
11864 */
11865
11866 if (src_needs_copy && !tmp_entry->needs_copy) {
11867 vm_prot_t prot;
11868
11869 prot = src_entry->protection & ~VM_PROT_WRITE;
11870
11871 if (override_nx(src_map, VME_ALIAS(src_entry))
11872 && prot) {
11873 prot |= VM_PROT_EXECUTE;
11874 }
11875
11876 vm_object_pmap_protect(
11877 src_object,
11878 src_offset,
11879 src_size,
11880 (src_entry->is_shared ?
11881 PMAP_NULL
11882 : src_map->pmap),
11883 VM_MAP_PAGE_SIZE(src_map),
11884 src_entry->vme_start,
11885 prot);
11886
11887 assert(tmp_entry->wired_count == 0);
11888 tmp_entry->needs_copy = TRUE;
11889 }
11890
11891 /*
11892 * The map has never been unlocked, so it's safe
11893 * to move to the next entry rather than doing
11894 * another lookup.
11895 */
11896
11897 goto CopySuccessful;
11898 }
11899
11900 entry_was_shared = tmp_entry->is_shared;
11901
11902 /*
11903 * Take an object reference, so that we may
11904 * release the map lock(s).
11905 */
11906
11907 assert(src_object != VM_OBJECT_NULL);
11908 vm_object_reference(src_object);
11909
11910 /*
11911 * Record the timestamp for later verification.
11912 * Unlock the map.
11913 */
11914
11915 version.main_timestamp = src_map->timestamp;
11916 vm_map_unlock(src_map); /* Increments timestamp once! */
11917 saved_src_entry = src_entry;
11918 tmp_entry = VM_MAP_ENTRY_NULL;
11919 src_entry = VM_MAP_ENTRY_NULL;
11920
11921 /*
11922 * Perform the copy
11923 */
11924
11925 if (was_wired ||
11926 (debug4k_no_cow_copyin &&
11927 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11928 CopySlowly:
11929 vm_object_lock(src_object);
11930 result = vm_object_copy_slowly(
11931 src_object,
11932 src_offset,
11933 src_size,
11934 THREAD_UNINT,
11935 VME_OBJECT_PTR(new_entry));
11936 VME_OFFSET_SET(new_entry,
11937 src_offset - vm_object_trunc_page(src_offset));
11938 new_entry->needs_copy = FALSE;
11939 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11940 (entry_was_shared || map_share)) {
11941 vm_object_t new_object;
11942
11943 vm_object_lock_shared(src_object);
11944 new_object = vm_object_copy_delayed(
11945 src_object,
11946 src_offset,
11947 src_size,
11948 TRUE);
11949 if (new_object == VM_OBJECT_NULL) {
11950 goto CopySlowly;
11951 }
11952
11953 VME_OBJECT_SET(new_entry, new_object);
11954 assert(new_entry->wired_count == 0);
11955 new_entry->needs_copy = TRUE;
11956 assert(!new_entry->iokit_acct);
11957 assert(new_object->purgable == VM_PURGABLE_DENY);
11958 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11959 result = KERN_SUCCESS;
11960 } else {
11961 vm_object_offset_t new_offset;
11962 new_offset = VME_OFFSET(new_entry);
11963 result = vm_object_copy_strategically(src_object,
11964 src_offset,
11965 src_size,
11966 VME_OBJECT_PTR(new_entry),
11967 &new_offset,
11968 &new_entry_needs_copy);
11969 if (new_offset != VME_OFFSET(new_entry)) {
11970 VME_OFFSET_SET(new_entry, new_offset);
11971 }
11972
11973 new_entry->needs_copy = new_entry_needs_copy;
11974 }
11975
11976 if (result == KERN_SUCCESS &&
11977 ((preserve_purgeable &&
11978 src_object->purgable != VM_PURGABLE_DENY) ||
11979 new_entry->used_for_jit)) {
11980 /*
11981 * Purgeable objects should be COPY_NONE, true share;
11982 * this should be propagated to the copy.
11983 *
11984 * Also force mappings the pmap specially protects to
11985 * be COPY_NONE; trying to COW these mappings would
11986 * change the effective protections, which could have
11987 * side effects if the pmap layer relies on the
11988 * specified protections.
11989 */
11990
11991 vm_object_t new_object;
11992
11993 new_object = VME_OBJECT(new_entry);
11994 assert(new_object != src_object);
11995 vm_object_lock(new_object);
11996 assert(new_object->ref_count == 1);
11997 assert(new_object->shadow == VM_OBJECT_NULL);
11998 assert(new_object->copy == VM_OBJECT_NULL);
11999 assert(new_object->vo_owner == NULL);
12000
12001 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12002
12003 if (preserve_purgeable &&
12004 src_object->purgable != VM_PURGABLE_DENY) {
12005 new_object->true_share = TRUE;
12006
12007 /* start as non-volatile with no owner... */
12008 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12009 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12010 /* ... and move to src_object's purgeable state */
12011 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12012 int state;
12013 state = src_object->purgable;
12014 vm_object_purgable_control(
12015 new_object,
12016 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12017 &state);
12018 }
12019 /* no pmap accounting for purgeable objects */
12020 new_entry->use_pmap = FALSE;
12021 }
12022
12023 vm_object_unlock(new_object);
12024 new_object = VM_OBJECT_NULL;
12025 }
12026
12027 if (result != KERN_SUCCESS &&
12028 result != KERN_MEMORY_RESTART_COPY) {
12029 vm_map_lock(src_map);
12030 RETURN(result);
12031 }
12032
12033 /*
12034 * Throw away the extra reference
12035 */
12036
12037 vm_object_deallocate(src_object);
12038
12039 /*
12040 * Verify that the map has not substantially
12041 * changed while the copy was being made.
12042 */
12043
12044 vm_map_lock(src_map);
12045
12046 if ((version.main_timestamp + 1) == src_map->timestamp) {
12047 /* src_map hasn't changed: src_entry is still valid */
12048 src_entry = saved_src_entry;
12049 goto VerificationSuccessful;
12050 }
12051
12052 /*
12053 * Simple version comparison failed.
12054 *
12055 * Retry the lookup and verify that the
12056 * same object/offset are still present.
12057 *
12058 * [Note: a memory manager that colludes with
12059 * the calling task can detect that we have
12060 * cheated. While the map was unlocked, the
12061 * mapping could have been changed and restored.]
12062 */
12063
12064 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12065 if (result != KERN_MEMORY_RESTART_COPY) {
12066 vm_object_deallocate(VME_OBJECT(new_entry));
12067 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12068 /* reset accounting state */
12069 new_entry->iokit_acct = FALSE;
12070 new_entry->use_pmap = TRUE;
12071 }
12072 RETURN(KERN_INVALID_ADDRESS);
12073 }
12074
12075 src_entry = tmp_entry;
12076 vm_map_clip_start(src_map, src_entry, src_start);
12077
12078 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12079 !use_maxprot) ||
12080 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12081 goto VerificationFailed;
12082 }
12083
12084 if (src_entry->vme_end < new_entry->vme_end) {
12085 /*
12086 * This entry might have been shortened
12087 * (vm_map_clip_end) or been replaced with
12088 * an entry that ends closer to "src_start"
12089 * than before.
12090 * Adjust "new_entry" accordingly; copying
12091 * less memory would be correct but we also
12092 * redo the copy (see below) if the new entry
12093 * no longer points at the same object/offset.
12094 */
12095 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12096 VM_MAP_COPY_PAGE_MASK(copy)));
12097 new_entry->vme_end = src_entry->vme_end;
12098 src_size = new_entry->vme_end - src_start;
12099 } else if (src_entry->vme_end > new_entry->vme_end) {
12100 /*
12101 * This entry might have been extended
12102 * (vm_map_entry_simplify() or coalesce)
12103 * or been replaced with an entry that ends farther
12104 * from "src_start" than before.
12105 *
12106 * We've called vm_object_copy_*() only on
12107 * the previous <start:end> range, so we can't
12108 * just extend new_entry. We have to re-do
12109 * the copy based on the new entry as if it was
12110 * pointing at a different object/offset (see
12111 * "Verification failed" below).
12112 */
12113 }
12114
12115 if ((VME_OBJECT(src_entry) != src_object) ||
12116 (VME_OFFSET(src_entry) != src_offset) ||
12117 (src_entry->vme_end > new_entry->vme_end)) {
12118 /*
12119 * Verification failed.
12120 *
12121 * Start over with this top-level entry.
12122 */
12123
12124 VerificationFailed: ;
12125
12126 vm_object_deallocate(VME_OBJECT(new_entry));
12127 tmp_entry = src_entry;
12128 continue;
12129 }
12130
12131 /*
12132 * Verification succeeded.
12133 */
12134
12135 VerificationSuccessful:;
12136
12137 if (result == KERN_MEMORY_RESTART_COPY) {
12138 goto RestartCopy;
12139 }
12140
12141 /*
12142 * Copy succeeded.
12143 */
12144
12145 CopySuccessful: ;
12146
12147 /*
12148 * Link in the new copy entry.
12149 */
12150
12151 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12152 new_entry);
12153
12154 /*
12155 * Determine whether the entire region
12156 * has been copied.
12157 */
12158 src_base = src_start;
12159 src_start = new_entry->vme_end;
12160 new_entry = VM_MAP_ENTRY_NULL;
12161 while ((src_start >= src_end) && (src_end != 0)) {
12162 submap_map_t *ptr;
12163
12164 if (src_map == base_map) {
12165 /* back to the top */
12166 break;
12167 }
12168
12169 ptr = parent_maps;
12170 assert(ptr != NULL);
12171 parent_maps = parent_maps->next;
12172
12173 /* fix up the damage we did in that submap */
12174 vm_map_simplify_range(src_map,
12175 src_base,
12176 src_end);
12177
12178 vm_map_unlock(src_map);
12179 vm_map_deallocate(src_map);
12180 vm_map_lock(ptr->parent_map);
12181 src_map = ptr->parent_map;
12182 src_base = ptr->base_start;
12183 src_start = ptr->base_start + ptr->base_len;
12184 src_end = ptr->base_end;
12185 if (!vm_map_lookup_entry(src_map,
12186 src_start,
12187 &tmp_entry) &&
12188 (src_end > src_start)) {
12189 RETURN(KERN_INVALID_ADDRESS);
12190 }
12191 kfree(ptr, sizeof(submap_map_t));
12192 if (parent_maps == NULL) {
12193 map_share = FALSE;
12194 }
12195 src_entry = tmp_entry->vme_prev;
12196 }
12197
12198 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12199 (src_start >= src_addr + len) &&
12200 (src_addr + len != 0)) {
12201 /*
12202 * Stop copying now, even though we haven't reached
12203 * "src_end". We'll adjust the end of the last copy
12204 * entry at the end, if needed.
12205 *
12206 * If src_map's alignment is different from the
12207 * system's page-alignment, there could be
12208 * extra non-map-aligned map entries between
12209 * the original (non-rounded) "src_addr + len"
12210 * and the rounded "src_end".
12211 * We do not want to copy those map entries since
12212 * they're not part of the copied range.
12213 */
12214 break;
12215 }
12216
12217 if ((src_start >= src_end) && (src_end != 0)) {
12218 break;
12219 }
12220
12221 /*
12222 * Verify that there are no gaps in the region
12223 */
12224
12225 tmp_entry = src_entry->vme_next;
12226 if ((tmp_entry->vme_start != src_start) ||
12227 (tmp_entry == vm_map_to_entry(src_map))) {
12228 RETURN(KERN_INVALID_ADDRESS);
12229 }
12230 }
12231
12232 /*
12233 * If the source should be destroyed, do it now, since the
12234 * copy was successful.
12235 */
12236 if (src_destroy) {
12237 (void) vm_map_delete(
12238 src_map,
12239 vm_map_trunc_page(src_addr,
12240 VM_MAP_PAGE_MASK(src_map)),
12241 src_end,
12242 ((src_map == kernel_map) ?
12243 VM_MAP_REMOVE_KUNWIRE :
12244 VM_MAP_REMOVE_NO_FLAGS),
12245 VM_MAP_NULL);
12246 } else {
12247 /* fix up the damage we did in the base map */
12248 vm_map_simplify_range(
12249 src_map,
12250 vm_map_trunc_page(src_addr,
12251 VM_MAP_PAGE_MASK(src_map)),
12252 vm_map_round_page(src_end,
12253 VM_MAP_PAGE_MASK(src_map)));
12254 }
12255
12256 vm_map_unlock(src_map);
12257 tmp_entry = VM_MAP_ENTRY_NULL;
12258
12259 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12260 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12261 vm_map_offset_t original_start, original_offset, original_end;
12262
12263 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12264
12265 /* adjust alignment of first copy_entry's "vme_start" */
12266 tmp_entry = vm_map_copy_first_entry(copy);
12267 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12268 vm_map_offset_t adjustment;
12269
12270 original_start = tmp_entry->vme_start;
12271 original_offset = VME_OFFSET(tmp_entry);
12272
12273 /* map-align the start of the first copy entry... */
12274 adjustment = (tmp_entry->vme_start -
12275 vm_map_trunc_page(
12276 tmp_entry->vme_start,
12277 VM_MAP_PAGE_MASK(src_map)));
12278 tmp_entry->vme_start -= adjustment;
12279 VME_OFFSET_SET(tmp_entry,
12280 VME_OFFSET(tmp_entry) - adjustment);
12281 copy_addr -= adjustment;
12282 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12283 /* ... adjust for mis-aligned start of copy range */
12284 adjustment =
12285 (vm_map_trunc_page(copy->offset,
12286 PAGE_MASK) -
12287 vm_map_trunc_page(copy->offset,
12288 VM_MAP_PAGE_MASK(src_map)));
12289 if (adjustment) {
12290 assert(page_aligned(adjustment));
12291 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12292 tmp_entry->vme_start += adjustment;
12293 VME_OFFSET_SET(tmp_entry,
12294 (VME_OFFSET(tmp_entry) +
12295 adjustment));
12296 copy_addr += adjustment;
12297 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12298 }
12299
12300 /*
12301 * Assert that the adjustments haven't exposed
12302 * more than was originally copied...
12303 */
12304 assert(tmp_entry->vme_start >= original_start);
12305 assert(VME_OFFSET(tmp_entry) >= original_offset);
12306 /*
12307 * ... and that it did not adjust outside of
12308 * a single 16K page.
12309 */
12310 assert(vm_map_trunc_page(tmp_entry->vme_start,
12311 VM_MAP_PAGE_MASK(src_map)) ==
12312 vm_map_trunc_page(original_start,
12313 VM_MAP_PAGE_MASK(src_map)));
12314 }
12315
12316 /* adjust alignment of last copy_entry's "vme_end" */
12317 tmp_entry = vm_map_copy_last_entry(copy);
12318 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12319 vm_map_offset_t adjustment;
12320
12321 original_end = tmp_entry->vme_end;
12322
12323 /* map-align the end of the last copy entry... */
12324 tmp_entry->vme_end =
12325 vm_map_round_page(tmp_entry->vme_end,
12326 VM_MAP_PAGE_MASK(src_map));
12327 /* ... adjust for mis-aligned end of copy range */
12328 adjustment =
12329 (vm_map_round_page((copy->offset +
12330 copy->size),
12331 VM_MAP_PAGE_MASK(src_map)) -
12332 vm_map_round_page((copy->offset +
12333 copy->size),
12334 PAGE_MASK));
12335 if (adjustment) {
12336 assert(page_aligned(adjustment));
12337 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12338 tmp_entry->vme_end -= adjustment;
12339 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12340 }
12341
12342 /*
12343 * Assert that the adjustments haven't exposed
12344 * more than was originally copied...
12345 */
12346 assert(tmp_entry->vme_end <= original_end);
12347 /*
12348 * ... and that it did not adjust outside of
12349 * a single 16K page.
12350 */
12351 assert(vm_map_round_page(tmp_entry->vme_end,
12352 VM_MAP_PAGE_MASK(src_map)) ==
12353 vm_map_round_page(original_end,
12354 VM_MAP_PAGE_MASK(src_map)));
12355 }
12356 }
12357
12358 /* Fix-up start and end points in copy. This is necessary */
12359 /* when the various entries in the copy object were picked */
12360 /* up from different sub-maps */
12361
12362 tmp_entry = vm_map_copy_first_entry(copy);
12363 copy_size = 0; /* compute actual size */
12364 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12365 assert(VM_MAP_PAGE_ALIGNED(
12366 copy_addr + (tmp_entry->vme_end -
12367 tmp_entry->vme_start),
12368 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12369 assert(VM_MAP_PAGE_ALIGNED(
12370 copy_addr,
12371 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12372
12373 /*
12374 * The copy_entries will be injected directly into the
12375 * destination map and might not be "map aligned" there...
12376 */
12377 tmp_entry->map_aligned = FALSE;
12378
12379 tmp_entry->vme_end = copy_addr +
12380 (tmp_entry->vme_end - tmp_entry->vme_start);
12381 tmp_entry->vme_start = copy_addr;
12382 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12383 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12384 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12385 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12386 }
12387
12388 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12389 copy_size < copy->size) {
12390 /*
12391 * The actual size of the VM map copy is smaller than what
12392 * was requested by the caller. This must be because some
12393 * PAGE_SIZE-sized pages are missing at the end of the last
12394 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12395 * The caller might not have been aware of those missing
12396 * pages and might not want to be aware of it, which is
12397 * fine as long as they don't try to access (and crash on)
12398 * those missing pages.
12399 * Let's adjust the size of the "copy", to avoid failing
12400 * in vm_map_copyout() or vm_map_copy_overwrite().
12401 */
12402 assert(vm_map_round_page(copy_size,
12403 VM_MAP_PAGE_MASK(src_map)) ==
12404 vm_map_round_page(copy->size,
12405 VM_MAP_PAGE_MASK(src_map)));
12406 copy->size = copy_size;
12407 }
12408
12409 *copy_result = copy;
12410 return KERN_SUCCESS;
12411
12412 #undef RETURN
12413 }
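/*
 * Illustrative sketch, assuming the classic Mach copyin/copyout pair
 * (vm_map_copyin() and vm_map_copyout()): the entry list built by the
 * copyin path above is what vm_map_copyout() later links into a
 * destination map, which is why "copy->size" may be trimmed to the
 * actual copied size before returning.  A kernel client moving a
 * range between maps would typically do:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len,
 *	    FALSE,			// src_destroy: leave the source mapped
 *	    &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);	// copy not consumed on failure
 *		}
 *	}
 */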
12414
12415 kern_return_t
12416 vm_map_copy_extract(
12417 vm_map_t src_map,
12418 vm_map_address_t src_addr,
12419 vm_map_size_t len,
12420 vm_prot_t required_prot,
12421 boolean_t do_copy,
12422 vm_map_copy_t *copy_result, /* OUT */
12423 vm_prot_t *cur_prot, /* OUT */
12424 vm_prot_t *max_prot, /* OUT */
12425 vm_inherit_t inheritance,
12426 vm_map_kernel_flags_t vmk_flags)
12427 {
12428 vm_map_copy_t copy;
12429 kern_return_t kr;
12430
12431 /*
12432 * Check for copies of zero bytes.
12433 */
12434
12435 if (len == 0) {
12436 *copy_result = VM_MAP_COPY_NULL;
12437 return KERN_SUCCESS;
12438 }
12439
12440 /*
12441 * Check that the end address doesn't overflow
12442 */
12443 if (src_addr + len < src_addr) {
12444 return KERN_INVALID_ADDRESS;
12445 }
12446
12447 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12448 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12449 }
12450
12451 /*
12452 * Allocate a header element for the list.
12453 *
12454 * Use the start and end in the header to
12455 * remember the endpoints prior to rounding.
12456 */
12457
12458 copy = vm_map_copy_allocate();
12459 copy->type = VM_MAP_COPY_ENTRY_LIST;
12460 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12461
12462 vm_map_store_init(&copy->cpy_hdr);
12463
12464 copy->offset = 0;
12465 copy->size = len;
12466
12467 kr = vm_map_remap_extract(src_map,
12468 src_addr,
12469 len,
12470 required_prot,
12471 do_copy, /* copy */
12472 &copy->cpy_hdr,
12473 cur_prot,
12474 max_prot,
12475 inheritance,
12476 vmk_flags);
12477 if (kr != KERN_SUCCESS) {
12478 vm_map_copy_discard(copy);
12479 return kr;
12480 }
12481 assert((*cur_prot & required_prot) == required_prot);
12482 assert((*max_prot & required_prot) == required_prot);
12483
12484 *copy_result = copy;
12485 return KERN_SUCCESS;
12486 }
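/*
 * Illustrative sketch of a minimal call, based on the signature above;
 * the flag and inheritance values are examples only:
 *
 *	vm_map_copy_t copy;
 *	vm_prot_t cur_prot, max_prot;
 *	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 *	kern_return_t kr;
 *
 *	vmk_flags.vmkf_copy_pageable = TRUE;
 *	kr = vm_map_copy_extract(src_map, src_addr, len,
 *	    VM_PROT_READ,		// required_prot: fail unless readable
 *	    FALSE,			// do_copy: share rather than copy
 *	    &copy, &cur_prot, &max_prot,
 *	    VM_INHERIT_DEFAULT, vmk_flags);
 *
 * On success, the assertions above guarantee that both "cur_prot" and
 * "max_prot" include the required protection.
 */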
12487
12488 /*
12489 * vm_map_copyin_object:
12490 *
12491 * Create a copy object from an object.
12492 * Our caller donates an object reference.
12493 */
12494
12495 kern_return_t
12496 vm_map_copyin_object(
12497 vm_object_t object,
12498 vm_object_offset_t offset, /* offset of region in object */
12499 vm_object_size_t size, /* size of region in object */
12500 vm_map_copy_t *copy_result) /* OUT */
12501 {
12502 vm_map_copy_t copy; /* Resulting copy */
12503
12504 /*
12505 * We drop the object into a special copy object
12506 * that contains the object directly.
12507 */
12508
12509 copy = vm_map_copy_allocate();
12510 copy->type = VM_MAP_COPY_OBJECT;
12511 copy->cpy_object = object;
12512 copy->offset = offset;
12513 copy->size = size;
12514
12515 *copy_result = copy;
12516 return KERN_SUCCESS;
12517 }
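/*
 * Illustrative sketch: because the caller donates its object
 * reference, a typical use is to wrap a freshly allocated object and
 * hand the resulting VM_MAP_COPY_OBJECT copy to vm_map_copyout()
 * (assuming the usual Mach copyout interface):
 *
 *	vm_object_t object;
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *
 *	object = vm_object_allocate(size);	// reference donated below
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 *	if (vm_map_copyout(dst_map, &dst_addr, copy) != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);	// drops the donated reference
 *	}
 *
 * No entry list is built here; the copy carries the object, offset and
 * size directly.
 */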
12518
12519 static void
12520 vm_map_fork_share(
12521 vm_map_t old_map,
12522 vm_map_entry_t old_entry,
12523 vm_map_t new_map)
12524 {
12525 vm_object_t object;
12526 vm_map_entry_t new_entry;
12527
12528 /*
12529 * New sharing code. New map entry
12530 * references original object. Internal
12531 * objects use the asynchronous copy algorithm for
12532 * future copies. First make sure we have
12533 * the right object. If we need a shadow,
12534 * or someone else already has one, then
12535 * make a new shadow and share it.
12536 */
12537
12538 object = VME_OBJECT(old_entry);
12539 if (old_entry->is_sub_map) {
12540 assert(old_entry->wired_count == 0);
12541 #ifndef NO_NESTED_PMAP
12542 if (old_entry->use_pmap) {
12543 kern_return_t result;
12544
12545 result = pmap_nest(new_map->pmap,
12546 (VME_SUBMAP(old_entry))->pmap,
12547 (addr64_t)old_entry->vme_start,
12548 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12549 if (result) {
12550 panic("vm_map_fork_share: pmap_nest failed!");
12551 }
12552 }
12553 #endif /* NO_NESTED_PMAP */
12554 } else if (object == VM_OBJECT_NULL) {
12555 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12556 old_entry->vme_start));
12557 VME_OFFSET_SET(old_entry, 0);
12558 VME_OBJECT_SET(old_entry, object);
12559 old_entry->use_pmap = TRUE;
12560 // assert(!old_entry->needs_copy);
12561 } else if (object->copy_strategy !=
12562 MEMORY_OBJECT_COPY_SYMMETRIC) {
12563 /*
12564 * We are already using an asymmetric
12565 * copy, and therefore we already have
12566 * the right object.
12567 */
12568
12569 assert(!old_entry->needs_copy);
12570 } else if (old_entry->needs_copy || /* case 1 */
12571 object->shadowed || /* case 2 */
12572 (!object->true_share && /* case 3 */
12573 !old_entry->is_shared &&
12574 (object->vo_size >
12575 (vm_map_size_t)(old_entry->vme_end -
12576 old_entry->vme_start)))) {
12577 /*
12578 * We need to create a shadow.
12579 * There are three cases here.
12580 * In the first case, we need to
12581 * complete a deferred symmetrical
12582 * copy that we participated in.
12583 * In the second and third cases,
12584 * we need to create the shadow so
12585 * that changes that we make to the
12586 * object do not interfere with
12587 * any symmetrical copies which
12588 * have occurred (case 2) or which
12589 * might occur (case 3).
12590 *
12591 * The first case is when we had
12592 * deferred shadow object creation
12593 * via the entry->needs_copy mechanism.
12594 * This mechanism only works when
12595 * only one entry points to the source
12596 * object, and we are about to create
12597 * a second entry pointing to the
12598 * same object. The problem is that
12599 * there is no way of mapping from
12600 * an object to the entries pointing
12601 * to it. (Deferred shadow creation
12602 * works with one entry because it occurs
12603 * at fault time, and we walk from the
12604 * entry to the object when handling
12605 * the fault.)
12606 *
12607 * The second case is when the object
12608 * to be shared has already been copied
12609 * with a symmetric copy, but we point
12610 * directly to the object without
12611 * needs_copy set in our entry. (This
12612 * can happen because different ranges
12613 * of an object can be pointed to by
12614 * different entries. In particular,
12615 * a single entry pointing to an object
12616 * can be split by a call to vm_inherit,
12617 * which, combined with task_create, can
12618 * result in the different entries
12619 * having different needs_copy values.)
12620 * The shadowed flag in the object allows
12621 * us to detect this case. The problem
12622 * with this case is that if this object
12623 * has or will have shadows, then we
12624 * must not perform an asymmetric copy
12625 * of this object, since such a copy
12626 * allows the object to be changed, which
12627 * will break the previous symmetrical
12628 * copies (which rely upon the object
12629 * not changing). In a sense, the shadowed
12630 * flag says "don't change this object".
12631 * We fix this by creating a shadow
12632 * object for this object, and sharing
12633 * that. This works because we are free
12634 * to change the shadow object (and thus
12635 * to use an asymmetric copy strategy);
12636 * this is also semantically correct,
12637 * since this object is temporary, and
12638 * therefore a copy of the object is
12639 * as good as the object itself. (This
12640 * is not true for permanent objects,
12641 * since the pager needs to see changes,
12642 * which won't happen if the changes
12643 * are made to a copy.)
12644 *
12645 * The third case is when the object
12646 * to be shared has parts sticking
12647 * outside of the entry we're working
12648 * with, and thus may in the future
12649 * be subject to a symmetrical copy.
12650 * (This is a preemptive version of
12651 * case 2.)
12652 */
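/*
 * Condensed view of the three cases tested below:
 *
 *	old_entry->needs_copy			-> case 1: finish a deferred
 *						   symmetric copy
 *	object->shadowed			-> case 2: protect copies that
 *						   already happened
 *	!true_share && !is_shared &&
 *	    vo_size > entry size		-> case 3: preempt copies that
 *						   might still happen
 *
 * In every case the remedy is the same: shadow the object and share
 * the shadow instead of the original.
 */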
12653 VME_OBJECT_SHADOW(old_entry,
12654 (vm_map_size_t) (old_entry->vme_end -
12655 old_entry->vme_start));
12656
12657 /*
12658 * If we're making a shadow for other than
12659 * copy on write reasons, then we have
12660 * to remove write permission.
12661 */
12662
12663 if (!old_entry->needs_copy &&
12664 (old_entry->protection & VM_PROT_WRITE)) {
12665 vm_prot_t prot;
12666
12667 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12668
12669 prot = old_entry->protection & ~VM_PROT_WRITE;
12670
12671 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12672
12673 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12674 prot |= VM_PROT_EXECUTE;
12675 }
12676
12677
12678 if (old_map->mapped_in_other_pmaps) {
12679 vm_object_pmap_protect(
12680 VME_OBJECT(old_entry),
12681 VME_OFFSET(old_entry),
12682 (old_entry->vme_end -
12683 old_entry->vme_start),
12684 PMAP_NULL,
12685 PAGE_SIZE,
12686 old_entry->vme_start,
12687 prot);
12688 } else {
12689 pmap_protect(old_map->pmap,
12690 old_entry->vme_start,
12691 old_entry->vme_end,
12692 prot);
12693 }
12694 }
12695
12696 old_entry->needs_copy = FALSE;
12697 object = VME_OBJECT(old_entry);
12698 }
12699
12700
12701 /*
12702 * If object was using a symmetric copy strategy,
12703 * change its copy strategy to the default
12704 * asymmetric copy strategy, which is copy_delay
12705 * in the non-norma case and copy_call in the
12706 * norma case. Bump the reference count for the
12707 * new entry.
12708 */
12709
12710 if (old_entry->is_sub_map) {
12711 vm_map_lock(VME_SUBMAP(old_entry));
12712 vm_map_reference(VME_SUBMAP(old_entry));
12713 vm_map_unlock(VME_SUBMAP(old_entry));
12714 } else {
12715 vm_object_lock(object);
12716 vm_object_reference_locked(object);
12717 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12718 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12719 }
12720 vm_object_unlock(object);
12721 }
12722
12723 /*
12724 * Clone the entry, using object ref from above.
12725 * Mark both entries as shared.
12726 */
12727
12728 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12729 * map or descendants */
12730 vm_map_entry_copy(old_map, new_entry, old_entry);
12731 old_entry->is_shared = TRUE;
12732 new_entry->is_shared = TRUE;
12733
12734 /*
12735 * We're dealing with a shared mapping, so the resulting mapping
12736 * should inherit some of the original mapping's accounting settings.
12737 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12738 * "use_pmap" should stay the same as before (if it hasn't been reset
12739 * to TRUE when we cleared "iokit_acct").
12740 */
12741 assert(!new_entry->iokit_acct);
12742
12743 /*
12744 * If the old entry's inheritance is VM_INHERIT_NONE,
12745 * the new entry is for a corpse fork; remove the
12746 * write permission from the new entry.
12747 */
12748 if (old_entry->inheritance == VM_INHERIT_NONE) {
12749 new_entry->protection &= ~VM_PROT_WRITE;
12750 new_entry->max_protection &= ~VM_PROT_WRITE;
12751 }
12752
12753 /*
12754 * Insert the entry into the new map -- we
12755 * know we're inserting at the end of the new
12756 * map.
12757 */
12758
12759 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12760 VM_MAP_KERNEL_FLAGS_NONE);
12761
12762 /*
12763 * Update the physical map
12764 */
12765
12766 if (old_entry->is_sub_map) {
12767 /* Bill Angell pmap support goes here */
12768 } else {
12769 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12770 old_entry->vme_end - old_entry->vme_start,
12771 old_entry->vme_start);
12772 }
12773 }
12774
12775 static boolean_t
12776 vm_map_fork_copy(
12777 vm_map_t old_map,
12778 vm_map_entry_t *old_entry_p,
12779 vm_map_t new_map,
12780 int vm_map_copyin_flags)
12781 {
12782 vm_map_entry_t old_entry = *old_entry_p;
12783 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12784 vm_map_offset_t start = old_entry->vme_start;
12785 vm_map_copy_t copy;
12786 vm_map_entry_t last = vm_map_last_entry(new_map);
12787
12788 vm_map_unlock(old_map);
12789 /*
12790 * Use maxprot version of copyin because we
12791 * care about whether this memory can ever
12792 * be accessed, not just whether it's accessible
12793 * right now.
12794 */
12795 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12796 if (vm_map_copyin_internal(old_map, start, entry_size,
12797 vm_map_copyin_flags, &copy)
12798 != KERN_SUCCESS) {
12799 /*
12800 * The map might have changed while it
12801 * was unlocked, check it again. Skip
12802 * any blank space or permanently
12803 * unreadable region.
12804 */
12805 vm_map_lock(old_map);
12806 if (!vm_map_lookup_entry(old_map, start, &last) ||
12807 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12808 last = last->vme_next;
12809 }
12810 *old_entry_p = last;
12811
12812 /*
12813 * XXX For some error returns, want to
12814 * XXX skip to the next element. Note
12815 * that INVALID_ADDRESS and
12816 * PROTECTION_FAILURE are handled above.
12817 */
12818
12819 return FALSE;
12820 }
12821
12822 /*
12823 * Assert that the vm_map_copy is coming from the right
12824 * zone and hasn't been forged
12825 */
12826 vm_map_copy_require(copy);
12827
12828 /*
12829 * Insert the copy into the new map
12830 */
12831 vm_map_copy_insert(new_map, last, copy);
12832
12833 /*
12834 * Pick up the traversal at the end of
12835 * the copied region.
12836 */
12837
12838 vm_map_lock(old_map);
12839 start += entry_size;
12840 if (!vm_map_lookup_entry(old_map, start, &last)) {
12841 last = last->vme_next;
12842 } else {
12843 if (last->vme_start == start) {
12844 /*
12845 * No need to clip here and we don't
12846 * want to cause any unnecessary
12847 * unnesting...
12848 */
12849 } else {
12850 vm_map_clip_start(old_map, last, start);
12851 }
12852 }
12853 *old_entry_p = last;
12854
12855 return TRUE;
12856 }
12857
12858 /*
12859 * vm_map_fork:
12860 *
12861 * Create and return a new map based on the old
12862 * map, according to the inheritance values on the
12863 * regions in that map and the options.
12864 *
12865 * The source map must not be locked.
12866 */
12867 vm_map_t
12868 vm_map_fork(
12869 ledger_t ledger,
12870 vm_map_t old_map,
12871 int options)
12872 {
12873 pmap_t new_pmap;
12874 vm_map_t new_map;
12875 vm_map_entry_t old_entry;
12876 vm_map_size_t new_size = 0, entry_size;
12877 vm_map_entry_t new_entry;
12878 boolean_t src_needs_copy;
12879 boolean_t new_entry_needs_copy;
12880 boolean_t pmap_is64bit;
12881 int vm_map_copyin_flags;
12882 vm_inherit_t old_entry_inheritance;
12883 int map_create_options;
12884 kern_return_t footprint_collect_kr;
12885
12886 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12887 VM_MAP_FORK_PRESERVE_PURGEABLE |
12888 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12889 /* unsupported option */
12890 return VM_MAP_NULL;
12891 }
12892
12893 pmap_is64bit =
12894 #if defined(__i386__) || defined(__x86_64__)
12895 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12896 #elif defined(__arm64__)
12897 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12898 #elif defined(__arm__)
12899 FALSE;
12900 #else
12901 #error Unknown architecture.
12902 #endif
12903
12904 unsigned int pmap_flags = 0;
12905 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12906 #if defined(HAS_APPLE_PAC)
12907 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12908 #endif
12909 #if PMAP_CREATE_FORCE_4K_PAGES
12910 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12911 PAGE_SIZE != FOURK_PAGE_SIZE) {
12912 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12913 }
12914 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12915 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12916
12917 vm_map_reference_swap(old_map);
12918 vm_map_lock(old_map);
12919
12920 map_create_options = 0;
12921 if (old_map->hdr.entries_pageable) {
12922 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12923 }
12924 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12925 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12926 footprint_collect_kr = KERN_SUCCESS;
12927 }
12928 new_map = vm_map_create_options(new_pmap,
12929 old_map->min_offset,
12930 old_map->max_offset,
12931 map_create_options);
12932 /* inherit cs_enforcement */
12933 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12934 vm_map_lock(new_map);
12935 vm_commit_pagezero_status(new_map);
12936 /* inherit the parent map's page size */
12937 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12938 for (
12939 old_entry = vm_map_first_entry(old_map);
12940 old_entry != vm_map_to_entry(old_map);
12941 ) {
12942 entry_size = old_entry->vme_end - old_entry->vme_start;
12943
12944 old_entry_inheritance = old_entry->inheritance;
12945 /*
12946 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12947 * share VM_INHERIT_NONE entries that are not backed by a
12948 * device pager.
12949 */
12950 if (old_entry_inheritance == VM_INHERIT_NONE &&
12951 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12952 (old_entry->protection & VM_PROT_READ) &&
12953 !(!old_entry->is_sub_map &&
12954 VME_OBJECT(old_entry) != NULL &&
12955 VME_OBJECT(old_entry)->pager != NULL &&
12956 is_device_pager_ops(
12957 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12958 old_entry_inheritance = VM_INHERIT_SHARE;
12959 }
12960
12961 if (old_entry_inheritance != VM_INHERIT_NONE &&
12962 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12963 footprint_collect_kr == KERN_SUCCESS) {
12964 /*
12965 * The corpse won't have old_map->pmap to query
12966 * footprint information, so collect that data now
12967 * and store it in new_map->vmmap_corpse_footprint
12968 * for later autopsy.
12969 */
12970 footprint_collect_kr =
12971 vm_map_corpse_footprint_collect(old_map,
12972 old_entry,
12973 new_map);
12974 }
12975
12976 switch (old_entry_inheritance) {
12977 case VM_INHERIT_NONE:
12978 break;
12979
12980 case VM_INHERIT_SHARE:
12981 vm_map_fork_share(old_map, old_entry, new_map);
12982 new_size += entry_size;
12983 break;
12984
12985 case VM_INHERIT_COPY:
12986
12987 /*
12988 * Inline the copy_quickly case;
12989 * upon failure, fall back on call
12990 * to vm_map_fork_copy.
12991 */
12992
12993 if (old_entry->is_sub_map) {
12994 break;
12995 }
12996 if ((old_entry->wired_count != 0) ||
12997 ((VME_OBJECT(old_entry) != NULL) &&
12998 (VME_OBJECT(old_entry)->true_share))) {
12999 goto slow_vm_map_fork_copy;
13000 }
13001
13002 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
13003 vm_map_entry_copy(old_map, new_entry, old_entry);
13004
13005 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
13006 new_map->jit_entry_exists = TRUE;
13007 }
13008
13009 if (new_entry->is_sub_map) {
13010 /* clear address space specifics */
13011 new_entry->use_pmap = FALSE;
13012 } else {
13013 /*
13014 * We're dealing with a copy-on-write operation,
13015 * so the resulting mapping should not inherit
13016 * the original mapping's accounting settings.
13017 * "iokit_acct" should have been cleared in
13018 * vm_map_entry_copy().
13019 * "use_pmap" should be reset to its default
13020 * (TRUE) so that the new mapping gets
13021 * accounted for in the task's memory footprint.
13022 */
13023 assert(!new_entry->iokit_acct);
13024 new_entry->use_pmap = TRUE;
13025 }
13026
13027 if (!vm_object_copy_quickly(
13028 VME_OBJECT_PTR(new_entry),
13029 VME_OFFSET(old_entry),
13030 (old_entry->vme_end -
13031 old_entry->vme_start),
13032 &src_needs_copy,
13033 &new_entry_needs_copy)) {
13034 vm_map_entry_dispose(new_map, new_entry);
13035 goto slow_vm_map_fork_copy;
13036 }
13037
13038 /*
13039 * Handle copy-on-write obligations
13040 */
13041
13042 if (src_needs_copy && !old_entry->needs_copy) {
13043 vm_prot_t prot;
13044
13045 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13046
13047 prot = old_entry->protection & ~VM_PROT_WRITE;
13048
13049 if (override_nx(old_map, VME_ALIAS(old_entry))
13050 && prot) {
13051 prot |= VM_PROT_EXECUTE;
13052 }
13053
13054 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13055
13056 vm_object_pmap_protect(
13057 VME_OBJECT(old_entry),
13058 VME_OFFSET(old_entry),
13059 (old_entry->vme_end -
13060 old_entry->vme_start),
13061 ((old_entry->is_shared
13062 || old_map->mapped_in_other_pmaps)
13063 ? PMAP_NULL :
13064 old_map->pmap),
13065 VM_MAP_PAGE_SIZE(old_map),
13066 old_entry->vme_start,
13067 prot);
13068
13069 assert(old_entry->wired_count == 0);
13070 old_entry->needs_copy = TRUE;
13071 }
13072 new_entry->needs_copy = new_entry_needs_copy;
13073
13074 /*
13075 * Insert the entry at the end
13076 * of the map.
13077 */
13078
13079 vm_map_store_entry_link(new_map,
13080 vm_map_last_entry(new_map),
13081 new_entry,
13082 VM_MAP_KERNEL_FLAGS_NONE);
13083 new_size += entry_size;
13084 break;
13085
13086 slow_vm_map_fork_copy:
13087 vm_map_copyin_flags = 0;
13088 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13089 vm_map_copyin_flags |=
13090 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13091 }
13092 if (vm_map_fork_copy(old_map,
13093 &old_entry,
13094 new_map,
13095 vm_map_copyin_flags)) {
13096 new_size += entry_size;
13097 }
13098 continue;
13099 }
13100 old_entry = old_entry->vme_next;
13101 }
13102
13103 #if defined(__arm64__)
13104 pmap_insert_sharedpage(new_map->pmap);
13105 #endif /* __arm64__ */
13106
13107 new_map->size = new_size;
13108
13109 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13110 vm_map_corpse_footprint_collect_done(new_map);
13111 }
13112
13113 /* Propagate JIT entitlement for the pmap layer. */
13114 if (pmap_get_jit_entitled(old_map->pmap)) {
13115 /* Tell the pmap that it supports JIT. */
13116 pmap_set_jit_entitled(new_map->pmap);
13117 }
13118
13119 vm_map_unlock(new_map);
13120 vm_map_unlock(old_map);
13121 vm_map_deallocate(old_map);
13122
13123 return new_map;
13124 }
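/*
 * Illustrative sketch of a fork-time call, assuming the caller holds a
 * task with valid "ledger" and "map" fields (those names are
 * assumptions, not taken from this file):
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_fork(task->ledger, task->map,
 *	    VM_MAP_FORK_PRESERVE_PURGEABLE |
 *	    VM_MAP_FORK_CORPSE_FOOTPRINT);
 *	if (new_map == VM_MAP_NULL) {
 *		// an unsupported option bit was passed
 *	}
 *
 * Each entry of the old map is then handled according to its
 * (possibly overridden) inheritance: NONE is skipped, SHARE goes
 * through vm_map_fork_share(), and COPY goes through the quick
 * vm_object_copy_quickly() path or falls back to vm_map_fork_copy().
 */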
13125
13126 /*
13127 * vm_map_exec:
13128 *
13129 * Setup the "new_map" with the proper execution environment according
13130 * to the type of executable (platform, 64bit, chroot environment).
13131 * Map the comm page and shared region, etc...
13132 */
13133 kern_return_t
13134 vm_map_exec(
13135 vm_map_t new_map,
13136 task_t task,
13137 boolean_t is64bit,
13138 void *fsroot,
13139 cpu_type_t cpu,
13140 cpu_subtype_t cpu_subtype,
13141 boolean_t reslide)
13142 {
13143 SHARED_REGION_TRACE_DEBUG(
13144 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13145 (void *)VM_KERNEL_ADDRPERM(current_task()),
13146 (void *)VM_KERNEL_ADDRPERM(new_map),
13147 (void *)VM_KERNEL_ADDRPERM(task),
13148 (void *)VM_KERNEL_ADDRPERM(fsroot),
13149 cpu,
13150 cpu_subtype));
13151 (void) vm_commpage_enter(new_map, task, is64bit);
13152
13153 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);
13154
13155 SHARED_REGION_TRACE_DEBUG(
13156 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13157 (void *)VM_KERNEL_ADDRPERM(current_task()),
13158 (void *)VM_KERNEL_ADDRPERM(new_map),
13159 (void *)VM_KERNEL_ADDRPERM(task),
13160 (void *)VM_KERNEL_ADDRPERM(fsroot),
13161 cpu,
13162 cpu_subtype));
13163
13164 /*
13165 * Some devices have region(s) of memory that shouldn't get allocated by
13166 * user processes. The following code creates dummy vm_map_entry_t's for each
13167 * of the regions that need to be reserved to prevent any allocations in
13168 * those regions.
13169 */
13170 kern_return_t kr = KERN_FAILURE;
13171 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13172 vmk_flags.vmkf_permanent = TRUE;
13173 vmk_flags.vmkf_beyond_max = TRUE;
13174
13175 struct vm_reserved_region *regions = NULL;
13176 size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13177 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13178
13179 for (size_t i = 0; i < num_regions; ++i) {
13180 kr = vm_map_enter(
13181 new_map,
13182 &regions[i].vmrr_addr,
13183 regions[i].vmrr_size,
13184 (vm_map_offset_t)0,
13185 VM_FLAGS_FIXED,
13186 vmk_flags,
13187 VM_KERN_MEMORY_NONE,
13188 VM_OBJECT_NULL,
13189 (vm_object_offset_t)0,
13190 FALSE,
13191 VM_PROT_NONE,
13192 VM_PROT_NONE,
13193 VM_INHERIT_NONE);
13194
13195 if (kr != KERN_SUCCESS) {
13196 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13197 }
13198 }
13199
13200 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13201
13202 return KERN_SUCCESS;
13203 }
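/*
 * Illustrative sketch of a call from the image-activation path; the
 * cpu type, subtype and fsroot values are placeholders:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_exec(new_map, task,
 *	    TRUE,			// is64bit
 *	    NULL,			// fsroot: no chroot in this sketch
 *	    CPU_TYPE_ARM64,
 *	    CPU_SUBTYPE_ARM64_ALL,
 *	    FALSE);			// reslide
 *
 * Besides the comm page and shared region, this also reserves any
 * machine-specific regions reported by ml_get_vm_reserved_regions(),
 * so that later user allocations cannot land there.
 */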
13204
13205 /*
13206 * vm_map_lookup_locked:
13207 *
13208 * Finds the VM object, offset, and
13209 * protection for a given virtual address in the
13210 * specified map, assuming a page fault of the
13211 * type specified.
13212 *
13213 * Returns the (object, offset, protection) for
13214 * this address, whether it is wired down, and whether
13215 * this map has the only reference to the data in question.
13216 * In order to later verify this lookup, a "version"
13217 * is returned.
13218 * If contended != NULL, *contended will be set to
13219 * true iff the thread had to spin or block to acquire
13220 * an exclusive lock.
13221 *
13222 * The map MUST be locked by the caller and WILL be
13223 * locked on exit. In order to guarantee the
13224 * existence of the returned object, it is returned
13225 * locked.
13226 *
13227 * If a lookup is requested with "write protection"
13228 * specified, the map may be changed to perform virtual
13229 * copying operations, although the data referenced will
13230 * remain the same.
13231 */
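/*
 * Typical caller pattern, sketched under the assumption that the
 * vm_map_verify() interface and the OBJECT_LOCK_EXCLUSIVE lock-type
 * constant are available as in the fault path; the map must already be
 * locked on entry:
 *
 *	vm_map_version_t version;
 *	vm_object_t object;
 *	vm_object_offset_t offset;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *	vm_map_t real_map;
 *	struct vm_object_fault_info fault_info;
 *	kern_return_t kr;
 *
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
 *	    &prot, &wired, &fault_info, &real_map, NULL);
 *	// ... resolve the fault with the object locked ...
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed underneath us: redo the lookup
 *	}
 */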
13232 kern_return_t
13233 vm_map_lookup_locked(
13234 vm_map_t *var_map, /* IN/OUT */
13235 vm_map_offset_t vaddr,
13236 vm_prot_t fault_type,
13237 int object_lock_type,
13238 vm_map_version_t *out_version, /* OUT */
13239 vm_object_t *object, /* OUT */
13240 vm_object_offset_t *offset, /* OUT */
13241 vm_prot_t *out_prot, /* OUT */
13242 boolean_t *wired, /* OUT */
13243 vm_object_fault_info_t fault_info, /* OUT */
13244 vm_map_t *real_map, /* OUT */
13245 bool *contended) /* OUT */
13246 {
13247 vm_map_entry_t entry;
13248 vm_map_t map = *var_map;
13249 vm_map_t old_map = *var_map;
13250 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13251 vm_map_offset_t cow_parent_vaddr = 0;
13252 vm_map_offset_t old_start = 0;
13253 vm_map_offset_t old_end = 0;
13254 vm_prot_t prot;
13255 boolean_t mask_protections;
13256 boolean_t force_copy;
13257 boolean_t no_force_copy_if_executable;
13258 vm_prot_t original_fault_type;
13259 vm_map_size_t fault_page_mask;
13260
13261 /*
13262 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13263 * as a mask against the mapping's actual protections, not as an
13264 * absolute value.
13265 */
13266 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13267 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13268 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13269 fault_type &= VM_PROT_ALL;
13270 original_fault_type = fault_type;
13271 if (contended) {
13272 *contended = false;
13273 }
13274
13275 *real_map = map;
13276
13277 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13278 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13279
13280 RetryLookup:
13281 fault_type = original_fault_type;
13282
13283 /*
13284 * If the map has an interesting hint, try it before calling
13285 * full blown lookup routine.
13286 */
13287 entry = map->hint;
13288
13289 if ((entry == vm_map_to_entry(map)) ||
13290 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13291 vm_map_entry_t tmp_entry;
13292
13293 /*
13294 * Entry was either not a valid hint, or the vaddr
13295 * was not contained in the entry, so do a full lookup.
13296 */
13297 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13298 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13299 vm_map_unlock(cow_sub_map_parent);
13300 }
13301 if ((*real_map != map)
13302 && (*real_map != cow_sub_map_parent)) {
13303 vm_map_unlock(*real_map);
13304 }
13305 return KERN_INVALID_ADDRESS;
13306 }
13307
13308 entry = tmp_entry;
13309 }
13310 if (map == old_map) {
13311 old_start = entry->vme_start;
13312 old_end = entry->vme_end;
13313 }
13314
13315 /*
13316 * Handle submaps. Drop lock on upper map, submap is
13317 * returned locked.
13318 */
13319
13320 submap_recurse:
13321 if (entry->is_sub_map) {
13322 vm_map_offset_t local_vaddr;
13323 vm_map_offset_t end_delta;
13324 vm_map_offset_t start_delta;
13325 vm_map_entry_t submap_entry, saved_submap_entry;
13326 vm_object_offset_t submap_entry_offset;
13327 vm_object_size_t submap_entry_size;
13328 vm_prot_t subentry_protection;
13329 vm_prot_t subentry_max_protection;
13330 boolean_t subentry_no_copy_on_read;
13331 boolean_t mapped_needs_copy = FALSE;
13332 vm_map_version_t version;
13333
13334 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13335 "map %p (%d) entry %p submap %p (%d)\n",
13336 map, VM_MAP_PAGE_SHIFT(map), entry,
13337 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13338
13339 local_vaddr = vaddr;
13340
13341 if ((entry->use_pmap &&
13342 !((fault_type & VM_PROT_WRITE) ||
13343 force_copy))) {
13344 /* if real_map equals map we unlock below */
13345 if ((*real_map != map) &&
13346 (*real_map != cow_sub_map_parent)) {
13347 vm_map_unlock(*real_map);
13348 }
13349 *real_map = VME_SUBMAP(entry);
13350 }
13351
13352 if (entry->needs_copy &&
13353 ((fault_type & VM_PROT_WRITE) ||
13354 force_copy)) {
13355 if (!mapped_needs_copy) {
13356 if (vm_map_lock_read_to_write(map)) {
13357 vm_map_lock_read(map);
13358 *real_map = map;
13359 goto RetryLookup;
13360 }
13361 vm_map_lock_read(VME_SUBMAP(entry));
13362 *var_map = VME_SUBMAP(entry);
13363 cow_sub_map_parent = map;
13364 /* reset base to map before cow object */
13365 /* this is the map which will accept */
13366 /* the new cow object */
13367 old_start = entry->vme_start;
13368 old_end = entry->vme_end;
13369 cow_parent_vaddr = vaddr;
13370 mapped_needs_copy = TRUE;
13371 } else {
13372 vm_map_lock_read(VME_SUBMAP(entry));
13373 *var_map = VME_SUBMAP(entry);
13374 if ((cow_sub_map_parent != map) &&
13375 (*real_map != map)) {
13376 vm_map_unlock(map);
13377 }
13378 }
13379 } else {
13380 vm_map_lock_read(VME_SUBMAP(entry));
13381 *var_map = VME_SUBMAP(entry);
13382 /* leave the map locked if it is a target */
13383 /* cow sub_map above; otherwise, just */
13384 /* follow the maps down to the object. */
13385 /* Here we unlock, knowing we are not */
13386 /* revisiting the map. */
13387 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13388 vm_map_unlock_read(map);
13389 }
13390 }
13391
13392 map = *var_map;
13393
13394 /* calculate the offset in the submap for vaddr */
13395 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13396 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13397 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13398 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13399
13400 RetrySubMap:
13401 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13402 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13403 vm_map_unlock(cow_sub_map_parent);
13404 }
13405 if ((*real_map != map)
13406 && (*real_map != cow_sub_map_parent)) {
13407 vm_map_unlock(*real_map);
13408 }
13409 *real_map = map;
13410 return KERN_INVALID_ADDRESS;
13411 }
13412
13413 /* find the attenuated shadow of the underlying object */
13414 /* on our target map */
13415
13416 /* In English: the submap object may extend beyond the */
13417 /* region mapped by the entry, or may only fill a portion */
13418 /* of it. For our purposes, we only care if the object */
13419 /* doesn't fill. In this case the area which will */
13420 /* ultimately be clipped in the top map will only need */
13421 /* to be as big as the portion of the underlying entry */
13422 /* which is mapped */
13423 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13424 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13425
13426 end_delta =
13427 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13428 submap_entry->vme_end ?
13429 0 : (VME_OFFSET(entry) +
13430 (old_end - old_start))
13431 - submap_entry->vme_end;
13432
13433 old_start += start_delta;
13434 old_end -= end_delta;
13435
13436 if (submap_entry->is_sub_map) {
13437 entry = submap_entry;
13438 vaddr = local_vaddr;
13439 goto submap_recurse;
13440 }
13441
13442 if (((fault_type & VM_PROT_WRITE) ||
13443 force_copy)
13444 && cow_sub_map_parent) {
13445 vm_object_t sub_object, copy_object;
13446 vm_object_offset_t copy_offset;
13447 vm_map_offset_t local_start;
13448 vm_map_offset_t local_end;
13449 boolean_t copied_slowly = FALSE;
13450 vm_object_offset_t copied_slowly_phys_offset = 0;
13451 kern_return_t kr = KERN_SUCCESS;
13452
13453 if (vm_map_lock_read_to_write(map)) {
13454 vm_map_lock_read(map);
13455 old_start -= start_delta;
13456 old_end += end_delta;
13457 goto RetrySubMap;
13458 }
13459
13460
13461 sub_object = VME_OBJECT(submap_entry);
13462 if (sub_object == VM_OBJECT_NULL) {
13463 sub_object =
13464 vm_object_allocate(
13465 (vm_map_size_t)
13466 (submap_entry->vme_end -
13467 submap_entry->vme_start));
13468 VME_OBJECT_SET(submap_entry, sub_object);
13469 VME_OFFSET_SET(submap_entry, 0);
13470 assert(!submap_entry->is_sub_map);
13471 assert(submap_entry->use_pmap);
13472 }
13473 local_start = local_vaddr -
13474 (cow_parent_vaddr - old_start);
13475 local_end = local_vaddr +
13476 (old_end - cow_parent_vaddr);
13477 vm_map_clip_start(map, submap_entry, local_start);
13478 vm_map_clip_end(map, submap_entry, local_end);
13479 if (submap_entry->is_sub_map) {
13480 /* unnesting was done when clipping */
13481 assert(!submap_entry->use_pmap);
13482 }
13483
13484 /* This is the COW case, let's connect */
13485 /* an entry in our space to the underlying */
13486 /* object in the submap, bypassing the */
13487 /* submap. */
13488
13489 if (submap_entry->wired_count != 0 ||
13490 (sub_object->copy_strategy !=
13491 MEMORY_OBJECT_COPY_SYMMETRIC)) {
13492 if ((submap_entry->protection & VM_PROT_EXECUTE) &&
13493 no_force_copy_if_executable) {
13494 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13495 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13496 vm_map_unlock(cow_sub_map_parent);
13497 }
13498 if ((*real_map != map)
13499 && (*real_map != cow_sub_map_parent)) {
13500 vm_map_unlock(*real_map);
13501 }
13502 *real_map = map;
13503 vm_map_lock_write_to_read(map);
13504 kr = KERN_PROTECTION_FAILURE;
13505 DTRACE_VM4(submap_no_copy_executable,
13506 vm_map_t, map,
13507 vm_object_offset_t, submap_entry_offset,
13508 vm_object_size_t, submap_entry_size,
13509 int, kr);
13510 return kr;
13511 }
13512
13513 vm_object_reference(sub_object);
13514
13515 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13516 "submap_entry %p offset 0x%llx\n",
13517 submap_entry, VME_OFFSET(submap_entry));
13518 submap_entry_offset = VME_OFFSET(submap_entry);
13519 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13520
13521 DTRACE_VM6(submap_copy_slowly,
13522 vm_map_t, cow_sub_map_parent,
13523 vm_map_offset_t, vaddr,
13524 vm_map_t, map,
13525 vm_object_size_t, submap_entry_size,
13526 int, submap_entry->wired_count,
13527 int, sub_object->copy_strategy);
13528
13529 saved_submap_entry = submap_entry;
13530 version.main_timestamp = map->timestamp;
13531 vm_map_unlock(map); /* Increments timestamp by 1 */
13532 submap_entry = VM_MAP_ENTRY_NULL;
13533
13534 vm_object_lock(sub_object);
13535 kr = vm_object_copy_slowly(sub_object,
13536 submap_entry_offset,
13537 submap_entry_size,
13538 FALSE,
13539 &copy_object);
13540 copied_slowly = TRUE;
13541 /* 4k: account for extra offset in physical page */
13542 copied_slowly_phys_offset = submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13543 vm_object_deallocate(sub_object);
13544
13545 vm_map_lock(map);
13546
13547 if (kr != KERN_SUCCESS &&
13548 kr != KERN_MEMORY_RESTART_COPY) {
13549 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13550 vm_map_unlock(cow_sub_map_parent);
13551 }
13552 if ((*real_map != map)
13553 && (*real_map != cow_sub_map_parent)) {
13554 vm_map_unlock(*real_map);
13555 }
13556 *real_map = map;
13557 vm_object_deallocate(copy_object);
13558 copy_object = VM_OBJECT_NULL;
13559 vm_map_lock_write_to_read(map);
13560 DTRACE_VM4(submap_copy_slowly,
13561 vm_object_t, sub_object,
13562 vm_object_offset_t, submap_entry_offset,
13563 vm_object_size_t, submap_entry_size,
13564 int, kr);
13565 return kr;
13566 }
13567
13568 if ((kr == KERN_SUCCESS) &&
13569 (version.main_timestamp + 1) == map->timestamp) {
13570 submap_entry = saved_submap_entry;
13571 } else {
13572 saved_submap_entry = NULL;
13573 old_start -= start_delta;
13574 old_end += end_delta;
13575 vm_object_deallocate(copy_object);
13576 copy_object = VM_OBJECT_NULL;
13577 vm_map_lock_write_to_read(map);
13578 goto RetrySubMap;
13579 }
13580 } else {
13581 /* set up shadow object */
13582 copy_object = sub_object;
13583 vm_object_lock(sub_object);
13584 vm_object_reference_locked(sub_object);
13585 sub_object->shadowed = TRUE;
13586 vm_object_unlock(sub_object);
13587
13588 assert(submap_entry->wired_count == 0);
13589 submap_entry->needs_copy = TRUE;
13590
13591 prot = submap_entry->protection;
13592 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13593 prot = prot & ~VM_PROT_WRITE;
13594 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13595
13596 if (override_nx(old_map,
13597 VME_ALIAS(submap_entry))
13598 && prot) {
13599 prot |= VM_PROT_EXECUTE;
13600 }
13601
13602 vm_object_pmap_protect(
13603 sub_object,
13604 VME_OFFSET(submap_entry),
13605 submap_entry->vme_end -
13606 submap_entry->vme_start,
13607 (submap_entry->is_shared
13608 || map->mapped_in_other_pmaps) ?
13609 PMAP_NULL : map->pmap,
13610 VM_MAP_PAGE_SIZE(map),
13611 submap_entry->vme_start,
13612 prot);
13613 }
13614
13615 /*
13616 * Adjust the fault offset to the submap entry.
13617 */
13618 copy_offset = (local_vaddr -
13619 submap_entry->vme_start +
13620 VME_OFFSET(submap_entry));
13621
13622 /* This works differently from the */
13623 /* normal submap case. We go back */
13624 /* to the parent of the cow map and */
13625 /* clip out the target portion of */
13626 /* the sub_map, substituting the */
13627 /* new copy object. */
13628
13629 subentry_protection = submap_entry->protection;
13630 subentry_max_protection = submap_entry->max_protection;
13631 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13632 vm_map_unlock(map);
13633 submap_entry = NULL; /* not valid after map unlock */
13634
13635 local_start = old_start;
13636 local_end = old_end;
13637 map = cow_sub_map_parent;
13638 *var_map = cow_sub_map_parent;
13639 vaddr = cow_parent_vaddr;
13640 cow_sub_map_parent = NULL;
13641
13642 if (!vm_map_lookup_entry(map,
13643 vaddr, &entry)) {
13644 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13645 vm_map_unlock(cow_sub_map_parent);
13646 }
13647 if ((*real_map != map)
13648 && (*real_map != cow_sub_map_parent)) {
13649 vm_map_unlock(*real_map);
13650 }
13651 *real_map = map;
13652 vm_object_deallocate(
13653 copy_object);
13654 copy_object = VM_OBJECT_NULL;
13655 vm_map_lock_write_to_read(map);
13656 DTRACE_VM4(submap_lookup_post_unlock,
13657 uint64_t, (uint64_t)entry->vme_start,
13658 uint64_t, (uint64_t)entry->vme_end,
13659 vm_map_offset_t, vaddr,
13660 int, copied_slowly);
13661 return KERN_INVALID_ADDRESS;
13662 }
13663
13664 /* clip out the portion of space */
13665 /* mapped by the sub map which */
13666 /* corresponds to the underlying */
13667 /* object */
13668
13669 /*
13670 * Clip (and unnest) the smallest nested chunk
13671 * possible around the faulting address...
13672 */
13673 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13674 local_end = local_start + pmap_shared_region_size_min(map->pmap);
13675 /*
13676 * ... but don't go beyond the "old_start" to "old_end"
13677 * range, to avoid spanning over another VM region
13678 * with a possibly different VM object and/or offset.
13679 */
13680 if (local_start < old_start) {
13681 local_start = old_start;
13682 }
13683 if (local_end > old_end) {
13684 local_end = old_end;
13685 }
13686 /*
13687 * Adjust copy_offset to the start of the range.
13688 */
13689 copy_offset -= (vaddr - local_start);
13690
13691 vm_map_clip_start(map, entry, local_start);
13692 vm_map_clip_end(map, entry, local_end);
13693 if (entry->is_sub_map) {
13694 /* unnesting was done when clipping */
13695 assert(!entry->use_pmap);
13696 }
13697
13698 /* substitute copy object for */
13699 /* shared map entry */
13700 vm_map_deallocate(VME_SUBMAP(entry));
13701 assert(!entry->iokit_acct);
13702 entry->is_sub_map = FALSE;
13703 entry->use_pmap = TRUE;
13704 VME_OBJECT_SET(entry, copy_object);
13705
13706 /* propagate the submap entry's protections */
13707 if (entry->protection != VM_PROT_READ) {
13708 /*
13709 * Someone has already altered the top entry's
13710 * protections via vm_protect(VM_PROT_COPY).
13711 * Respect these new values and ignore the
13712 * submap entry's protections.
13713 */
13714 } else {
13715 /*
13716 * Regular copy-on-write: propagate the submap
13717 * entry's protections to the top map entry.
13718 */
13719 entry->protection |= subentry_protection;
13720 }
13721 entry->max_protection |= subentry_max_protection;
13722 /* propagate no_copy_on_read */
13723 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13724
13725 if ((entry->protection & VM_PROT_WRITE) &&
13726 (entry->protection & VM_PROT_EXECUTE) &&
13727 #if XNU_TARGET_OS_OSX
13728 map->pmap != kernel_pmap &&
13729 (vm_map_cs_enforcement(map)
13730 #if __arm64__
13731 || !VM_MAP_IS_EXOTIC(map)
13732 #endif /* __arm64__ */
13733 ) &&
13734 #endif /* XNU_TARGET_OS_OSX */
13735 !(entry->used_for_jit) &&
13736 VM_MAP_POLICY_WX_STRIP_X(map)) {
13737 DTRACE_VM3(cs_wx,
13738 uint64_t, (uint64_t)entry->vme_start,
13739 uint64_t, (uint64_t)entry->vme_end,
13740 vm_prot_t, entry->protection);
13741 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13742 proc_selfpid(),
13743 (current_task()->bsd_info
13744 ? proc_name_address(current_task()->bsd_info)
13745 : "?"),
13746 __FUNCTION__);
13747 entry->protection &= ~VM_PROT_EXECUTE;
13748 }
13749
13750 if (copied_slowly) {
13751 VME_OFFSET_SET(entry, local_start - old_start + copied_slowly_phys_offset);
13752 entry->needs_copy = FALSE;
13753 entry->is_shared = FALSE;
13754 } else {
13755 VME_OFFSET_SET(entry, copy_offset);
13756 assert(entry->wired_count == 0);
13757 entry->needs_copy = TRUE;
13758 if (entry->inheritance == VM_INHERIT_SHARE) {
13759 entry->inheritance = VM_INHERIT_COPY;
13760 }
13761 if (map != old_map) {
13762 entry->is_shared = TRUE;
13763 }
13764 }
13765 if (entry->inheritance == VM_INHERIT_SHARE) {
13766 entry->inheritance = VM_INHERIT_COPY;
13767 }
13768
13769 vm_map_lock_write_to_read(map);
13770 } else {
13771 if ((cow_sub_map_parent)
13772 && (cow_sub_map_parent != *real_map)
13773 && (cow_sub_map_parent != map)) {
13774 vm_map_unlock(cow_sub_map_parent);
13775 }
13776 entry = submap_entry;
13777 vaddr = local_vaddr;
13778 }
13779 }
13780
13781 /*
13782 * Check whether this task is allowed to have
13783 * this page.
13784 */
13785
13786 prot = entry->protection;
13787
13788 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13789 /*
13790 * HACK -- if not a stack, then allow execution
13791 */
13792 prot |= VM_PROT_EXECUTE;
13793 }
13794
13795 if (mask_protections) {
13796 fault_type &= prot;
13797 if (fault_type == VM_PROT_NONE) {
13798 goto protection_failure;
13799 }
13800 }
13801 if (((fault_type & prot) != fault_type)
13802 #if __arm64__
13803 /* prefetch abort in execute-only page */
13804 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13805 #endif
13806 ) {
13807 protection_failure:
13808 if (*real_map != map) {
13809 vm_map_unlock(*real_map);
13810 }
13811 *real_map = map;
13812
13813 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13814 log_stack_execution_failure((addr64_t)vaddr, prot);
13815 }
13816
13817 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13818 return KERN_PROTECTION_FAILURE;
13819 }
13820
13821 /*
13822 * If this page is not pageable, we have to get
13823 * it for all possible accesses.
13824 */
13825
13826 *wired = (entry->wired_count != 0);
13827 if (*wired) {
13828 fault_type = prot;
13829 }
13830
13831 /*
13832 * If the entry was copy-on-write, we either ...
13833 */
13834
13835 if (entry->needs_copy) {
13836 /*
13837 * If we want to write the page, we may as well
13838 * handle that now since we've got the map locked.
13839 *
13840 * If we don't need to write the page, we just
13841 * demote the permissions allowed.
13842 */
13843
13844 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13845 /*
13846 * Make a new object, and place it in the
13847 * object chain. Note that no new references
13848 * have appeared -- one just moved from the
13849 * map to the new object.
13850 */
13851
13852 if (vm_map_lock_read_to_write(map)) {
13853 vm_map_lock_read(map);
13854 goto RetryLookup;
13855 }
13856
13857 if (VME_OBJECT(entry)->shadowed == FALSE) {
13858 vm_object_lock(VME_OBJECT(entry));
13859 VME_OBJECT(entry)->shadowed = TRUE;
13860 vm_object_unlock(VME_OBJECT(entry));
13861 }
13862 VME_OBJECT_SHADOW(entry,
13863 (vm_map_size_t) (entry->vme_end -
13864 entry->vme_start));
13865 entry->needs_copy = FALSE;
13866
13867 vm_map_lock_write_to_read(map);
13868 }
13869 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13870 /*
13871 * We're attempting to read a copy-on-write
13872 * page -- don't allow writes.
13873 */
13874
13875 prot &= (~VM_PROT_WRITE);
13876 }
13877 }
13878
13879 /*
13880 * Create an object if necessary.
13881 */
13882 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13883 if (vm_map_lock_read_to_write(map)) {
13884 vm_map_lock_read(map);
13885 goto RetryLookup;
13886 }
13887
13888 VME_OBJECT_SET(entry,
13889 vm_object_allocate(
13890 (vm_map_size_t)(entry->vme_end -
13891 entry->vme_start)));
13892 VME_OFFSET_SET(entry, 0);
13893 assert(entry->use_pmap);
13894 vm_map_lock_write_to_read(map);
13895 }
13896
13897 /*
13898 * Return the object/offset from this entry. If the entry
13899 * was copy-on-write or empty, it has been fixed up. Also
13900 * return the protection.
13901 */
13902
13903 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13904 *object = VME_OBJECT(entry);
13905 *out_prot = prot;
13906 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13907
13908 if (fault_info) {
13909 fault_info->interruptible = THREAD_UNINT; /* for now... */
13910 /* ... the caller will change "interruptible" if needed */
13911 fault_info->cluster_size = 0;
13912 fault_info->user_tag = VME_ALIAS(entry);
13913 fault_info->pmap_options = 0;
13914 if (entry->iokit_acct ||
13915 (!entry->is_sub_map && !entry->use_pmap)) {
13916 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13917 }
13918 fault_info->behavior = entry->behavior;
13919 fault_info->lo_offset = VME_OFFSET(entry);
13920 fault_info->hi_offset =
13921 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13922 fault_info->no_cache = entry->no_cache;
13923 fault_info->stealth = FALSE;
13924 fault_info->io_sync = FALSE;
13925 if (entry->used_for_jit ||
13926 entry->vme_resilient_codesign) {
13927 fault_info->cs_bypass = TRUE;
13928 } else {
13929 fault_info->cs_bypass = FALSE;
13930 }
13931 fault_info->pmap_cs_associated = FALSE;
13932 #if CONFIG_PMAP_CS
13933 if (entry->pmap_cs_associated) {
13934 /*
13935 * The pmap layer will validate this page
13936 * before allowing it to be executed from.
13937 */
13938 fault_info->pmap_cs_associated = TRUE;
13939 }
13940 #endif /* CONFIG_PMAP_CS */
13941 fault_info->mark_zf_absent = FALSE;
13942 fault_info->batch_pmap_op = FALSE;
13943 fault_info->resilient_media = entry->vme_resilient_media;
13944 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13945 if (entry->translated_allow_execute) {
13946 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
13947 }
13948 }
13949
13950 /*
13951 * Lock the object to prevent it from disappearing
13952 */
13953 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13954 if (contended == NULL) {
13955 vm_object_lock(*object);
13956 } else {
13957 *contended = vm_object_lock_check_contended(*object);
13958 }
13959 } else {
13960 vm_object_lock_shared(*object);
13961 }
13962
13963 /*
13964 * Save the version number
13965 */
13966
13967 out_version->main_timestamp = map->timestamp;
13968
13969 return KERN_SUCCESS;
13970 }
13971
13972
13973 /*
13974 * vm_map_verify:
13975 *
13976 * Verifies that the map in question has not changed
13977 * since the given version. The map has to be locked
13978 * ("shared" mode is fine) before calling this function
13979 * and it will be returned locked too.
13980 */
13981 boolean_t
13982 vm_map_verify(
13983 vm_map_t map,
13984 vm_map_version_t *version) /* REF */
13985 {
13986 boolean_t result;
13987
13988 vm_map_lock_assert_held(map);
13989 result = (map->timestamp == version->main_timestamp);
13990
13991 return result;
13992 }
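/*
 * Editor's note -- a minimal sketch (not part of the original source) of how
 * a caller is expected to use the version returned by the lookup routine
 * above together with vm_map_verify().  The helper name and the "redo the
 * lookup" step are hypothetical; only vm_map_lock_read()/vm_map_unlock_read()
 * and vm_map_verify() are interfaces actually defined in this file.
 */
#if 0   /* illustrative only */
static void
example_use_of_version(vm_map_t map, vm_map_version_t *version)
{
	boolean_t still_valid;

	vm_map_lock_read(map);          /* "shared" mode is sufficient */
	still_valid = vm_map_verify(map, version);
	if (!still_valid) {
		/*
		 * The map's timestamp changed since the lookup: any cached
		 * entry/object/offset may be stale, so the caller has to
		 * redo the lookup before relying on them.
		 */
	}
	vm_map_unlock_read(map);
}
#endif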
13993
13994 /*
13995 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13996 * Goes away after the regular vm_region_recurse function migrates to
13997 * 64 bits
13998 * vm_region_recurse: A form of vm_region which follows the
13999 * submaps in a target map
14000 *
14001 */
14002
14003 kern_return_t
14004 vm_map_region_recurse_64(
14005 vm_map_t map,
14006 vm_map_offset_t *address, /* IN/OUT */
14007 vm_map_size_t *size, /* OUT */
14008 natural_t *nesting_depth, /* IN/OUT */
14009 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14010 mach_msg_type_number_t *count) /* IN/OUT */
14011 {
14012 mach_msg_type_number_t original_count;
14013 vm_region_extended_info_data_t extended;
14014 vm_map_entry_t tmp_entry;
14015 vm_map_offset_t user_address;
14016 unsigned int user_max_depth;
14017
14018 /*
14019 * "curr_entry" is the VM map entry preceding or including the
14020 * address we're looking for.
14021 * "curr_map" is the map or sub-map containing "curr_entry".
14022 * "curr_address" is the equivalent of the top map's "user_address"
14023 * in the current map.
14024 * "curr_offset" is the cumulated offset of "curr_map" in the
14025 * target task's address space.
14026 * "curr_depth" is the depth of "curr_map" in the chain of
14027 * sub-maps.
14028 *
14029 * "curr_max_below" and "curr_max_above" limit the range (around
14030 * "curr_address") we should take into account in the current (sub)map.
14031 * They limit the range to what's visible through the map entries
14032 * we've traversed from the top map to the current map.
14033 *
14034 */
14035 vm_map_entry_t curr_entry;
14036 vm_map_address_t curr_address;
14037 vm_map_offset_t curr_offset;
14038 vm_map_t curr_map;
14039 unsigned int curr_depth;
14040 vm_map_offset_t curr_max_below, curr_max_above;
14041 vm_map_offset_t curr_skip;
14042
14043 /*
14044 * "next_" is the same as "curr_" but for the VM region immediately
14045 * after the address we're looking for. We need to keep track of this
14046 * too because we want to return info about that region if the
14047 * address we're looking for is not mapped.
14048 */
14049 vm_map_entry_t next_entry;
14050 vm_map_offset_t next_offset;
14051 vm_map_offset_t next_address;
14052 vm_map_t next_map;
14053 unsigned int next_depth;
14054 vm_map_offset_t next_max_below, next_max_above;
14055 vm_map_offset_t next_skip;
14056
14057 boolean_t look_for_pages;
14058 vm_region_submap_short_info_64_t short_info;
14059 boolean_t do_region_footprint;
14060 int effective_page_size, effective_page_shift;
14061
14062 if (map == VM_MAP_NULL) {
14063 /* no address space to work on */
14064 return KERN_INVALID_ARGUMENT;
14065 }
14066
14067 effective_page_shift = vm_self_region_page_shift(map);
14068 effective_page_size = (1 << effective_page_shift);
14069
14070 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14071 /*
14072 * "info" structure is not big enough and
14073 * would overflow
14074 */
14075 return KERN_INVALID_ARGUMENT;
14076 }
14077
14078 do_region_footprint = task_self_region_footprint();
14079 original_count = *count;
14080
14081 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14082 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14083 look_for_pages = FALSE;
14084 short_info = (vm_region_submap_short_info_64_t) submap_info;
14085 submap_info = NULL;
14086 } else {
14087 look_for_pages = TRUE;
14088 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14089 short_info = NULL;
14090
14091 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14092 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14093 }
14094 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14095 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14096 }
14097 }
14098
14099 user_address = *address;
14100 user_max_depth = *nesting_depth;
14101
14102 if (not_in_kdp) {
14103 vm_map_lock_read(map);
14104 }
14105
14106 recurse_again:
14107 curr_entry = NULL;
14108 curr_map = map;
14109 curr_address = user_address;
14110 curr_offset = 0;
14111 curr_skip = 0;
14112 curr_depth = 0;
14113 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14114 curr_max_below = curr_address;
14115
14116 next_entry = NULL;
14117 next_map = NULL;
14118 next_address = 0;
14119 next_offset = 0;
14120 next_skip = 0;
14121 next_depth = 0;
14122 next_max_above = (vm_map_offset_t) -1;
14123 next_max_below = (vm_map_offset_t) -1;
14124
14125 for (;;) {
14126 if (vm_map_lookup_entry(curr_map,
14127 curr_address,
14128 &tmp_entry)) {
14129 /* tmp_entry contains the address we're looking for */
14130 curr_entry = tmp_entry;
14131 } else {
14132 vm_map_offset_t skip;
14133 /*
14134 * The address is not mapped. "tmp_entry" is the
14135 * map entry preceding the address. We want the next
14136 * one, if it exists.
14137 */
14138 curr_entry = tmp_entry->vme_next;
14139
14140 if (curr_entry == vm_map_to_entry(curr_map) ||
14141 (curr_entry->vme_start >=
14142 curr_address + curr_max_above)) {
14143 /* no next entry at this level: stop looking */
14144 if (not_in_kdp) {
14145 vm_map_unlock_read(curr_map);
14146 }
14147 curr_entry = NULL;
14148 curr_map = NULL;
14149 curr_skip = 0;
14150 curr_offset = 0;
14151 curr_depth = 0;
14152 curr_max_above = 0;
14153 curr_max_below = 0;
14154 break;
14155 }
14156
14157 /* adjust current address and offset */
14158 skip = curr_entry->vme_start - curr_address;
14159 curr_address = curr_entry->vme_start;
14160 curr_skip += skip;
14161 curr_offset += skip;
14162 curr_max_above -= skip;
14163 curr_max_below = 0;
14164 }
14165
14166 /*
14167 * Is the next entry at this level closer to the address (or
14168 * deeper in the submap chain) than the one we had
14169 * so far ?
14170 */
14171 tmp_entry = curr_entry->vme_next;
14172 if (tmp_entry == vm_map_to_entry(curr_map)) {
14173 /* no next entry at this level */
14174 } else if (tmp_entry->vme_start >=
14175 curr_address + curr_max_above) {
14176 /*
14177 * tmp_entry is beyond the scope of what we mapped of
14178 * this submap in the upper level: ignore it.
14179 */
14180 } else if ((next_entry == NULL) ||
14181 (tmp_entry->vme_start + curr_offset <=
14182 next_entry->vme_start + next_offset)) {
14183 /*
14184 * We didn't have a "next_entry" or this one is
14185 * closer to the address we're looking for:
14186 * use this "tmp_entry" as the new "next_entry".
14187 */
14188 if (next_entry != NULL) {
14189 /* unlock the last "next_map" */
14190 if (next_map != curr_map && not_in_kdp) {
14191 vm_map_unlock_read(next_map);
14192 }
14193 }
14194 next_entry = tmp_entry;
14195 next_map = curr_map;
14196 next_depth = curr_depth;
14197 next_address = next_entry->vme_start;
14198 next_skip = curr_skip;
14199 next_skip += (next_address - curr_address);
14200 next_offset = curr_offset;
14201 next_offset += (next_address - curr_address);
14202 next_max_above = MIN(next_max_above, curr_max_above);
14203 next_max_above = MIN(next_max_above,
14204 next_entry->vme_end - next_address);
14205 next_max_below = MIN(next_max_below, curr_max_below);
14206 next_max_below = MIN(next_max_below,
14207 next_address - next_entry->vme_start);
14208 }
14209
14210 /*
14211 * "curr_max_{above,below}" allow us to keep track of the
14212 * portion of the submap that is actually mapped at this level:
14213 * the rest of that submap is irrelevant to us, since it's not
14214 * mapped here.
14215 * The relevant portion of the map starts at
14216 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14217 */
14218 curr_max_above = MIN(curr_max_above,
14219 curr_entry->vme_end - curr_address);
14220 curr_max_below = MIN(curr_max_below,
14221 curr_address - curr_entry->vme_start);
14222
14223 if (!curr_entry->is_sub_map ||
14224 curr_depth >= user_max_depth) {
14225 /*
14226 * We hit a leaf map or we reached the maximum depth
14227 * we could, so stop looking. Keep the current map
14228 * locked.
14229 */
14230 break;
14231 }
14232
14233 /*
14234 * Get down to the next submap level.
14235 */
14236
14237 /*
14238 * Lock the next level and unlock the current level,
14239 * unless we need to keep it locked to access the "next_entry"
14240 * later.
14241 */
14242 if (not_in_kdp) {
14243 vm_map_lock_read(VME_SUBMAP(curr_entry));
14244 }
14245 if (curr_map == next_map) {
14246 /* keep "next_map" locked in case we need it */
14247 } else {
14248 /* release this map */
14249 if (not_in_kdp) {
14250 vm_map_unlock_read(curr_map);
14251 }
14252 }
14253
14254 /*
14255 * Adjust the offset. "curr_entry" maps the submap
14256 * at relative address "curr_entry->vme_start" in the
14257 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14258 * bytes of the submap.
14259 * "curr_offset" always represents the offset of a virtual
14260 * address in the curr_map relative to the absolute address
14261 * space (i.e. the top-level VM map).
14262 */
14263 curr_offset +=
14264 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14265 curr_address = user_address + curr_offset;
14266 /* switch to the submap */
14267 curr_map = VME_SUBMAP(curr_entry);
14268 curr_depth++;
14269 curr_entry = NULL;
14270 }
14271
14272 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14273 // so probably should be a real 32b ID vs. ptr.
14274 // Current users just check for equality
14275
14276 if (curr_entry == NULL) {
14277 /* no VM region contains the address... */
14278
14279 if (do_region_footprint && /* we want footprint numbers */
14280 next_entry == NULL && /* & there are no more regions */
14281 /* & we haven't already provided our fake region: */
14282 user_address <= vm_map_last_entry(map)->vme_end) {
14283 ledger_amount_t ledger_resident, ledger_compressed;
14284
14285 /*
14286 * Add a fake memory region to account for
14287 * purgeable and/or ledger-tagged memory that
14288 * counts towards this task's memory footprint,
14289 * i.e. the resident/compressed pages of non-volatile
14290 * objects owned by that task.
14291 */
14292 task_ledgers_footprint(map->pmap->ledger,
14293 &ledger_resident,
14294 &ledger_compressed);
14295 if (ledger_resident + ledger_compressed == 0) {
14296 /* no purgeable memory usage to report */
14297 return KERN_INVALID_ADDRESS;
14298 }
14299 /* fake region to show nonvolatile footprint */
14300 if (look_for_pages) {
14301 submap_info->protection = VM_PROT_DEFAULT;
14302 submap_info->max_protection = VM_PROT_DEFAULT;
14303 submap_info->inheritance = VM_INHERIT_DEFAULT;
14304 submap_info->offset = 0;
14305 submap_info->user_tag = -1;
14306 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14307 submap_info->pages_shared_now_private = 0;
14308 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14309 submap_info->pages_dirtied = submap_info->pages_resident;
14310 submap_info->ref_count = 1;
14311 submap_info->shadow_depth = 0;
14312 submap_info->external_pager = 0;
14313 submap_info->share_mode = SM_PRIVATE;
14314 submap_info->is_submap = 0;
14315 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14316 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14317 submap_info->user_wired_count = 0;
14318 submap_info->pages_reusable = 0;
14319 } else {
14320 short_info->user_tag = -1;
14321 short_info->offset = 0;
14322 short_info->protection = VM_PROT_DEFAULT;
14323 short_info->inheritance = VM_INHERIT_DEFAULT;
14324 short_info->max_protection = VM_PROT_DEFAULT;
14325 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14326 short_info->user_wired_count = 0;
14327 short_info->is_submap = 0;
14328 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14329 short_info->external_pager = 0;
14330 short_info->shadow_depth = 0;
14331 short_info->share_mode = SM_PRIVATE;
14332 short_info->ref_count = 1;
14333 }
14334 *nesting_depth = 0;
14335 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14336 // *address = user_address;
14337 *address = vm_map_last_entry(map)->vme_end;
14338 return KERN_SUCCESS;
14339 }
14340
14341 if (next_entry == NULL) {
14342 /* ... and no VM region follows it either */
14343 return KERN_INVALID_ADDRESS;
14344 }
14345 /* ... gather info about the next VM region */
14346 curr_entry = next_entry;
14347 curr_map = next_map; /* still locked ... */
14348 curr_address = next_address;
14349 curr_skip = next_skip;
14350 curr_offset = next_offset;
14351 curr_depth = next_depth;
14352 curr_max_above = next_max_above;
14353 curr_max_below = next_max_below;
14354 } else {
14355 /* we won't need "next_entry" after all */
14356 if (next_entry != NULL) {
14357 /* release "next_map" */
14358 if (next_map != curr_map && not_in_kdp) {
14359 vm_map_unlock_read(next_map);
14360 }
14361 }
14362 }
14363 next_entry = NULL;
14364 next_map = NULL;
14365 next_offset = 0;
14366 next_skip = 0;
14367 next_depth = 0;
14368 next_max_below = -1;
14369 next_max_above = -1;
14370
14371 if (curr_entry->is_sub_map &&
14372 curr_depth < user_max_depth) {
14373 /*
14374 * We're not as deep as we could be: we must have
14375 * gone back up after not finding anything mapped
14376 * below the original top-level map entry's range.
14377 * Let's move "curr_address" forward and recurse again.
14378 */
14379 user_address = curr_address;
14380 goto recurse_again;
14381 }
14382
14383 *nesting_depth = curr_depth;
14384 *size = curr_max_above + curr_max_below;
14385 *address = user_address + curr_skip - curr_max_below;
14386
14387 if (look_for_pages) {
14388 submap_info->user_tag = VME_ALIAS(curr_entry);
14389 submap_info->offset = VME_OFFSET(curr_entry);
14390 submap_info->protection = curr_entry->protection;
14391 submap_info->inheritance = curr_entry->inheritance;
14392 submap_info->max_protection = curr_entry->max_protection;
14393 submap_info->behavior = curr_entry->behavior;
14394 submap_info->user_wired_count = curr_entry->user_wired_count;
14395 submap_info->is_submap = curr_entry->is_sub_map;
14396 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14397 } else {
14398 short_info->user_tag = VME_ALIAS(curr_entry);
14399 short_info->offset = VME_OFFSET(curr_entry);
14400 short_info->protection = curr_entry->protection;
14401 short_info->inheritance = curr_entry->inheritance;
14402 short_info->max_protection = curr_entry->max_protection;
14403 short_info->behavior = curr_entry->behavior;
14404 short_info->user_wired_count = curr_entry->user_wired_count;
14405 short_info->is_submap = curr_entry->is_sub_map;
14406 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14407 }
14408
14409 extended.pages_resident = 0;
14410 extended.pages_swapped_out = 0;
14411 extended.pages_shared_now_private = 0;
14412 extended.pages_dirtied = 0;
14413 extended.pages_reusable = 0;
14414 extended.external_pager = 0;
14415 extended.shadow_depth = 0;
14416 extended.share_mode = SM_EMPTY;
14417 extended.ref_count = 0;
14418
14419 if (not_in_kdp) {
14420 if (!curr_entry->is_sub_map) {
14421 vm_map_offset_t range_start, range_end;
14422 range_start = MAX((curr_address - curr_max_below),
14423 curr_entry->vme_start);
14424 range_end = MIN((curr_address + curr_max_above),
14425 curr_entry->vme_end);
14426 vm_map_region_walk(curr_map,
14427 range_start,
14428 curr_entry,
14429 (VME_OFFSET(curr_entry) +
14430 (range_start -
14431 curr_entry->vme_start)),
14432 range_end - range_start,
14433 &extended,
14434 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14435 if (extended.external_pager &&
14436 extended.ref_count == 2 &&
14437 extended.share_mode == SM_SHARED) {
14438 extended.share_mode = SM_PRIVATE;
14439 }
14440 } else {
14441 if (curr_entry->use_pmap) {
14442 extended.share_mode = SM_TRUESHARED;
14443 } else {
14444 extended.share_mode = SM_PRIVATE;
14445 }
14446 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
14447 }
14448 }
14449
14450 if (look_for_pages) {
14451 submap_info->pages_resident = extended.pages_resident;
14452 submap_info->pages_swapped_out = extended.pages_swapped_out;
14453 submap_info->pages_shared_now_private =
14454 extended.pages_shared_now_private;
14455 submap_info->pages_dirtied = extended.pages_dirtied;
14456 submap_info->external_pager = extended.external_pager;
14457 submap_info->shadow_depth = extended.shadow_depth;
14458 submap_info->share_mode = extended.share_mode;
14459 submap_info->ref_count = extended.ref_count;
14460
14461 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14462 submap_info->pages_reusable = extended.pages_reusable;
14463 }
14464 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14465 submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14466 }
14467 } else {
14468 short_info->external_pager = extended.external_pager;
14469 short_info->shadow_depth = extended.shadow_depth;
14470 short_info->share_mode = extended.share_mode;
14471 short_info->ref_count = extended.ref_count;
14472 }
14473
14474 if (not_in_kdp) {
14475 vm_map_unlock_read(curr_map);
14476 }
14477
14478 return KERN_SUCCESS;
14479 }
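/*
 * Editor's note -- vm_map_region_recurse_64() is normally reached from user
 * space through the mach_vm_region_recurse() call.  The sketch below is a
 * user-space program (not kernel code) that walks its own address space,
 * descending into submaps; it assumes a macOS build against <mach/mach_vm.h>.
 */
#if 0   /* user-space sketch, not kernel code */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	natural_t depth = 0;
	vm_region_submap_info_data_64_t info;
	mach_msg_type_number_t count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(mach_task_self(), &addr, &size,
		    &depth, (vm_region_recurse_info_t)&info,
		    &count) != KERN_SUCCESS) {
			break;  /* typically KERN_INVALID_ADDRESS: no more regions */
		}
		printf("0x%016llx-0x%016llx depth %u tag %u\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.user_tag);
		if (info.is_submap) {
			depth++;        /* re-query the same address, one level deeper */
		} else {
			addr += size;   /* advance to the next region */
		}
	}
	return 0;
}
#endif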
14480
14481 /*
14482 * vm_region:
14483 *
14484 * User call to obtain information about a region in
14485 * a task's address map. Currently, only one flavor is
14486 * supported.
14487 *
14488 * XXX The reserved and behavior fields cannot be filled
14489 * in until the vm merge from the IK is completed, and
14490 * vm_reserve is implemented.
14491 */
14492
14493 kern_return_t
14494 vm_map_region(
14495 vm_map_t map,
14496 vm_map_offset_t *address, /* IN/OUT */
14497 vm_map_size_t *size, /* OUT */
14498 vm_region_flavor_t flavor, /* IN */
14499 vm_region_info_t info, /* OUT */
14500 mach_msg_type_number_t *count, /* IN/OUT */
14501 mach_port_t *object_name) /* OUT */
14502 {
14503 vm_map_entry_t tmp_entry;
14504 vm_map_entry_t entry;
14505 vm_map_offset_t start;
14506
14507 if (map == VM_MAP_NULL) {
14508 return KERN_INVALID_ARGUMENT;
14509 }
14510
14511 switch (flavor) {
14512 case VM_REGION_BASIC_INFO:
14513 /* legacy for old 32-bit objects info */
14514 {
14515 vm_region_basic_info_t basic;
14516
14517 if (*count < VM_REGION_BASIC_INFO_COUNT) {
14518 return KERN_INVALID_ARGUMENT;
14519 }
14520
14521 basic = (vm_region_basic_info_t) info;
14522 *count = VM_REGION_BASIC_INFO_COUNT;
14523
14524 vm_map_lock_read(map);
14525
14526 start = *address;
14527 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14528 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14529 vm_map_unlock_read(map);
14530 return KERN_INVALID_ADDRESS;
14531 }
14532 } else {
14533 entry = tmp_entry;
14534 }
14535
14536 start = entry->vme_start;
14537
14538 basic->offset = (uint32_t)VME_OFFSET(entry);
14539 basic->protection = entry->protection;
14540 basic->inheritance = entry->inheritance;
14541 basic->max_protection = entry->max_protection;
14542 basic->behavior = entry->behavior;
14543 basic->user_wired_count = entry->user_wired_count;
14544 basic->reserved = entry->is_sub_map;
14545 *address = start;
14546 *size = (entry->vme_end - start);
14547
14548 if (object_name) {
14549 *object_name = IP_NULL;
14550 }
14551 if (entry->is_sub_map) {
14552 basic->shared = FALSE;
14553 } else {
14554 basic->shared = entry->is_shared;
14555 }
14556
14557 vm_map_unlock_read(map);
14558 return KERN_SUCCESS;
14559 }
14560
14561 case VM_REGION_BASIC_INFO_64:
14562 {
14563 vm_region_basic_info_64_t basic;
14564
14565 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14566 return KERN_INVALID_ARGUMENT;
14567 }
14568
14569 basic = (vm_region_basic_info_64_t) info;
14570 *count = VM_REGION_BASIC_INFO_COUNT_64;
14571
14572 vm_map_lock_read(map);
14573
14574 start = *address;
14575 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14576 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14577 vm_map_unlock_read(map);
14578 return KERN_INVALID_ADDRESS;
14579 }
14580 } else {
14581 entry = tmp_entry;
14582 }
14583
14584 start = entry->vme_start;
14585
14586 basic->offset = VME_OFFSET(entry);
14587 basic->protection = entry->protection;
14588 basic->inheritance = entry->inheritance;
14589 basic->max_protection = entry->max_protection;
14590 basic->behavior = entry->behavior;
14591 basic->user_wired_count = entry->user_wired_count;
14592 basic->reserved = entry->is_sub_map;
14593 *address = start;
14594 *size = (entry->vme_end - start);
14595
14596 if (object_name) {
14597 *object_name = IP_NULL;
14598 }
14599 if (entry->is_sub_map) {
14600 basic->shared = FALSE;
14601 } else {
14602 basic->shared = entry->is_shared;
14603 }
14604
14605 vm_map_unlock_read(map);
14606 return KERN_SUCCESS;
14607 }
14608 case VM_REGION_EXTENDED_INFO:
14609 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14610 return KERN_INVALID_ARGUMENT;
14611 }
14612 OS_FALLTHROUGH;
14613 case VM_REGION_EXTENDED_INFO__legacy:
14614 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
14615 return KERN_INVALID_ARGUMENT;
14616 }
14617
14618 {
14619 vm_region_extended_info_t extended;
14620 mach_msg_type_number_t original_count;
14621 int effective_page_size, effective_page_shift;
14622
14623 extended = (vm_region_extended_info_t) info;
14624
14625 effective_page_shift = vm_self_region_page_shift(map);
14626 effective_page_size = (1 << effective_page_shift);
14627
14628 vm_map_lock_read(map);
14629
14630 start = *address;
14631 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14632 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14633 vm_map_unlock_read(map);
14634 return KERN_INVALID_ADDRESS;
14635 }
14636 } else {
14637 entry = tmp_entry;
14638 }
14639 start = entry->vme_start;
14640
14641 extended->protection = entry->protection;
14642 extended->user_tag = VME_ALIAS(entry);
14643 extended->pages_resident = 0;
14644 extended->pages_swapped_out = 0;
14645 extended->pages_shared_now_private = 0;
14646 extended->pages_dirtied = 0;
14647 extended->external_pager = 0;
14648 extended->shadow_depth = 0;
14649
14650 original_count = *count;
14651 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14652 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14653 } else {
14654 extended->pages_reusable = 0;
14655 *count = VM_REGION_EXTENDED_INFO_COUNT;
14656 }
14657
14658 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14659
14660 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14661 extended->share_mode = SM_PRIVATE;
14662 }
14663
14664 if (object_name) {
14665 *object_name = IP_NULL;
14666 }
14667 *address = start;
14668 *size = (entry->vme_end - start);
14669
14670 vm_map_unlock_read(map);
14671 return KERN_SUCCESS;
14672 }
14673 case VM_REGION_TOP_INFO:
14674 {
14675 vm_region_top_info_t top;
14676
14677 if (*count < VM_REGION_TOP_INFO_COUNT) {
14678 return KERN_INVALID_ARGUMENT;
14679 }
14680
14681 top = (vm_region_top_info_t) info;
14682 *count = VM_REGION_TOP_INFO_COUNT;
14683
14684 vm_map_lock_read(map);
14685
14686 start = *address;
14687 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14688 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14689 vm_map_unlock_read(map);
14690 return KERN_INVALID_ADDRESS;
14691 }
14692 } else {
14693 entry = tmp_entry;
14694 }
14695 start = entry->vme_start;
14696
14697 top->private_pages_resident = 0;
14698 top->shared_pages_resident = 0;
14699
14700 vm_map_region_top_walk(entry, top);
14701
14702 if (object_name) {
14703 *object_name = IP_NULL;
14704 }
14705 *address = start;
14706 *size = (entry->vme_end - start);
14707
14708 vm_map_unlock_read(map);
14709 return KERN_SUCCESS;
14710 }
14711 default:
14712 return KERN_INVALID_ARGUMENT;
14713 }
14714 }
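/*
 * Editor's note -- vm_map_region() backs the user-visible mach_vm_region()
 * call.  The sketch below (user-space, not kernel code) queries the first
 * region of the caller's own task with the VM_REGION_BASIC_INFO_64 flavor;
 * it assumes a macOS build against <mach/mach_vm.h>.
 */
#if 0   /* user-space sketch, not kernel code */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t addr = 1;     /* first region at or above address 1 */
	mach_vm_size_t size = 0;
	vm_region_basic_info_data_64_t info;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;       /* unused by the kernel */

	if (mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
	    &count, &object_name) == KERN_SUCCESS) {
		printf("first region 0x%llx..0x%llx prot %d shared %d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    info.protection, info.shared);
	}
	return 0;
}
#endif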
14715
14716 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14717 MIN((entry_size), \
14718 ((obj)->all_reusable ? \
14719 (obj)->wired_page_count : \
14720 (obj)->resident_page_count - (obj)->reusable_page_count))
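/*
 * Editor's note -- a worked example with hypothetical numbers: for an entry
 * spanning 64 pages over an object with resident_page_count = 50,
 * reusable_page_count = 10 and all_reusable == FALSE,
 * OBJ_RESIDENT_COUNT(obj, 64) == MIN(64, 50 - 10) == 40.  If the object were
 * marked all_reusable, only its wired_page_count would be counted instead.
 */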
14721
14722 void
14723 vm_map_region_top_walk(
14724 vm_map_entry_t entry,
14725 vm_region_top_info_t top)
14726 {
14727 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14728 top->share_mode = SM_EMPTY;
14729 top->ref_count = 0;
14730 top->obj_id = 0;
14731 return;
14732 }
14733
14734 {
14735 struct vm_object *obj, *tmp_obj;
14736 int ref_count;
14737 uint32_t entry_size;
14738
14739 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14740
14741 obj = VME_OBJECT(entry);
14742
14743 vm_object_lock(obj);
14744
14745 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14746 ref_count--;
14747 }
14748
14749 assert(obj->reusable_page_count <= obj->resident_page_count);
14750 if (obj->shadow) {
14751 if (ref_count == 1) {
14752 top->private_pages_resident =
14753 OBJ_RESIDENT_COUNT(obj, entry_size);
14754 } else {
14755 top->shared_pages_resident =
14756 OBJ_RESIDENT_COUNT(obj, entry_size);
14757 }
14758 top->ref_count = ref_count;
14759 top->share_mode = SM_COW;
14760
14761 while ((tmp_obj = obj->shadow)) {
14762 vm_object_lock(tmp_obj);
14763 vm_object_unlock(obj);
14764 obj = tmp_obj;
14765
14766 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14767 ref_count--;
14768 }
14769
14770 assert(obj->reusable_page_count <= obj->resident_page_count);
14771 top->shared_pages_resident +=
14772 OBJ_RESIDENT_COUNT(obj, entry_size);
14773 top->ref_count += ref_count - 1;
14774 }
14775 } else {
14776 if (entry->superpage_size) {
14777 top->share_mode = SM_LARGE_PAGE;
14778 top->shared_pages_resident = 0;
14779 top->private_pages_resident = entry_size;
14780 } else if (entry->needs_copy) {
14781 top->share_mode = SM_COW;
14782 top->shared_pages_resident =
14783 OBJ_RESIDENT_COUNT(obj, entry_size);
14784 } else {
14785 if (ref_count == 1 ||
14786 (ref_count == 2 && obj->named)) {
14787 top->share_mode = SM_PRIVATE;
14788 top->private_pages_resident =
14789 OBJ_RESIDENT_COUNT(obj,
14790 entry_size);
14791 } else {
14792 top->share_mode = SM_SHARED;
14793 top->shared_pages_resident =
14794 OBJ_RESIDENT_COUNT(obj,
14795 entry_size);
14796 }
14797 }
14798 top->ref_count = ref_count;
14799 }
14800 /* XXX K64: obj_id will be truncated */
14801 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14802
14803 vm_object_unlock(obj);
14804 }
14805 }
14806
14807 void
14808 vm_map_region_walk(
14809 vm_map_t map,
14810 vm_map_offset_t va,
14811 vm_map_entry_t entry,
14812 vm_object_offset_t offset,
14813 vm_object_size_t range,
14814 vm_region_extended_info_t extended,
14815 boolean_t look_for_pages,
14816 mach_msg_type_number_t count)
14817 {
14818 struct vm_object *obj, *tmp_obj;
14819 vm_map_offset_t last_offset;
14820 int i;
14821 int ref_count;
14822 struct vm_object *shadow_object;
14823 unsigned short shadow_depth;
14824 boolean_t do_region_footprint;
14825 int effective_page_size, effective_page_shift;
14826 vm_map_offset_t effective_page_mask;
14827
14828 do_region_footprint = task_self_region_footprint();
14829
14830 if ((VME_OBJECT(entry) == 0) ||
14831 (entry->is_sub_map) ||
14832 (VME_OBJECT(entry)->phys_contiguous &&
14833 !entry->superpage_size)) {
14834 extended->share_mode = SM_EMPTY;
14835 extended->ref_count = 0;
14836 return;
14837 }
14838
14839 if (entry->superpage_size) {
14840 extended->shadow_depth = 0;
14841 extended->share_mode = SM_LARGE_PAGE;
14842 extended->ref_count = 1;
14843 extended->external_pager = 0;
14844
14845 /* TODO4K: Superpage in 4k mode? */
14846 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14847 extended->shadow_depth = 0;
14848 return;
14849 }
14850
14851 effective_page_shift = vm_self_region_page_shift(map);
14852 effective_page_size = (1 << effective_page_shift);
14853 effective_page_mask = effective_page_size - 1;
14854
14855 offset = vm_map_trunc_page(offset, effective_page_mask);
14856
14857 obj = VME_OBJECT(entry);
14858
14859 vm_object_lock(obj);
14860
14861 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14862 ref_count--;
14863 }
14864
14865 if (look_for_pages) {
14866 for (last_offset = offset + range;
14867 offset < last_offset;
14868 offset += effective_page_size, va += effective_page_size) {
14869 if (do_region_footprint) {
14870 int disp;
14871
14872 disp = 0;
14873 if (map->has_corpse_footprint) {
14874 /*
14875 * Query the page info data we saved
14876 * while forking the corpse.
14877 */
14878 vm_map_corpse_footprint_query_page_info(
14879 map,
14880 va,
14881 &disp);
14882 } else {
14883 /*
14884 * Query the pmap.
14885 */
14886 vm_map_footprint_query_page_info(
14887 map,
14888 entry,
14889 va,
14890 &disp);
14891 }
14892 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14893 extended->pages_resident++;
14894 }
14895 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14896 extended->pages_reusable++;
14897 }
14898 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14899 extended->pages_dirtied++;
14900 }
14901 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14902 extended->pages_swapped_out++;
14903 }
14904 continue;
14905 }
14906
14907 vm_map_region_look_for_page(map, va, obj,
14908 vm_object_trunc_page(offset), ref_count,
14909 0, extended, count);
14910 }
14911
14912 if (do_region_footprint) {
14913 goto collect_object_info;
14914 }
14915 } else {
14916 collect_object_info:
14917 shadow_object = obj->shadow;
14918 shadow_depth = 0;
14919
14920 if (!(obj->internal)) {
14921 extended->external_pager = 1;
14922 }
14923
14924 if (shadow_object != VM_OBJECT_NULL) {
14925 vm_object_lock(shadow_object);
14926 for (;
14927 shadow_object != VM_OBJECT_NULL;
14928 shadow_depth++) {
14929 vm_object_t next_shadow;
14930
14931 if (!(shadow_object->internal)) {
14932 extended->external_pager = 1;
14933 }
14934
14935 next_shadow = shadow_object->shadow;
14936 if (next_shadow) {
14937 vm_object_lock(next_shadow);
14938 }
14939 vm_object_unlock(shadow_object);
14940 shadow_object = next_shadow;
14941 }
14942 }
14943 extended->shadow_depth = shadow_depth;
14944 }
14945
14946 if (extended->shadow_depth || entry->needs_copy) {
14947 extended->share_mode = SM_COW;
14948 } else {
14949 if (ref_count == 1) {
14950 extended->share_mode = SM_PRIVATE;
14951 } else {
14952 if (obj->true_share) {
14953 extended->share_mode = SM_TRUESHARED;
14954 } else {
14955 extended->share_mode = SM_SHARED;
14956 }
14957 }
14958 }
14959 extended->ref_count = ref_count - extended->shadow_depth;
14960
14961 for (i = 0; i < extended->shadow_depth; i++) {
14962 if ((tmp_obj = obj->shadow) == 0) {
14963 break;
14964 }
14965 vm_object_lock(tmp_obj);
14966 vm_object_unlock(obj);
14967
14968 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14969 ref_count--;
14970 }
14971
14972 extended->ref_count += ref_count;
14973 obj = tmp_obj;
14974 }
14975 vm_object_unlock(obj);
14976
14977 if (extended->share_mode == SM_SHARED) {
14978 vm_map_entry_t cur;
14979 vm_map_entry_t last;
14980 int my_refs;
14981
14982 obj = VME_OBJECT(entry);
14983 last = vm_map_to_entry(map);
14984 my_refs = 0;
14985
14986 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14987 ref_count--;
14988 }
14989 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14990 my_refs += vm_map_region_count_obj_refs(cur, obj);
14991 }
14992
14993 if (my_refs == ref_count) {
14994 extended->share_mode = SM_PRIVATE_ALIASED;
14995 } else if (my_refs > 1) {
14996 extended->share_mode = SM_SHARED_ALIASED;
14997 }
14998 }
14999 }
15000
15001
15002 /* object is locked on entry and locked on return */
15003
15004
15005 static void
15006 vm_map_region_look_for_page(
15007 __unused vm_map_t map,
15008 __unused vm_map_offset_t va,
15009 vm_object_t object,
15010 vm_object_offset_t offset,
15011 int max_refcnt,
15012 unsigned short depth,
15013 vm_region_extended_info_t extended,
15014 mach_msg_type_number_t count)
15015 {
15016 vm_page_t p;
15017 vm_object_t shadow;
15018 int ref_count;
15019 vm_object_t caller_object;
15020
15021 shadow = object->shadow;
15022 caller_object = object;
15023
15024
15025 while (TRUE) {
15026 if (!(object->internal)) {
15027 extended->external_pager = 1;
15028 }
15029
15030 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15031 if (shadow && (max_refcnt == 1)) {
15032 extended->pages_shared_now_private++;
15033 }
15034
15035 if (!p->vmp_fictitious &&
15036 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15037 extended->pages_dirtied++;
15038 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15039 if (p->vmp_reusable || object->all_reusable) {
15040 extended->pages_reusable++;
15041 }
15042 }
15043
15044 extended->pages_resident++;
15045
15046 if (object != caller_object) {
15047 vm_object_unlock(object);
15048 }
15049
15050 return;
15051 }
15052 if (object->internal &&
15053 object->alive &&
15054 !object->terminating &&
15055 object->pager_ready) {
15056 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15057 == VM_EXTERNAL_STATE_EXISTS) {
15058 /* the pager has that page */
15059 extended->pages_swapped_out++;
15060 if (object != caller_object) {
15061 vm_object_unlock(object);
15062 }
15063 return;
15064 }
15065 }
15066
15067 if (shadow) {
15068 vm_object_lock(shadow);
15069
15070 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15071 ref_count--;
15072 }
15073
15074 if (++depth > extended->shadow_depth) {
15075 extended->shadow_depth = depth;
15076 }
15077
15078 if (ref_count > max_refcnt) {
15079 max_refcnt = ref_count;
15080 }
15081
15082 if (object != caller_object) {
15083 vm_object_unlock(object);
15084 }
15085
15086 offset = offset + object->vo_shadow_offset;
15087 object = shadow;
15088 shadow = object->shadow;
15089 continue;
15090 }
15091 if (object != caller_object) {
15092 vm_object_unlock(object);
15093 }
15094 break;
15095 }
15096 }
15097
15098 static int
15099 vm_map_region_count_obj_refs(
15100 vm_map_entry_t entry,
15101 vm_object_t object)
15102 {
15103 int ref_count;
15104 vm_object_t chk_obj;
15105 vm_object_t tmp_obj;
15106
15107 if (VME_OBJECT(entry) == 0) {
15108 return 0;
15109 }
15110
15111 if (entry->is_sub_map) {
15112 return 0;
15113 } else {
15114 ref_count = 0;
15115
15116 chk_obj = VME_OBJECT(entry);
15117 vm_object_lock(chk_obj);
15118
15119 while (chk_obj) {
15120 if (chk_obj == object) {
15121 ref_count++;
15122 }
15123 tmp_obj = chk_obj->shadow;
15124 if (tmp_obj) {
15125 vm_object_lock(tmp_obj);
15126 }
15127 vm_object_unlock(chk_obj);
15128
15129 chk_obj = tmp_obj;
15130 }
15131 }
15132 return ref_count;
15133 }
15134
15135
15136 /*
15137 * Routine: vm_map_simplify
15138 *
15139 * Description:
15140 * Attempt to simplify the map representation in
15141 * the vicinity of the given starting address.
15142 * Note:
15143 * This routine is intended primarily to keep the
15144 * kernel maps more compact -- they generally don't
15145 * benefit from the "expand a map entry" technology
15146 * at allocation time because the adjacent entry
15147 * is often wired down.
15148 */
15149 void
15150 vm_map_simplify_entry(
15151 vm_map_t map,
15152 vm_map_entry_t this_entry)
15153 {
15154 vm_map_entry_t prev_entry;
15155
15156 counter(c_vm_map_simplify_entry_called++);
15157
15158 prev_entry = this_entry->vme_prev;
15159
15160 if ((this_entry != vm_map_to_entry(map)) &&
15161 (prev_entry != vm_map_to_entry(map)) &&
15162
15163 (prev_entry->vme_end == this_entry->vme_start) &&
15164
15165 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15166 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15167 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15168 prev_entry->vme_start))
15169 == VME_OFFSET(this_entry)) &&
15170
15171 (prev_entry->behavior == this_entry->behavior) &&
15172 (prev_entry->needs_copy == this_entry->needs_copy) &&
15173 (prev_entry->protection == this_entry->protection) &&
15174 (prev_entry->max_protection == this_entry->max_protection) &&
15175 (prev_entry->inheritance == this_entry->inheritance) &&
15176 (prev_entry->use_pmap == this_entry->use_pmap) &&
15177 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15178 (prev_entry->no_cache == this_entry->no_cache) &&
15179 (prev_entry->permanent == this_entry->permanent) &&
15180 (prev_entry->map_aligned == this_entry->map_aligned) &&
15181 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15182 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15183 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
15184 /* from_reserved_zone: OK if that field doesn't match */
15185 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15186 (prev_entry->vme_resilient_codesign ==
15187 this_entry->vme_resilient_codesign) &&
15188 (prev_entry->vme_resilient_media ==
15189 this_entry->vme_resilient_media) &&
15190 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15191
15192 (prev_entry->wired_count == this_entry->wired_count) &&
15193 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15194
15195 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15196 (prev_entry->in_transition == FALSE) &&
15197 (this_entry->in_transition == FALSE) &&
15198 (prev_entry->needs_wakeup == FALSE) &&
15199 (this_entry->needs_wakeup == FALSE) &&
15200 (prev_entry->is_shared == this_entry->is_shared) &&
15201 (prev_entry->superpage_size == FALSE) &&
15202 (this_entry->superpage_size == FALSE)
15203 ) {
15204 vm_map_store_entry_unlink(map, prev_entry);
15205 assert(prev_entry->vme_start < this_entry->vme_end);
15206 if (prev_entry->map_aligned) {
15207 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15208 VM_MAP_PAGE_MASK(map)));
15209 }
15210 this_entry->vme_start = prev_entry->vme_start;
15211 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15212
15213 if (map->holelistenabled) {
15214 vm_map_store_update_first_free(map, this_entry, TRUE);
15215 }
15216
15217 if (prev_entry->is_sub_map) {
15218 vm_map_deallocate(VME_SUBMAP(prev_entry));
15219 } else {
15220 vm_object_deallocate(VME_OBJECT(prev_entry));
15221 }
15222 vm_map_entry_dispose(map, prev_entry);
15223 SAVE_HINT_MAP_WRITE(map, this_entry);
15224 counter(c_vm_map_simplified++);
15225 }
15226 }
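/*
 * Editor's note -- a hypothetical example of a successful coalesce: two
 * adjacent entries [0x1000, 0x2000) and [0x2000, 0x3000), backed by the same
 * VM object at offsets 0 and 0x1000 and with identical protections and flags,
 * are merged into a single entry [0x1000, 0x3000) at offset 0; the previous
 * entry is unlinked and its object or submap reference dropped, as above.
 */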
15227
15228 void
15229 vm_map_simplify(
15230 vm_map_t map,
15231 vm_map_offset_t start)
15232 {
15233 vm_map_entry_t this_entry;
15234
15235 vm_map_lock(map);
15236 if (vm_map_lookup_entry(map, start, &this_entry)) {
15237 vm_map_simplify_entry(map, this_entry);
15238 vm_map_simplify_entry(map, this_entry->vme_next);
15239 }
15240 counter(c_vm_map_simplify_called++);
15241 vm_map_unlock(map);
15242 }
15243
15244 static void
15245 vm_map_simplify_range(
15246 vm_map_t map,
15247 vm_map_offset_t start,
15248 vm_map_offset_t end)
15249 {
15250 vm_map_entry_t entry;
15251
15252 /*
15253 * The map should be locked (for "write") by the caller.
15254 */
15255
15256 if (start >= end) {
15257 /* invalid address range */
15258 return;
15259 }
15260
15261 start = vm_map_trunc_page(start,
15262 VM_MAP_PAGE_MASK(map));
15263 end = vm_map_round_page(end,
15264 VM_MAP_PAGE_MASK(map));
15265
15266 if (!vm_map_lookup_entry(map, start, &entry)) {
15267 /* "start" is not mapped and "entry" ends before "start" */
15268 if (entry == vm_map_to_entry(map)) {
15269 /* start with first entry in the map */
15270 entry = vm_map_first_entry(map);
15271 } else {
15272 /* start with next entry */
15273 entry = entry->vme_next;
15274 }
15275 }
15276
15277 while (entry != vm_map_to_entry(map) &&
15278 entry->vme_start <= end) {
15279 /* try and coalesce "entry" with its previous entry */
15280 vm_map_simplify_entry(map, entry);
15281 entry = entry->vme_next;
15282 }
15283 }
15284
15285
15286 /*
15287 * Routine: vm_map_machine_attribute
15288 * Purpose:
15289 * Provide machine-specific attributes to mappings,
15290 * such as cacheability etc. for machines that provide
15291 * them. NUMA architectures and machines with big/strange
15292 * caches will use this.
15293 * Note:
15294 * Responsibilities for locking and checking are handled here,
15295 * everything else in the pmap module. If any non-volatile
15296 * information must be kept, the pmap module should handle
15297 * it itself. [This assumes that attributes do not
15298 * need to be inherited, which seems ok to me]
15299 */
15300 kern_return_t
15301 vm_map_machine_attribute(
15302 vm_map_t map,
15303 vm_map_offset_t start,
15304 vm_map_offset_t end,
15305 vm_machine_attribute_t attribute,
15306 vm_machine_attribute_val_t* value) /* IN/OUT */
15307 {
15308 kern_return_t ret;
15309 vm_map_size_t sync_size;
15310 vm_map_entry_t entry;
15311
15312 if (start < vm_map_min(map) || end > vm_map_max(map)) {
15313 return KERN_INVALID_ADDRESS;
15314 }
15315
15316 /* Figure how much memory we need to flush (in page increments) */
15317 sync_size = end - start;
15318
15319 vm_map_lock(map);
15320
15321 if (attribute != MATTR_CACHE) {
15322 /* If we don't have to find physical addresses, we */
15323 /* don't have to do an explicit traversal here. */
15324 ret = pmap_attribute(map->pmap, start, end - start,
15325 attribute, value);
15326 vm_map_unlock(map);
15327 return ret;
15328 }
15329
15330 ret = KERN_SUCCESS; /* Assume it all worked */
15331
15332 while (sync_size) {
15333 if (vm_map_lookup_entry(map, start, &entry)) {
15334 vm_map_size_t sub_size;
15335 if ((entry->vme_end - start) > sync_size) {
15336 sub_size = sync_size;
15337 sync_size = 0;
15338 } else {
15339 sub_size = entry->vme_end - start;
15340 sync_size -= sub_size;
15341 }
15342 if (entry->is_sub_map) {
15343 vm_map_offset_t sub_start;
15344 vm_map_offset_t sub_end;
15345
15346 sub_start = (start - entry->vme_start)
15347 + VME_OFFSET(entry);
15348 sub_end = sub_start + sub_size;
15349 vm_map_machine_attribute(
15350 VME_SUBMAP(entry),
15351 sub_start,
15352 sub_end,
15353 attribute, value);
15354 } else {
15355 if (VME_OBJECT(entry)) {
15356 vm_page_t m;
15357 vm_object_t object;
15358 vm_object_t base_object;
15359 vm_object_t last_object;
15360 vm_object_offset_t offset;
15361 vm_object_offset_t base_offset;
15362 vm_map_size_t range;
15363 range = sub_size;
15364 offset = (start - entry->vme_start)
15365 + VME_OFFSET(entry);
15366 offset = vm_object_trunc_page(offset);
15367 base_offset = offset;
15368 object = VME_OBJECT(entry);
15369 base_object = object;
15370 last_object = NULL;
15371
15372 vm_object_lock(object);
15373
15374 while (range) {
15375 m = vm_page_lookup(
15376 object, offset);
15377
15378 if (m && !m->vmp_fictitious) {
15379 ret =
15380 pmap_attribute_cache_sync(
15381 VM_PAGE_GET_PHYS_PAGE(m),
15382 PAGE_SIZE,
15383 attribute, value);
15384 } else if (object->shadow) {
15385 offset = offset + object->vo_shadow_offset;
15386 last_object = object;
15387 object = object->shadow;
15388 vm_object_lock(last_object->shadow);
15389 vm_object_unlock(last_object);
15390 continue;
15391 }
15392 if (range < PAGE_SIZE) {
15393 range = 0;
15394 } else {
15395 range -= PAGE_SIZE;
15396 }
15397
15398 if (base_object != object) {
15399 vm_object_unlock(object);
15400 vm_object_lock(base_object);
15401 object = base_object;
15402 }
15403 /* Bump to the next page */
15404 base_offset += PAGE_SIZE;
15405 offset = base_offset;
15406 }
15407 vm_object_unlock(object);
15408 }
15409 }
15410 start += sub_size;
15411 } else {
15412 vm_map_unlock(map);
15413 return KERN_FAILURE;
15414 }
15415 }
15416
15417 vm_map_unlock(map);
15418
15419 return ret;
15420 }
15421
15422 /*
15423 * vm_map_behavior_set:
15424 *
15425 * Sets the paging reference behavior of the specified address
15426 * range in the target map. Paging reference behavior affects
15427 * how pagein operations resulting from faults on the map will be
15428 * clustered.
15429 */
15430 kern_return_t
15431 vm_map_behavior_set(
15432 vm_map_t map,
15433 vm_map_offset_t start,
15434 vm_map_offset_t end,
15435 vm_behavior_t new_behavior)
15436 {
15437 vm_map_entry_t entry;
15438 vm_map_entry_t temp_entry;
15439
15440 if (start > end ||
15441 start < vm_map_min(map) ||
15442 end > vm_map_max(map)) {
15443 return KERN_NO_SPACE;
15444 }
15445
15446 switch (new_behavior) {
15447 /*
15448 * This first block of behaviors all set a persistent state on the specified
15449 * memory range. All we have to do here is to record the desired behavior
15450 * in the vm_map_entry_t's.
15451 */
15452
15453 case VM_BEHAVIOR_DEFAULT:
15454 case VM_BEHAVIOR_RANDOM:
15455 case VM_BEHAVIOR_SEQUENTIAL:
15456 case VM_BEHAVIOR_RSEQNTL:
15457 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15458 vm_map_lock(map);
15459
15460 /*
15461 * The entire address range must be valid for the map.
15462 * Note that vm_map_range_check() does a
15463 * vm_map_lookup_entry() internally and returns the
15464 * entry containing the start of the address range if
15465 * the entire range is valid.
15466 */
15467 if (vm_map_range_check(map, start, end, &temp_entry)) {
15468 entry = temp_entry;
15469 vm_map_clip_start(map, entry, start);
15470 } else {
15471 vm_map_unlock(map);
15472 return KERN_INVALID_ADDRESS;
15473 }
15474
15475 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15476 vm_map_clip_end(map, entry, end);
15477 if (entry->is_sub_map) {
15478 assert(!entry->use_pmap);
15479 }
15480
15481 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15482 entry->zero_wired_pages = TRUE;
15483 } else {
15484 entry->behavior = new_behavior;
15485 }
15486 entry = entry->vme_next;
15487 }
15488
15489 vm_map_unlock(map);
15490 break;
15491
15492 /*
15493 * The rest of these are different from the above in that they cause
15494 * an immediate action to take place as opposed to setting a behavior that
15495 * affects future actions.
15496 */
15497
15498 case VM_BEHAVIOR_WILLNEED:
15499 return vm_map_willneed(map, start, end);
15500
15501 case VM_BEHAVIOR_DONTNEED:
15502 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15503
15504 case VM_BEHAVIOR_FREE:
15505 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15506
15507 case VM_BEHAVIOR_REUSABLE:
15508 return vm_map_reusable_pages(map, start, end);
15509
15510 case VM_BEHAVIOR_REUSE:
15511 return vm_map_reuse_pages(map, start, end);
15512
15513 case VM_BEHAVIOR_CAN_REUSE:
15514 return vm_map_can_reuse(map, start, end);
15515
15516 #if MACH_ASSERT
15517 case VM_BEHAVIOR_PAGEOUT:
15518 return vm_map_pageout(map, start, end);
15519 #endif /* MACH_ASSERT */
15520
15521 default:
15522 return KERN_INVALID_ARGUMENT;
15523 }
15524
15525 return KERN_SUCCESS;
15526 }
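/*
 * Editor's note -- from user space these behaviors are normally reached
 * through madvise(2).  The sketch below is a user-space program (not kernel
 * code); the MADV_* to VM_BEHAVIOR_* correspondence noted in the comments
 * reflects the BSD madvise() shim as the editor understands it.
 */
#if 0   /* user-space sketch, not kernel code */
#include <sys/mman.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 16 * (size_t)getpagesize();
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED) {
		return 1;
	}

	memset(buf, 0xaa, len);                 /* dirty the pages            */
	madvise(buf, len, MADV_FREE_REUSABLE);  /* ~ VM_BEHAVIOR_REUSABLE     */
	madvise(buf, len, MADV_FREE_REUSE);     /* ~ VM_BEHAVIOR_REUSE        */
	madvise(buf, len, MADV_WILLNEED);       /* ~ VM_BEHAVIOR_WILLNEED     */

	munmap(buf, len);
	return 0;
}
#endif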
15527
15528
15529 /*
15530 * Internals for madvise(MADV_WILLNEED) system call.
15531 *
15532 * The implementation is to:
15533 * a) read ahead if the mapping corresponds to a mapped regular file, or
15534 * b) fault in the pages (zero-fill, decompress etc.) if it's an anonymous mapping
15535 */
15536
15537
15538 static kern_return_t
15539 vm_map_willneed(
15540 vm_map_t map,
15541 vm_map_offset_t start,
15542 vm_map_offset_t end
15543 )
15544 {
15545 vm_map_entry_t entry;
15546 vm_object_t object;
15547 memory_object_t pager;
15548 struct vm_object_fault_info fault_info = {};
15549 kern_return_t kr;
15550 vm_object_size_t len;
15551 vm_object_offset_t offset;
15552
15553 fault_info.interruptible = THREAD_UNINT; /* ignored value */
15554 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
15555 fault_info.stealth = TRUE;
15556
15557 /*
15558 * The MADV_WILLNEED operation doesn't require any changes to the
15559 * vm_map_entry_t's, so the read lock is sufficient.
15560 */
15561
15562 vm_map_lock_read(map);
15563
15564 /*
15565 * The madvise semantics require that the address range be fully
15566 * allocated with no holes. Otherwise, we're required to return
15567 * an error.
15568 */
15569
15570 if (!vm_map_range_check(map, start, end, &entry)) {
15571 vm_map_unlock_read(map);
15572 return KERN_INVALID_ADDRESS;
15573 }
15574
15575 /*
15576 * Examine each vm_map_entry_t in the range.
15577 */
15578 for (; entry != vm_map_to_entry(map) && start < end;) {
15579 /*
15580 * The first time through, the start address could be anywhere
15581 * within the vm_map_entry we found. So adjust the offset to
15582 * correspond. After that, the offset will always be zero to
15583 * correspond to the beginning of the current vm_map_entry.
15584 */
15585 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15586
15587 /*
15588 * Set the length so we don't go beyond the end of the
15589 * map_entry or beyond the end of the range we were given.
15590 * This range could also span multiple map entries, all of which
15591 * map different files, so make sure we only do the right amount
15592 * of I/O for each object. Note that it's possible for there
15593 * to be multiple map entries all referring to the same object
15594 * but with different page permissions, but it's not worth
15595 * trying to optimize that case.
15596 */
15597 len = MIN(entry->vme_end - start, end - start);
15598
15599 if ((vm_size_t) len != len) {
15600 /* 32-bit overflow */
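/* clamp to the largest page-aligned value that fits in vm_size_t */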
15601 len = (vm_size_t) (0 - PAGE_SIZE);
15602 }
15603 fault_info.cluster_size = (vm_size_t) len;
15604 fault_info.lo_offset = offset;
15605 fault_info.hi_offset = offset + len;
15606 fault_info.user_tag = VME_ALIAS(entry);
15607 fault_info.pmap_options = 0;
15608 if (entry->iokit_acct ||
15609 (!entry->is_sub_map && !entry->use_pmap)) {
15610 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15611 }
15612
15613 /*
15614 * If the entry is a submap OR there's no read permission
15615 * to this mapping, then just skip it.
15616 */
15617 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15618 entry = entry->vme_next;
15619 start = entry->vme_start;
15620 continue;
15621 }
15622
15623 object = VME_OBJECT(entry);
15624
15625 if (object == NULL ||
15626 (object && object->internal)) {
15627 /*
15628 * Memory range backed by anonymous memory.
15629 */
15630 vm_size_t region_size = 0, effective_page_size = 0;
15631 vm_map_offset_t addr = 0, effective_page_mask = 0;
15632
15633 region_size = len;
15634 addr = start;
15635
15636 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
15637 effective_page_size = effective_page_mask + 1;
15638
15639 vm_map_unlock_read(map);
15640
15641 while (region_size) {
15642 vm_pre_fault(
15643 vm_map_trunc_page(addr, effective_page_mask),
15644 VM_PROT_READ | VM_PROT_WRITE);
15645
15646 region_size -= effective_page_size;
15647 addr += effective_page_size;
15648 }
15649 } else {
15650 /*
15651 * Find the file object backing this map entry. If there is
15652 * none, then we simply ignore the "will need" advice for this
15653 * entry and go on to the next one.
15654 */
15655 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15656 entry = entry->vme_next;
15657 start = entry->vme_start;
15658 continue;
15659 }
15660
15661 vm_object_paging_begin(object);
15662 pager = object->pager;
15663 vm_object_unlock(object);
15664
15665 /*
15666 * The data_request() could take a long time, so let's
15667 * release the map lock to avoid blocking other threads.
15668 */
15669 vm_map_unlock_read(map);
15670
15671 /*
15672 * Get the data from the object asynchronously.
15673 *
15674 * Note that memory_object_data_request() places limits on the
15675 * amount of I/O it will do. Regardless of the len we
15676 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15677 * silently truncates the len to that size. This isn't
15678 * necessarily bad since madvise shouldn't really be used to
15679 * page in unlimited amounts of data. Other Unix variants
15680 * limit the willneed case as well. If this turns out to be an
15681 * issue for developers, then we can always adjust the policy
15682 * here and still be backwards compatible since this is all
15683 * just "advice".
15684 */
15685 kr = memory_object_data_request(
15686 pager,
15687 vm_object_trunc_page(offset) + object->paging_offset,
15688 0, /* ignored */
15689 VM_PROT_READ,
15690 (memory_object_fault_info_t)&fault_info);
15691
15692 vm_object_lock(object);
15693 vm_object_paging_end(object);
15694 vm_object_unlock(object);
15695
15696 /*
15697 * If we couldn't do the I/O for some reason, just give up on
15698 * the madvise. We still return success to the user since
15699 * madvise isn't supposed to fail when the advice can't be
15700 * taken.
15701 */
15702
15703 if (kr != KERN_SUCCESS) {
15704 return KERN_SUCCESS;
15705 }
15706 }
15707
15708 start += len;
15709 if (start >= end) {
15710 /* done */
15711 return KERN_SUCCESS;
15712 }
15713
15714 /* look up next entry */
15715 vm_map_lock_read(map);
15716 if (!vm_map_lookup_entry(map, start, &entry)) {
15717 /*
15718 * There's a new hole in the address range.
15719 */
15720 vm_map_unlock_read(map);
15721 return KERN_INVALID_ADDRESS;
15722 }
15723 }
15724
15725 vm_map_unlock_read(map);
15726 return KERN_SUCCESS;
15727 }
15728
15729 static boolean_t
15730 vm_map_entry_is_reusable(
15731 vm_map_entry_t entry)
15732 {
15733 /* Only user map entries */
15734
15735 vm_object_t object;
15736
15737 if (entry->is_sub_map) {
15738 return FALSE;
15739 }
15740
15741 switch (VME_ALIAS(entry)) {
15742 case VM_MEMORY_MALLOC:
15743 case VM_MEMORY_MALLOC_SMALL:
15744 case VM_MEMORY_MALLOC_LARGE:
15745 case VM_MEMORY_REALLOC:
15746 case VM_MEMORY_MALLOC_TINY:
15747 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15748 case VM_MEMORY_MALLOC_LARGE_REUSED:
15749 /*
15750 * This is a malloc() memory region: check if it's still
15751 * in its original state and can be re-used for more
15752 * malloc() allocations.
15753 */
15754 break;
15755 default:
15756 /*
15757 * Not a malloc() memory region: let the caller decide if
15758 * it's re-usable.
15759 */
15760 return TRUE;
15761 }
15762
15763 if (/*entry->is_shared ||*/
15764 entry->is_sub_map ||
15765 entry->in_transition ||
15766 entry->protection != VM_PROT_DEFAULT ||
15767 entry->max_protection != VM_PROT_ALL ||
15768 entry->inheritance != VM_INHERIT_DEFAULT ||
15769 entry->no_cache ||
15770 entry->permanent ||
15771 entry->superpage_size != FALSE ||
15772 entry->zero_wired_pages ||
15773 entry->wired_count != 0 ||
15774 entry->user_wired_count != 0) {
15775 return FALSE;
15776 }
15777
15778 object = VME_OBJECT(entry);
15779 if (object == VM_OBJECT_NULL) {
15780 return TRUE;
15781 }
15782 if (
15783 #if 0
15784 /*
15785 * Let's proceed even if the VM object is potentially
15786 * shared.
15787 * We check for this later when processing the actual
15788 * VM pages, so the contents will be safe if shared.
15789 *
15790 * But we can still mark this memory region as "reusable" to
15791 * acknowledge that the caller did let us know that the memory
15792 * could be re-used and should not be penalized for holding
15793 * on to it. This allows its "resident size" to not include
15794 * the reusable range.
15795 */
15796 object->ref_count == 1 &&
15797 #endif
15798 object->wired_page_count == 0 &&
15799 object->copy == VM_OBJECT_NULL &&
15800 object->shadow == VM_OBJECT_NULL &&
15801 object->internal &&
15802 object->purgable == VM_PURGABLE_DENY &&
15803 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15804 !object->true_share &&
15805 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15806 !object->code_signed) {
15807 return TRUE;
15808 }
15809 return FALSE;
15810 }
15811
15812 static kern_return_t
15813 vm_map_reuse_pages(
15814 vm_map_t map,
15815 vm_map_offset_t start,
15816 vm_map_offset_t end)
15817 {
15818 vm_map_entry_t entry;
15819 vm_object_t object;
15820 vm_object_offset_t start_offset, end_offset;
15821
15822 /*
15823 * The MADV_REUSE operation doesn't require any changes to the
15824 * vm_map_entry_t's, so the read lock is sufficient.
15825 */
15826
15827 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15828 /*
15829 * XXX TODO4K
15830 * need to figure out what reusable means for a
15831 * portion of a native page.
15832 */
15833 return KERN_SUCCESS;
15834 }
15835
15836 vm_map_lock_read(map);
15837 assert(map->pmap != kernel_pmap); /* protect alias access */
15838
15839 /*
15840 * The madvise semantics require that the address range be fully
15841 * allocated with no holes. Otherwise, we're required to return
15842 * an error.
15843 */
15844
15845 if (!vm_map_range_check(map, start, end, &entry)) {
15846 vm_map_unlock_read(map);
15847 vm_page_stats_reusable.reuse_pages_failure++;
15848 return KERN_INVALID_ADDRESS;
15849 }
15850
15851 /*
15852 * Examine each vm_map_entry_t in the range.
15853 */
15854 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15855 entry = entry->vme_next) {
15856 /*
15857 * Sanity check on the VM map entry.
15858 */
15859 if (!vm_map_entry_is_reusable(entry)) {
15860 vm_map_unlock_read(map);
15861 vm_page_stats_reusable.reuse_pages_failure++;
15862 return KERN_INVALID_ADDRESS;
15863 }
15864
15865 /*
15866 * The first time through, the start address could be anywhere
15867 * within the vm_map_entry we found. So adjust the offset to
15868 * correspond.
15869 */
15870 if (entry->vme_start < start) {
15871 start_offset = start - entry->vme_start;
15872 } else {
15873 start_offset = 0;
15874 }
15875 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15876 start_offset += VME_OFFSET(entry);
15877 end_offset += VME_OFFSET(entry);
15878
15879 assert(!entry->is_sub_map);
15880 object = VME_OBJECT(entry);
15881 if (object != VM_OBJECT_NULL) {
15882 vm_object_lock(object);
15883 vm_object_reuse_pages(object, start_offset, end_offset,
15884 TRUE);
15885 vm_object_unlock(object);
15886 }
15887
15888 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15889 /*
15890 * XXX
15891 * We do not hold the VM map exclusively here.
15892 * The "alias" field is not that critical, so it's
15893 * safe to update it here, as long as it is the only
15894 * one that can be modified while holding the VM map
15895 * "shared".
15896 */
15897 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15898 }
15899 }
15900
15901 vm_map_unlock_read(map);
15902 vm_page_stats_reusable.reuse_pages_success++;
15903 return KERN_SUCCESS;
15904 }
15905
15906
15907 static kern_return_t
15908 vm_map_reusable_pages(
15909 vm_map_t map,
15910 vm_map_offset_t start,
15911 vm_map_offset_t end)
15912 {
15913 vm_map_entry_t entry;
15914 vm_object_t object;
15915 vm_object_offset_t start_offset, end_offset;
15916 vm_map_offset_t pmap_offset;
15917
15918 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15919 /*
15920 * XXX TODO4K
15921 * need to figure out what reusable means for a portion
15922 * of a native page.
15923 */
15924 return KERN_SUCCESS;
15925 }
15926
15927 /*
15928 * The MADV_REUSABLE operation doesn't require any changes to the
15929 * vm_map_entry_t's, so the read lock is sufficient.
15930 */
15931
15932 vm_map_lock_read(map);
15933 assert(map->pmap != kernel_pmap); /* protect alias access */
15934
15935 /*
15936 * The madvise semantics require that the address range be fully
15937 * allocated with no holes. Otherwise, we're required to return
15938 * an error.
15939 */
15940
15941 if (!vm_map_range_check(map, start, end, &entry)) {
15942 vm_map_unlock_read(map);
15943 vm_page_stats_reusable.reusable_pages_failure++;
15944 return KERN_INVALID_ADDRESS;
15945 }
15946
15947 /*
15948 * Examine each vm_map_entry_t in the range.
15949 */
15950 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15951 entry = entry->vme_next) {
15952 int kill_pages = 0;
15953
15954 /*
15955 * Sanity check on the VM map entry.
15956 */
15957 if (!vm_map_entry_is_reusable(entry)) {
15958 vm_map_unlock_read(map);
15959 vm_page_stats_reusable.reusable_pages_failure++;
15960 return KERN_INVALID_ADDRESS;
15961 }
15962
15963 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15964 /* not writable: can't discard contents */
15965 vm_map_unlock_read(map);
15966 vm_page_stats_reusable.reusable_nonwritable++;
15967 vm_page_stats_reusable.reusable_pages_failure++;
15968 return KERN_PROTECTION_FAILURE;
15969 }
15970
15971 /*
15972 * The first time through, the start address could be anywhere
15973 * within the vm_map_entry we found. So adjust the offset to
15974 * correspond.
15975 */
15976 if (entry->vme_start < start) {
15977 start_offset = start - entry->vme_start;
15978 pmap_offset = start;
15979 } else {
15980 start_offset = 0;
15981 pmap_offset = entry->vme_start;
15982 }
15983 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15984 start_offset += VME_OFFSET(entry);
15985 end_offset += VME_OFFSET(entry);
15986
15987 assert(!entry->is_sub_map);
15988 object = VME_OBJECT(entry);
15989 if (object == VM_OBJECT_NULL) {
15990 continue;
15991 }
15992
15993
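/*
 * Pages can only be discarded (kill_pages == 1) when the object is not
 * copy-on-write shared (single reference, or no symmetric copy pending),
 * is not shadowed, and the entry does not use alternate (IOKit)
 * accounting; otherwise we only account for the request (kill_pages == -1).
 */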
15994 vm_object_lock(object);
15995 if (((object->ref_count == 1) ||
15996 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15997 object->copy == VM_OBJECT_NULL)) &&
15998 object->shadow == VM_OBJECT_NULL &&
15999 /*
16000 * "iokit_acct" entries are billed for their virtual size
16001 * (rather than for their resident pages only), so they
16002 * wouldn't benefit from making pages reusable, and it
16003 * would be hard to keep track of pages that are both
16004 * "iokit_acct" and "reusable" in the pmap stats and
16005 * ledgers.
16006 */
16007 !(entry->iokit_acct ||
16008 (!entry->is_sub_map && !entry->use_pmap))) {
16009 if (object->ref_count != 1) {
16010 vm_page_stats_reusable.reusable_shared++;
16011 }
16012 kill_pages = 1;
16013 } else {
16014 kill_pages = -1;
16015 }
16016 if (kill_pages != -1) {
16017 vm_object_deactivate_pages(object,
16018 start_offset,
16019 end_offset - start_offset,
16020 kill_pages,
16021 TRUE /*reusable_pages*/,
16022 map->pmap,
16023 pmap_offset);
16024 } else {
16025 vm_page_stats_reusable.reusable_pages_shared++;
16026 }
16027 vm_object_unlock(object);
16028
16029 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16030 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16031 /*
16032 * XXX
16033 * We do not hold the VM map exclusively here.
16034 * The "alias" field is not that critical, so it's
16035 * safe to update it here, as long as it is the only
16036 * one that can be modified while holding the VM map
16037 * "shared".
16038 */
16039 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16040 }
16041 }
16042
16043 vm_map_unlock_read(map);
16044 vm_page_stats_reusable.reusable_pages_success++;
16045 return KERN_SUCCESS;
16046 }
16047
16048
16049 static kern_return_t
16050 vm_map_can_reuse(
16051 vm_map_t map,
16052 vm_map_offset_t start,
16053 vm_map_offset_t end)
16054 {
16055 vm_map_entry_t entry;
16056
16057 /*
16058 * The MADV_CAN_REUSE operation doesn't require any changes to the
16059 * vm_map_entry_t's, so the read lock is sufficient.
16060 */
16061
16062 vm_map_lock_read(map);
16063 assert(map->pmap != kernel_pmap); /* protect alias access */
16064
16065 /*
16066 * The madvise semantics require that the address range be fully
16067 * allocated with no holes. Otherwise, we're required to return
16068 * an error.
16069 */
16070
16071 if (!vm_map_range_check(map, start, end, &entry)) {
16072 vm_map_unlock_read(map);
16073 vm_page_stats_reusable.can_reuse_failure++;
16074 return KERN_INVALID_ADDRESS;
16075 }
16076
16077 /*
16078 * Examine each vm_map_entry_t in the range.
16079 */
16080 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16081 entry = entry->vme_next) {
16082 /*
16083 * Sanity check on the VM map entry.
16084 */
16085 if (!vm_map_entry_is_reusable(entry)) {
16086 vm_map_unlock_read(map);
16087 vm_page_stats_reusable.can_reuse_failure++;
16088 return KERN_INVALID_ADDRESS;
16089 }
16090 }
16091
16092 vm_map_unlock_read(map);
16093 vm_page_stats_reusable.can_reuse_success++;
16094 return KERN_SUCCESS;
16095 }
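/*
 * User-level illustration only (a sketch): the reusable/reuse pair that
 * drives the routines above is typically issued by the malloc library on
 * page-aligned regions it owns, roughly:
 *
 *     char *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *         MAP_ANON | MAP_PRIVATE, -1, 0);
 *     ... use the memory ...
 *     madvise(p, size, MADV_FREE_REUSABLE);  // -> vm_map_reusable_pages()
 *     ... later, before touching it again ...
 *     madvise(p, size, MADV_FREE_REUSE);     // -> vm_map_reuse_pages()
 *
 * MADV_CAN_REUSE (-> vm_map_can_reuse()) only checks that the range would
 * be eligible, without changing any page state.
 */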
16096
16097
16098 #if MACH_ASSERT
16099 static kern_return_t
16100 vm_map_pageout(
16101 vm_map_t map,
16102 vm_map_offset_t start,
16103 vm_map_offset_t end)
16104 {
16105 vm_map_entry_t entry;
16106
16107 /*
16108 * The MADV_PAGEOUT operation doesn't require any changes to the
16109 * vm_map_entry_t's, so the read lock is sufficient.
16110 */
16111
16112 vm_map_lock_read(map);
16113
16114 /*
16115 * The madvise semantics require that the address range be fully
16116 * allocated with no holes. Otherwise, we're required to return
16117 * an error.
16118 */
16119
16120 if (!vm_map_range_check(map, start, end, &entry)) {
16121 vm_map_unlock_read(map);
16122 return KERN_INVALID_ADDRESS;
16123 }
16124
16125 /*
16126 * Examine each vm_map_entry_t in the range.
16127 */
16128 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16129 entry = entry->vme_next) {
16130 vm_object_t object;
16131
16132 /*
16133 * Sanity check on the VM map entry.
16134 */
16135 if (entry->is_sub_map) {
16136 vm_map_t submap;
16137 vm_map_offset_t submap_start;
16138 vm_map_offset_t submap_end;
16139 vm_map_entry_t submap_entry;
16140
16141 submap = VME_SUBMAP(entry);
16142 submap_start = VME_OFFSET(entry);
16143 submap_end = submap_start + (entry->vme_end -
16144 entry->vme_start);
16145
16146 vm_map_lock_read(submap);
16147
16148 if (!vm_map_range_check(submap,
16149 submap_start,
16150 submap_end,
16151 &submap_entry)) {
16152 vm_map_unlock_read(submap);
16153 vm_map_unlock_read(map);
16154 return KERN_INVALID_ADDRESS;
16155 }
16156
16157 object = VME_OBJECT(submap_entry);
16158 if (submap_entry->is_sub_map ||
16159 object == VM_OBJECT_NULL ||
16160 !object->internal) {
16161 vm_map_unlock_read(submap);
16162 continue;
16163 }
16164
16165 vm_object_pageout(object);
16166
16167 vm_map_unlock_read(submap);
16168 submap = VM_MAP_NULL;
16169 submap_entry = VM_MAP_ENTRY_NULL;
16170 continue;
16171 }
16172
16173 object = VME_OBJECT(entry);
16174 if (entry->is_sub_map ||
16175 object == VM_OBJECT_NULL ||
16176 !object->internal) {
16177 continue;
16178 }
16179
16180 vm_object_pageout(object);
16181 }
16182
16183 vm_map_unlock_read(map);
16184 return KERN_SUCCESS;
16185 }
16186 #endif /* MACH_ASSERT */
16187
16188
16189 /*
16190 * Routine: vm_map_entry_insert
16191 *
16192 * Description: This routine creates and inserts a new vm_map_entry in a locked map.
16193 */
16194 vm_map_entry_t
16195 vm_map_entry_insert(
16196 vm_map_t map,
16197 vm_map_entry_t insp_entry,
16198 vm_map_offset_t start,
16199 vm_map_offset_t end,
16200 vm_object_t object,
16201 vm_object_offset_t offset,
16202 boolean_t needs_copy,
16203 boolean_t is_shared,
16204 boolean_t in_transition,
16205 vm_prot_t cur_protection,
16206 vm_prot_t max_protection,
16207 vm_behavior_t behavior,
16208 vm_inherit_t inheritance,
16209 unsigned short wired_count,
16210 boolean_t no_cache,
16211 boolean_t permanent,
16212 boolean_t no_copy_on_read,
16213 unsigned int superpage_size,
16214 boolean_t clear_map_aligned,
16215 boolean_t is_submap,
16216 boolean_t used_for_jit,
16217 int alias,
16218 boolean_t translated_allow_execute)
16219 {
16220 vm_map_entry_t new_entry;
16221
16222 assert(insp_entry != (vm_map_entry_t)0);
16223 vm_map_lock_assert_exclusive(map);
16224
16225 #if DEVELOPMENT || DEBUG
16226 vm_object_offset_t end_offset = 0;
16227 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16228 #endif /* DEVELOPMENT || DEBUG */
16229
16230 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16231
16232 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16233 new_entry->map_aligned = TRUE;
16234 } else {
16235 new_entry->map_aligned = FALSE;
16236 }
16237 if (clear_map_aligned &&
16238 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16239 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16240 new_entry->map_aligned = FALSE;
16241 }
16242
16243 new_entry->vme_start = start;
16244 new_entry->vme_end = end;
16245 if (new_entry->map_aligned) {
16246 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16247 VM_MAP_PAGE_MASK(map)));
16248 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16249 VM_MAP_PAGE_MASK(map)));
16250 } else {
16251 assert(page_aligned(new_entry->vme_start));
16252 assert(page_aligned(new_entry->vme_end));
16253 }
16254 assert(new_entry->vme_start < new_entry->vme_end);
16255
16256 VME_OBJECT_SET(new_entry, object);
16257 VME_OFFSET_SET(new_entry, offset);
16258 new_entry->is_shared = is_shared;
16259 new_entry->is_sub_map = is_submap;
16260 new_entry->needs_copy = needs_copy;
16261 new_entry->in_transition = in_transition;
16262 new_entry->needs_wakeup = FALSE;
16263 new_entry->inheritance = inheritance;
16264 new_entry->protection = cur_protection;
16265 new_entry->max_protection = max_protection;
16266 new_entry->behavior = behavior;
16267 new_entry->wired_count = wired_count;
16268 new_entry->user_wired_count = 0;
16269 if (is_submap) {
16270 /*
16271 * submap: "use_pmap" means "nested".
16272 * default: false.
16273 */
16274 new_entry->use_pmap = FALSE;
16275 } else {
16276 /*
16277 * object: "use_pmap" means "use pmap accounting" for footprint.
16278 * default: true.
16279 */
16280 new_entry->use_pmap = TRUE;
16281 }
16282 VME_ALIAS_SET(new_entry, alias);
16283 new_entry->zero_wired_pages = FALSE;
16284 new_entry->no_cache = no_cache;
16285 new_entry->permanent = permanent;
16286 if (superpage_size) {
16287 new_entry->superpage_size = TRUE;
16288 } else {
16289 new_entry->superpage_size = FALSE;
16290 }
16291 if (used_for_jit) {
16292 if (!(map->jit_entry_exists) ||
16293 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16294 new_entry->used_for_jit = TRUE;
16295 map->jit_entry_exists = TRUE;
16296 }
16297 } else {
16298 new_entry->used_for_jit = FALSE;
16299 }
16300 if (translated_allow_execute) {
16301 new_entry->translated_allow_execute = TRUE;
16302 } else {
16303 new_entry->translated_allow_execute = FALSE;
16304 }
16305 new_entry->pmap_cs_associated = FALSE;
16306 new_entry->iokit_acct = FALSE;
16307 new_entry->vme_resilient_codesign = FALSE;
16308 new_entry->vme_resilient_media = FALSE;
16309 new_entry->vme_atomic = FALSE;
16310 new_entry->vme_no_copy_on_read = no_copy_on_read;
16311
16312 /*
16313 * Insert the new entry into the list.
16314 */
16315
16316 vm_map_store_entry_link(map, insp_entry, new_entry,
16317 VM_MAP_KERNEL_FLAGS_NONE);
16318 map->size += end - start;
16319
16320 /*
16321 * Update the free space hint and the lookup hint.
16322 */
16323
16324 SAVE_HINT_MAP_WRITE(map, new_entry);
16325 return new_entry;
16326 }
16327
16328 int vm_remap_old_path = 0;
16329 int vm_remap_new_path = 0;
16330 /*
16331 * Routine: vm_map_remap_extract
16332 *
16333 * Description: This routine extracts a list of vm_map_entry_t's from a map.
16334 */
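/*
 * User-level illustration only (a sketch): this extraction logic is what
 * ultimately backs mach_vm_remap(), e.g.
 *
 *     mach_vm_address_t target = 0;
 *     vm_prot_t cur_prot, max_prot;
 *     kern_return_t kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *         VM_FLAGS_ANYWHERE, mach_task_self(), src_addr,
 *         FALSE,                  // copy == FALSE: share the pages
 *         &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */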
16335 static kern_return_t
16336 vm_map_remap_extract(
16337 vm_map_t map,
16338 vm_map_offset_t addr,
16339 vm_map_size_t size,
16340 vm_prot_t required_protection,
16341 boolean_t copy,
16342 struct vm_map_header *map_header,
16343 vm_prot_t *cur_protection,
16344 vm_prot_t *max_protection,
16345 /* What, no behavior? */
16346 vm_inherit_t inheritance,
16347 vm_map_kernel_flags_t vmk_flags)
16348 {
16349 kern_return_t result;
16350 vm_map_size_t mapped_size;
16351 vm_map_size_t tmp_size;
16352 vm_map_entry_t src_entry; /* result of last map lookup */
16353 vm_map_entry_t new_entry;
16354 vm_object_offset_t offset;
16355 vm_map_offset_t map_address;
16356 vm_map_offset_t src_start; /* start of entry to map */
16357 vm_map_offset_t src_end; /* end of region to be mapped */
16358 vm_object_t object;
16359 vm_map_version_t version;
16360 boolean_t src_needs_copy;
16361 boolean_t new_entry_needs_copy;
16362 vm_map_entry_t saved_src_entry;
16363 boolean_t src_entry_was_wired;
16364 vm_prot_t max_prot_for_prot_copy;
16365 vm_map_offset_t effective_page_mask;
16366 boolean_t pageable, same_map;
16367
16368 pageable = vmk_flags.vmkf_copy_pageable;
16369 same_map = vmk_flags.vmkf_copy_same_map;
16370
16371 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16372
16373 assert(map != VM_MAP_NULL);
16374 assert(size != 0);
16375 assert(size == vm_map_round_page(size, effective_page_mask));
16376 assert(inheritance == VM_INHERIT_NONE ||
16377 inheritance == VM_INHERIT_COPY ||
16378 inheritance == VM_INHERIT_SHARE);
16379 assert(!(required_protection & ~VM_PROT_ALL));
16380
16381 /*
16382 * Compute start and end of region.
16383 */
16384 src_start = vm_map_trunc_page(addr, effective_page_mask);
16385 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16386
16387 /*
16388 * Initialize map_header.
16389 */
16390 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16391 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16392 map_header->nentries = 0;
16393 map_header->entries_pageable = pageable;
16394 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16395 map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16396 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16397
16398 vm_map_store_init( map_header );
16399
16400 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16401 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
16402 } else {
16403 max_prot_for_prot_copy = VM_PROT_NONE;
16404 }
16405 *cur_protection = VM_PROT_ALL;
16406 *max_protection = VM_PROT_ALL;
16407
16408 map_address = 0;
16409 mapped_size = 0;
16410 result = KERN_SUCCESS;
16411
16412 /*
16413 * The specified source virtual space might correspond to
16414 * multiple map entries, so we need to loop over them.
16415 */
16416 vm_map_lock(map);
16417 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16418 /*
16419 * This address space uses sub-pages so the range might
16420 * not be re-mappable in an address space with larger
16421 * pages. Re-assemble any broken-up VM map entries to
16422 * improve our chances of making it work.
16423 */
16424 vm_map_simplify_range(map, src_start, src_end);
16425 }
16426 while (mapped_size != size) {
16427 vm_map_size_t entry_size;
16428
16429 /*
16430 * Find the beginning of the region.
16431 */
16432 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16433 result = KERN_INVALID_ADDRESS;
16434 break;
16435 }
16436
16437 if (src_start < src_entry->vme_start ||
16438 (mapped_size && src_start != src_entry->vme_start)) {
16439 result = KERN_INVALID_ADDRESS;
16440 break;
16441 }
16442
16443 tmp_size = size - mapped_size;
16444 if (src_end > src_entry->vme_end) {
16445 tmp_size -= (src_end - src_entry->vme_end);
16446 }
16447
16448 entry_size = (vm_map_size_t)(src_entry->vme_end -
16449 src_entry->vme_start);
16450
16451 if (src_entry->is_sub_map &&
16452 vmk_flags.vmkf_copy_single_object) {
16453 vm_map_t submap;
16454 vm_map_offset_t submap_start;
16455 vm_map_size_t submap_size;
16456
16457 /*
16458 * No check for "required_protection" on "src_entry"
16459 * because the protections that matter are the ones
16460 * on the submap's VM map entry, which will be checked
16461 * during the call to vm_map_remap_extract() below.
16462 */
16463 submap_size = src_entry->vme_end - src_start;
16464 if (submap_size > size) {
16465 submap_size = size;
16466 }
16467 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16468 submap = VME_SUBMAP(src_entry);
16469 vm_map_reference(submap);
16470 vm_map_unlock(map);
16471 src_entry = NULL;
16472 result = vm_map_remap_extract(submap,
16473 submap_start,
16474 submap_size,
16475 required_protection,
16476 copy,
16477 map_header,
16478 cur_protection,
16479 max_protection,
16480 inheritance,
16481 vmk_flags);
16482 vm_map_deallocate(submap);
16483 return result;
16484 }
16485
16486 if ((src_entry->protection & required_protection)
16487 != required_protection) {
16488 if (vmk_flags.vmkf_copy_single_object &&
16489 mapped_size != 0) {
16490 /*
16491 * Single object extraction.
16492 * We can't extract more with the required
16493 * protection but we've extracted some, so
16494 * stop there and declare success.
16495 * The caller should check the size of
16496 * the copy entry we've extracted.
16497 */
16498 result = KERN_SUCCESS;
16499 } else {
16500 /*
16501 * VM range extraction.
16502 * Required protection is not available
16503 * for this part of the range: fail.
16504 */
16505 result = KERN_PROTECTION_FAILURE;
16506 }
16507 break;
16508 }
16509
16510 if (src_entry->is_sub_map &&
16511 VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry)) < PAGE_SHIFT) {
16512 vm_map_t submap;
16513 vm_map_offset_t submap_start;
16514 vm_map_size_t submap_size;
16515 vm_map_copy_t submap_copy;
16516 vm_prot_t submap_curprot, submap_maxprot;
16517
16518 vm_remap_new_path++;
16519
16520 /*
16521 * No check for "required_protection" on "src_entry"
16522 * because the protections that matter are the ones
16523 * on the submap's VM map entry, which will be checked
16524 * during the call to vm_map_copy_extract() below.
16525 */
16526 object = VM_OBJECT_NULL;
16527 submap_copy = VM_MAP_COPY_NULL;
16528
16529 /* find equivalent range in the submap */
16530 submap = VME_SUBMAP(src_entry);
16531 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16532 submap_size = tmp_size;
16533 /* extra ref to keep submap alive */
16534 vm_map_reference(submap);
16535
16536 DTRACE_VM6(remap_submap_recurse,
16537 vm_map_t, map,
16538 vm_map_offset_t, addr,
16539 vm_map_size_t, size,
16540 boolean_t, copy,
16541 vm_map_offset_t, submap_start,
16542 vm_map_size_t, submap_size);
16543
16544 /*
16545 * The map can be safely unlocked since we
16546 * already hold a reference on the submap.
16547 *
16548 * No timestamp since we don't care if the map
16549 * gets modified while we're down in the submap.
16550 * We'll resume the extraction at src_start + tmp_size
16551 * anyway.
16552 */
16553 vm_map_unlock(map);
16554 src_entry = NULL; /* not valid once map is unlocked */
16555
16556 result = vm_map_copy_extract(submap,
16557 submap_start,
16558 submap_size,
16559 required_protection,
16560 copy,
16561 &submap_copy,
16562 &submap_curprot,
16563 &submap_maxprot,
16564 inheritance,
16565 vmk_flags);
16566
16567 /* release extra ref on submap */
16568 vm_map_deallocate(submap);
16569 submap = VM_MAP_NULL;
16570
16571 if (result != KERN_SUCCESS) {
16572 vm_map_lock(map);
16573 break;
16574 }
16575
16576 /* transfer submap_copy entries to map_header */
16577 while (vm_map_copy_first_entry(submap_copy) !=
16578 vm_map_copy_to_entry(submap_copy)) {
16579 vm_map_entry_t copy_entry;
16580 vm_map_size_t copy_entry_size;
16581
16582 copy_entry = vm_map_copy_first_entry(submap_copy);
16583 assert(!copy_entry->is_sub_map);
16584 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16585 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16586 copy_entry->vme_start = map_address;
16587 copy_entry->vme_end = map_address + copy_entry_size;
16588 map_address += copy_entry_size;
16589 mapped_size += copy_entry_size;
16590 src_start += copy_entry_size;
16591 assert(src_start <= src_end);
16592 _vm_map_store_entry_link(map_header,
16593 map_header->links.prev,
16594 copy_entry);
16595 }
16596 /* done with submap_copy */
16597 vm_map_copy_discard(submap_copy);
16598
16599 *cur_protection &= submap_curprot;
16600 *max_protection &= submap_maxprot;
16601
16602 /* re-acquire the map lock and continue to next entry */
16603 vm_map_lock(map);
16604 continue;
16605 } else if (src_entry->is_sub_map) {
16606 vm_remap_old_path++;
16607 DTRACE_VM4(remap_submap,
16608 vm_map_t, map,
16609 vm_map_offset_t, addr,
16610 vm_map_size_t, size,
16611 boolean_t, copy);
16612
16613 vm_map_reference(VME_SUBMAP(src_entry));
16614 object = VM_OBJECT_NULL;
16615 } else {
16616 object = VME_OBJECT(src_entry);
16617 if (src_entry->iokit_acct) {
16618 /*
16619 * This entry uses "IOKit accounting".
16620 */
16621 } else if (object != VM_OBJECT_NULL &&
16622 (object->purgable != VM_PURGABLE_DENY ||
16623 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16624 /*
16625 * Purgeable objects have their own accounting:
16626 * no pmap accounting for them.
16627 */
16628 assertf(!src_entry->use_pmap,
16629 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16630 map,
16631 src_entry,
16632 (uint64_t)src_entry->vme_start,
16633 (uint64_t)src_entry->vme_end,
16634 src_entry->protection,
16635 src_entry->max_protection,
16636 VME_ALIAS(src_entry));
16637 } else {
16638 /*
16639 * Not IOKit or purgeable:
16640 * must be accounted by pmap stats.
16641 */
16642 assertf(src_entry->use_pmap,
16643 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16644 map,
16645 src_entry,
16646 (uint64_t)src_entry->vme_start,
16647 (uint64_t)src_entry->vme_end,
16648 src_entry->protection,
16649 src_entry->max_protection,
16650 VME_ALIAS(src_entry));
16651 }
16652
16653 if (object == VM_OBJECT_NULL) {
16654 assert(!src_entry->needs_copy);
16655 object = vm_object_allocate(entry_size);
16656 VME_OFFSET_SET(src_entry, 0);
16657 VME_OBJECT_SET(src_entry, object);
16658 assert(src_entry->use_pmap);
16659 } else if (src_entry->wired_count ||
16660 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16661 /*
16662 * A wired memory region should not have
16663 * any pending copy-on-write and needs to
16664 * keep pointing at the VM object that
16665 * contains the wired pages.
16666 * If we're sharing this memory (copy=false),
16667 * we'll share this VM object.
16668 * If we're copying this memory (copy=true),
16669 * we'll call vm_object_copy_slowly() below
16670 * and use the new VM object for the remapping.
16671 *
16672 * Or, we are already using an asymmetric
16673 * copy, and therefore we already have
16674 * the right object.
16675 */
16676 assert(!src_entry->needs_copy);
16677 } else if (src_entry->needs_copy || object->shadowed ||
16678 (object->internal && !object->true_share &&
16679 !src_entry->is_shared &&
16680 object->vo_size > entry_size)) {
16681 VME_OBJECT_SHADOW(src_entry, entry_size);
16682 assert(src_entry->use_pmap);
16683
16684 if (!src_entry->needs_copy &&
16685 (src_entry->protection & VM_PROT_WRITE)) {
16686 vm_prot_t prot;
16687
16688 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16689
16690 prot = src_entry->protection & ~VM_PROT_WRITE;
16691
16692 if (override_nx(map,
16693 VME_ALIAS(src_entry))
16694 && prot) {
16695 prot |= VM_PROT_EXECUTE;
16696 }
16697
16698 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16699
16700 if (map->mapped_in_other_pmaps) {
16701 vm_object_pmap_protect(
16702 VME_OBJECT(src_entry),
16703 VME_OFFSET(src_entry),
16704 entry_size,
16705 PMAP_NULL,
16706 PAGE_SIZE,
16707 src_entry->vme_start,
16708 prot);
16709 #if MACH_ASSERT
16710 } else if (__improbable(map->pmap == PMAP_NULL)) {
16711 extern boolean_t vm_tests_in_progress;
16712 assert(vm_tests_in_progress);
16713 /*
16714 * Some VM tests (in vm_tests.c)
16715 * sometimes want to use a VM
16716 * map without a pmap.
16717 * Otherwise, this should never
16718 * happen.
16719 */
16720 #endif /* MACH_ASSERT */
16721 } else {
16722 pmap_protect(vm_map_pmap(map),
16723 src_entry->vme_start,
16724 src_entry->vme_end,
16725 prot);
16726 }
16727 }
16728
16729 object = VME_OBJECT(src_entry);
16730 src_entry->needs_copy = FALSE;
16731 }
16732
16733
16734 vm_object_lock(object);
16735 vm_object_reference_locked(object); /* object ref. for new entry */
16736 assert(!src_entry->needs_copy);
16737 if (object->copy_strategy ==
16738 MEMORY_OBJECT_COPY_SYMMETRIC) {
16739 /*
16740 * If we want to share this object (copy==0),
16741 * it needs to be COPY_DELAY.
16742 * If we want to copy this object (copy==1),
16743 * we can't just set "needs_copy" on our side
16744 * and expect the other side to do the same
16745 * (symmetrically), so we can't let the object
16746 * stay COPY_SYMMETRIC.
16747 * So we always switch from COPY_SYMMETRIC to
16748 * COPY_DELAY.
16749 */
16750 object->copy_strategy =
16751 MEMORY_OBJECT_COPY_DELAY;
16752 }
16753 vm_object_unlock(object);
16754 }
16755
16756 offset = (VME_OFFSET(src_entry) +
16757 (src_start - src_entry->vme_start));
16758
16759 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
16760 vm_map_entry_copy(map, new_entry, src_entry);
16761 if (new_entry->is_sub_map) {
16762 /* clr address space specifics */
16763 new_entry->use_pmap = FALSE;
16764 } else if (copy) {
16765 /*
16766 * We're dealing with a copy-on-write operation,
16767 * so the resulting mapping should not inherit the
16768 * original mapping's accounting settings.
16769 * "use_pmap" should be reset to its default (TRUE)
16770 * so that the new mapping gets accounted for in
16771 * the task's memory footprint.
16772 */
16773 new_entry->use_pmap = TRUE;
16774 }
16775 /* "iokit_acct" was cleared in vm_map_entry_copy() */
16776 assert(!new_entry->iokit_acct);
16777
16778 new_entry->map_aligned = FALSE;
16779
16780 new_entry->vme_start = map_address;
16781 new_entry->vme_end = map_address + tmp_size;
16782 assert(new_entry->vme_start < new_entry->vme_end);
16783 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16784 /*
16785 * Remapping for vm_map_protect(VM_PROT_COPY)
16786 * to convert a read-only mapping into a
16787 * copy-on-write version of itself but
16788 * with write access:
16789 * keep the original inheritance and add
16790 * VM_PROT_WRITE to the max protection.
16791 */
16792 new_entry->inheritance = src_entry->inheritance;
16793 new_entry->protection &= max_prot_for_prot_copy;
16794 new_entry->max_protection |= VM_PROT_WRITE;
16795 } else {
16796 new_entry->inheritance = inheritance;
16797 }
16798 VME_OFFSET_SET(new_entry, offset);
16799
16800 /*
16801 * The new region has to be copied now if required.
16802 */
16803 RestartCopy:
16804 if (!copy) {
16805 if (src_entry->used_for_jit == TRUE) {
16806 if (same_map) {
16807 #if __APRR_SUPPORTED__
16808 /*
16809 * Disallow re-mapping of any JIT regions on APRR devices.
16810 */
16811 result = KERN_PROTECTION_FAILURE;
16812 break;
16813 #endif /* __APRR_SUPPORTED__*/
16814 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
16815 /*
16816 * Cannot allow an entry describing a JIT
16817 * region to be shared across address spaces.
16818 */
16819 result = KERN_INVALID_ARGUMENT;
16820 break;
16821 }
16822 }
16823
16824 src_entry->is_shared = TRUE;
16825 new_entry->is_shared = TRUE;
16826 if (!(new_entry->is_sub_map)) {
16827 new_entry->needs_copy = FALSE;
16828 }
16829 } else if (src_entry->is_sub_map) {
16830 /* make this a COW sub_map if not already */
16831 assert(new_entry->wired_count == 0);
16832 new_entry->needs_copy = TRUE;
16833 object = VM_OBJECT_NULL;
16834 } else if (src_entry->wired_count == 0 &&
16835 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
16836 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
16837 VME_OFFSET(new_entry),
16838 (new_entry->vme_end -
16839 new_entry->vme_start),
16840 &src_needs_copy,
16841 &new_entry_needs_copy)) {
16842 new_entry->needs_copy = new_entry_needs_copy;
16843 new_entry->is_shared = FALSE;
16844 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16845
16846 /*
16847 * Handle copy_on_write semantics.
16848 */
16849 if (src_needs_copy && !src_entry->needs_copy) {
16850 vm_prot_t prot;
16851
16852 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16853
16854 prot = src_entry->protection & ~VM_PROT_WRITE;
16855
16856 if (override_nx(map,
16857 VME_ALIAS(src_entry))
16858 && prot) {
16859 prot |= VM_PROT_EXECUTE;
16860 }
16861
16862 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16863
16864 vm_object_pmap_protect(object,
16865 offset,
16866 entry_size,
16867 ((src_entry->is_shared
16868 || map->mapped_in_other_pmaps) ?
16869 PMAP_NULL : map->pmap),
16870 VM_MAP_PAGE_SIZE(map),
16871 src_entry->vme_start,
16872 prot);
16873
16874 assert(src_entry->wired_count == 0);
16875 src_entry->needs_copy = TRUE;
16876 }
16877 /*
16878 * Throw away the old object reference of the new entry.
16879 */
16880 vm_object_deallocate(object);
16881 } else {
16882 new_entry->is_shared = FALSE;
16883 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16884
16885 src_entry_was_wired = (src_entry->wired_count > 0);
16886 saved_src_entry = src_entry;
16887 src_entry = VM_MAP_ENTRY_NULL;
16888
16889 /*
16890 * The map can be safely unlocked since we
16891 * already hold a reference on the object.
16892 *
16893 * Record the timestamp of the map for later
16894 * verification, and unlock the map.
16895 */
16896 version.main_timestamp = map->timestamp;
16897 vm_map_unlock(map); /* Increments timestamp once! */
16898
16899 /*
16900 * Perform the copy.
16901 */
16902 if (src_entry_was_wired > 0 ||
16903 (debug4k_no_cow_copyin &&
16904 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
16905 vm_object_lock(object);
16906 result = vm_object_copy_slowly(
16907 object,
16908 offset,
16909 (new_entry->vme_end -
16910 new_entry->vme_start),
16911 THREAD_UNINT,
16912 VME_OBJECT_PTR(new_entry));
16913
16914 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
16915 new_entry->needs_copy = FALSE;
16916 } else {
16917 vm_object_offset_t new_offset;
16918
16919 new_offset = VME_OFFSET(new_entry);
16920 result = vm_object_copy_strategically(
16921 object,
16922 offset,
16923 (new_entry->vme_end -
16924 new_entry->vme_start),
16925 VME_OBJECT_PTR(new_entry),
16926 &new_offset,
16927 &new_entry_needs_copy);
16928 if (new_offset != VME_OFFSET(new_entry)) {
16929 VME_OFFSET_SET(new_entry, new_offset);
16930 }
16931
16932 new_entry->needs_copy = new_entry_needs_copy;
16933 }
16934
16935 /*
16936 * Throw away the old object reference of the new entry.
16937 */
16938 vm_object_deallocate(object);
16939
16940 if (result != KERN_SUCCESS &&
16941 result != KERN_MEMORY_RESTART_COPY) {
16942 _vm_map_entry_dispose(map_header, new_entry);
16943 vm_map_lock(map);
16944 break;
16945 }
16946
16947 /*
16948 * Verify that the map has not substantially
16949 * changed while the copy was being made.
16950 */
16951
16952 vm_map_lock(map);
16953 if (version.main_timestamp + 1 != map->timestamp) {
16954 /*
16955 * Simple version comparison failed.
16956 *
16957 * Retry the lookup and verify that the
16958 * same object/offset are still present.
16959 */
16960 saved_src_entry = VM_MAP_ENTRY_NULL;
16961 vm_object_deallocate(VME_OBJECT(new_entry));
16962 _vm_map_entry_dispose(map_header, new_entry);
16963 if (result == KERN_MEMORY_RESTART_COPY) {
16964 result = KERN_SUCCESS;
16965 }
16966 continue;
16967 }
16968 /* map hasn't changed: src_entry is still valid */
16969 src_entry = saved_src_entry;
16970 saved_src_entry = VM_MAP_ENTRY_NULL;
16971
16972 if (result == KERN_MEMORY_RESTART_COPY) {
16973 vm_object_reference(object);
16974 goto RestartCopy;
16975 }
16976 }
16977
16978 _vm_map_store_entry_link(map_header,
16979 map_header->links.prev, new_entry);
16980
16981 /* Protections for submap mapping are irrelevant here */
16982 if (!src_entry->is_sub_map) {
16983 *cur_protection &= src_entry->protection;
16984 *max_protection &= src_entry->max_protection;
16985 }
16986
16987 map_address += tmp_size;
16988 mapped_size += tmp_size;
16989 src_start += tmp_size;
16990
16991 if (vmk_flags.vmkf_copy_single_object) {
16992 if (mapped_size != size) {
16993 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
16994 if (src_entry->vme_next != vm_map_to_entry(map) &&
16995 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
16996 /* XXX TODO4K */
16997 DEBUG4K_ERROR("could have extended copy to next entry...\n");
16998 }
16999 }
17000 break;
17001 }
17002 } /* end while */
17003
17004 vm_map_unlock(map);
17005 if (result != KERN_SUCCESS) {
17006 /*
17007 * Free all allocated elements.
17008 */
17009 for (src_entry = map_header->links.next;
17010 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17011 src_entry = new_entry) {
17012 new_entry = src_entry->vme_next;
17013 _vm_map_store_entry_unlink(map_header, src_entry);
17014 if (src_entry->is_sub_map) {
17015 vm_map_deallocate(VME_SUBMAP(src_entry));
17016 } else {
17017 vm_object_deallocate(VME_OBJECT(src_entry));
17018 }
17019 _vm_map_entry_dispose(map_header, src_entry);
17020 }
17021 }
17022 return result;
17023 }
17024
17025 bool
17026 vm_map_is_exotic(
17027 vm_map_t map)
17028 {
17029 return VM_MAP_IS_EXOTIC(map);
17030 }
17031
17032 bool
17033 vm_map_is_alien(
17034 vm_map_t map)
17035 {
17036 return VM_MAP_IS_ALIEN(map);
17037 }
17038
17039 #if XNU_TARGET_OS_OSX
17040 void
17041 vm_map_mark_alien(
17042 vm_map_t map)
17043 {
17044 vm_map_lock(map);
17045 map->is_alien = true;
17046 vm_map_unlock(map);
17047 }
17048 #endif /* XNU_TARGET_OS_OSX */
17049
17050 void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
17051 void
17052 vm_map_copy_to_physcopy(
17053 vm_map_copy_t copy_map,
17054 vm_map_t target_map)
17055 {
17056 vm_map_size_t size;
17057 vm_map_entry_t entry;
17058 vm_map_entry_t new_entry;
17059 vm_object_t new_object;
17060 unsigned int pmap_flags;
17061 pmap_t new_pmap;
17062 vm_map_t new_map;
17063 vm_map_address_t src_start, src_end, src_cur;
17064 vm_map_address_t dst_start, dst_end, dst_cur;
17065 kern_return_t kr;
17066 void *kbuf;
17067
17068 /*
17069 * Perform the equivalent of vm_allocate() and memcpy().
17070 * Replace the mappings in "copy_map" with the newly allocated mapping.
17071 */
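/*
 * Conceptually (a sketch of the intent, not literal code), this is the
 * kernel-internal analogue of:
 *
 *     vm_allocate(task, &dst, size, VM_FLAGS_ANYWHERE);
 *     memcpy(dst, src, size);
 *
 * except that the source uses sub-native-page mappings, so a temporary
 * 4K-page VM map is built and the bytes are moved page by page with
 * copyinmap()/copyoutmap() below.
 */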
17072 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17073
17074 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17075
17076 /* allocate new VM object */
17077 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17078 new_object = vm_object_allocate(size);
17079 assert(new_object);
17080
17081 /* allocate new VM map entry */
17082 new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17083 assert(new_entry);
17084
17085 /* finish initializing new VM map entry */
17086 new_entry->protection = VM_PROT_DEFAULT;
17087 new_entry->max_protection = VM_PROT_DEFAULT;
17088 new_entry->use_pmap = TRUE;
17089
17090 /* make new VM map entry point to new VM object */
17091 new_entry->vme_start = 0;
17092 new_entry->vme_end = size;
17093 VME_OBJECT_SET(new_entry, new_object);
17094 VME_OFFSET_SET(new_entry, 0);
17095
17096 /* create a new pmap to map "copy_map" */
17097 pmap_flags = 0;
17098 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17099 #if PMAP_CREATE_FORCE_4K_PAGES
17100 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17101 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17102 pmap_flags |= PMAP_CREATE_64BIT;
17103 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17104 assert(new_pmap);
17105
17106 /* create a new pageable VM map to map "copy_map" */
17107 new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
17108 assert(new_map);
17109 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17110
17111 /* map "copy_map" in the new VM map */
17112 src_start = 0;
17113 kr = vm_map_copyout_internal(
17114 new_map,
17115 &src_start,
17116 copy_map,
17117 copy_map->size,
17118 FALSE, /* consume_on_success */
17119 VM_PROT_DEFAULT,
17120 VM_PROT_DEFAULT,
17121 VM_INHERIT_DEFAULT);
17122 assert(kr == KERN_SUCCESS);
17123 src_end = src_start + copy_map->size;
17124
17125 /* map "new_object" in the new VM map */
17126 vm_object_reference(new_object);
17127 dst_start = 0;
17128 kr = vm_map_enter(new_map,
17129 &dst_start,
17130 size,
17131 0, /* mask */
17132 VM_FLAGS_ANYWHERE,
17133 VM_MAP_KERNEL_FLAGS_NONE,
17134 VM_KERN_MEMORY_OSFMK,
17135 new_object,
17136 0, /* offset */
17137 FALSE, /* needs copy */
17138 VM_PROT_DEFAULT,
17139 VM_PROT_DEFAULT,
17140 VM_INHERIT_DEFAULT);
17141 assert(kr == KERN_SUCCESS);
17142 dst_end = dst_start + size;
17143
17144 /* get a kernel buffer */
17145 kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);
17146 assert(kbuf);
17147
17148 /* physically copy "copy_map" mappings to new VM object */
17149 for (src_cur = src_start, dst_cur = dst_start;
17150 src_cur < src_end;
17151 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17152 vm_size_t bytes;
17153
17154 bytes = PAGE_SIZE;
17155 if (src_cur + PAGE_SIZE > src_end) {
17156 /* partial copy for last page */
17157 bytes = src_end - src_cur;
17158 assert(bytes > 0 && bytes < PAGE_SIZE);
17159 /* rest of dst page should be zero-filled */
17160 }
17161 /* get bytes from src mapping */
17162 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17163 if (kr != KERN_SUCCESS) {
17164 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17165 }
17166 /* put bytes in dst mapping */
17167 assert(dst_cur < dst_end);
17168 assert(dst_cur + bytes <= dst_end);
17169 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17170 if (kr != KERN_SUCCESS) {
17171 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17172 }
17173 }
17174
17175 /* free kernel buffer */
17176 kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
17177 kbuf = NULL;
17178
17179 /* destroy new map */
17180 vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17181 new_map = VM_MAP_NULL;
17182
17183 /* dispose of the old map entries in "copy_map" */
17184 while (vm_map_copy_first_entry(copy_map) !=
17185 vm_map_copy_to_entry(copy_map)) {
17186 entry = vm_map_copy_first_entry(copy_map);
17187 vm_map_copy_entry_unlink(copy_map, entry);
17188 if (entry->is_sub_map) {
17189 vm_map_deallocate(VME_SUBMAP(entry));
17190 } else {
17191 vm_object_deallocate(VME_OBJECT(entry));
17192 }
17193 vm_map_copy_entry_dispose(copy_map, entry);
17194 }
17195
17196 /* change "copy_map"'s page_size to match "target_map" */
17197 copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
17198 copy_map->offset = 0;
17199 copy_map->size = size;
17200
17201 /* insert new map entry in "copy_map" */
17202 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17203 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17204
17205 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17206 }
17207
17208 void
17209 vm_map_copy_adjust_get_target_copy_map(
17210 vm_map_copy_t copy_map,
17211 vm_map_copy_t *target_copy_map_p);
17212 void
17213 vm_map_copy_adjust_get_target_copy_map(
17214 vm_map_copy_t copy_map,
17215 vm_map_copy_t *target_copy_map_p)
17216 {
17217 vm_map_copy_t target_copy_map;
17218 vm_map_entry_t entry, target_entry;
17219
17220 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17221 /* the caller already has a "target_copy_map": use it */
17222 return;
17223 }
17224
17225 /* the caller wants us to create a new copy of "copy_map" */
17226 target_copy_map = vm_map_copy_allocate();
17227 target_copy_map->type = copy_map->type;
17228 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17229 target_copy_map->offset = copy_map->offset;
17230 target_copy_map->size = copy_map->size;
17231 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17232 vm_map_store_init(&target_copy_map->cpy_hdr);
17233 for (entry = vm_map_copy_first_entry(copy_map);
17234 entry != vm_map_copy_to_entry(copy_map);
17235 entry = entry->vme_next) {
17236 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17237 vm_map_entry_copy_full(target_entry, entry);
17238 if (target_entry->is_sub_map) {
17239 vm_map_reference(VME_SUBMAP(target_entry));
17240 } else {
17241 vm_object_reference(VME_OBJECT(target_entry));
17242 }
17243 vm_map_copy_entry_link(
17244 target_copy_map,
17245 vm_map_copy_last_entry(target_copy_map),
17246 target_entry);
17247 }
17248 entry = VM_MAP_ENTRY_NULL;
17249 *target_copy_map_p = target_copy_map;
17250 }
17251
17252 void
17253 vm_map_copy_trim(
17254 vm_map_copy_t copy_map,
17255 int new_page_shift,
17256 vm_map_offset_t trim_start,
17257 vm_map_offset_t trim_end);
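/*
 * Dispose of the [trim_start, trim_end) portion of "copy_map" (offsets
 * relative to the start of the copy), clipping entries with the new
 * page shift so sub-page trim boundaries are honored.
 */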
17258 void
17259 vm_map_copy_trim(
17260 vm_map_copy_t copy_map,
17261 int new_page_shift,
17262 vm_map_offset_t trim_start,
17263 vm_map_offset_t trim_end)
17264 {
17265 int copy_page_shift;
17266 vm_map_entry_t entry, next_entry;
17267
17268 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17269 assert(copy_map->cpy_hdr.nentries > 0);
17270
17271 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17272 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17273
17274 /* use the new page_shift to do the clipping */
17275 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17276 copy_map->cpy_hdr.page_shift = new_page_shift;
17277
17278 for (entry = vm_map_copy_first_entry(copy_map);
17279 entry != vm_map_copy_to_entry(copy_map);
17280 entry = next_entry) {
17281 next_entry = entry->vme_next;
17282 if (entry->vme_end <= trim_start) {
17283 /* entry fully before trim range: skip */
17284 continue;
17285 }
17286 if (entry->vme_start >= trim_end) {
17287 /* entry fully after trim range: done */
17288 break;
17289 }
17290 /* clip entry if needed */
17291 vm_map_copy_clip_start(copy_map, entry, trim_start);
17292 vm_map_copy_clip_end(copy_map, entry, trim_end);
17293 /* dispose of entry */
17294 copy_map->size -= entry->vme_end - entry->vme_start;
17295 vm_map_copy_entry_unlink(copy_map, entry);
17296 if (entry->is_sub_map) {
17297 vm_map_deallocate(VME_SUBMAP(entry));
17298 } else {
17299 vm_object_deallocate(VME_OBJECT(entry));
17300 }
17301 vm_map_copy_entry_dispose(copy_map, entry);
17302 entry = VM_MAP_ENTRY_NULL;
17303 }
17304
17305 /* restore copy_map's original page_shift */
17306 copy_map->cpy_hdr.page_shift = copy_page_shift;
17307 }
17308
17309 /*
17310 * Make any necessary adjustments to "copy_map" to allow it to be
17311 * mapped into "target_map".
17312 * If no changes were necessary, "target_copy_map" points to the
17313 * untouched "copy_map".
17314 * If changes are necessary, changes will be made to "target_copy_map".
17315 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17316 * copy the original "copy_map" to it before applying the changes.
17317 * The caller should discard "target_copy_map" if it's not the same as
17318 * the original "copy_map".
17319 */
17320 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
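/*
 * Worked example (a sketch, assuming 4K copy_map pages and a 16K-page
 * target_map): a copy covering [0x1000, 0x5000) is aligned to neither end
 * of a 16K page. Without "copy", it can still be mapped by over-mapping to
 * the enclosing 16K boundaries [0x0, 0x8000), i.e. an extra 0x1000 bytes at
 * the start and 0x3000 bytes at the end; overmap_start/overmap_end report
 * that extra coverage to the caller.
 */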
17321 kern_return_t
17322 vm_map_copy_adjust_to_target(
17323 vm_map_copy_t src_copy_map,
17324 vm_map_offset_t offset,
17325 vm_map_size_t size,
17326 vm_map_t target_map,
17327 boolean_t copy,
17328 vm_map_copy_t *target_copy_map_p,
17329 vm_map_offset_t *overmap_start_p,
17330 vm_map_offset_t *overmap_end_p,
17331 vm_map_offset_t *trimmed_start_p)
17332 {
17333 vm_map_copy_t copy_map, target_copy_map;
17334 vm_map_size_t target_size;
17335 vm_map_size_t src_copy_map_size;
17336 vm_map_size_t overmap_start, overmap_end;
17337 int misalignments;
17338 vm_map_entry_t entry, target_entry;
17339 vm_map_offset_t addr_adjustment;
17340 vm_map_offset_t new_start, new_end;
17341 int copy_page_mask, target_page_mask;
17342 int copy_page_shift, target_page_shift;
17343 vm_map_offset_t trimmed_end;
17344
17345 /*
17346 * Assert that the vm_map_copy is coming from the right
17347 * zone and hasn't been forged
17348 */
17349 vm_map_copy_require(src_copy_map);
17350 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17351
17352 /*
17353 * Start working with "src_copy_map" but we'll switch
17354 * to "target_copy_map" as soon as we start making adjustments.
17355 */
17356 copy_map = src_copy_map;
17357 src_copy_map_size = src_copy_map->size;
17358
17359 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17360 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17361 target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
17362 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17363
17364 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17365
17366 target_copy_map = *target_copy_map_p;
17367 if (target_copy_map != VM_MAP_COPY_NULL) {
17368 vm_map_copy_require(target_copy_map);
17369 }
17370
17371 if (offset + size > copy_map->size) {
17372 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17373 return KERN_INVALID_ARGUMENT;
17374 }
17375
17376 /* trim the end */
17377 trimmed_end = 0;
17378 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17379 if (new_end < copy_map->size) {
17380 trimmed_end = src_copy_map_size - new_end;
17381 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17382 /* get "target_copy_map" if needed and adjust it */
17383 vm_map_copy_adjust_get_target_copy_map(copy_map,
17384 &target_copy_map);
17385 copy_map = target_copy_map;
17386 vm_map_copy_trim(target_copy_map, target_page_shift,
17387 new_end, copy_map->size);
17388 }
17389
17390 /* trim the start */
17391 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17392 if (new_start != 0) {
17393 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17394 /* get "target_copy_map" if needed and adjust it */
17395 vm_map_copy_adjust_get_target_copy_map(copy_map,
17396 &target_copy_map);
17397 copy_map = target_copy_map;
17398 vm_map_copy_trim(target_copy_map, target_page_shift,
17399 0, new_start);
17400 }
17401 *trimmed_start_p = new_start;
17402
17403 /* target_size starts with what's left after trimming */
17404 target_size = copy_map->size;
17405 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17406 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17407 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17408 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17409
17410 /* check for misalignments but don't adjust yet */
17411 misalignments = 0;
17412 overmap_start = 0;
17413 overmap_end = 0;
17414 if (copy_page_shift < target_page_shift) {
17415 /*
17416 * Remapping from 4K to 16K: check the VM object alignments
17417 * throughout the range.
17418 * If the start and end of the range are mis-aligned, we can
17419 * over-map to re-align, and adjust the "overmap" start/end
17420 * and "target_size" of the range accordingly.
17421 * If there is any mis-alignment within the range:
17422 * if "copy":
17423 * we can do immediate-copy instead of copy-on-write,
17424 * else:
17425 * no way to remap and share; fail.
17426 */
17427 for (entry = vm_map_copy_first_entry(copy_map);
17428 entry != vm_map_copy_to_entry(copy_map);
17429 entry = entry->vme_next) {
17430 vm_object_offset_t object_offset_start, object_offset_end;
17431
17432 object_offset_start = VME_OFFSET(entry);
17433 object_offset_end = object_offset_start;
17434 object_offset_end += entry->vme_end - entry->vme_start;
17435 if (object_offset_start & target_page_mask) {
17436 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17437 overmap_start++;
17438 } else {
17439 misalignments++;
17440 }
17441 }
17442 if (object_offset_end & target_page_mask) {
17443 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17444 overmap_end++;
17445 } else {
17446 misalignments++;
17447 }
17448 }
17449 }
17450 }
17451 entry = VM_MAP_ENTRY_NULL;
17452
17453 /* decide how to deal with misalignments */
17454 assert(overmap_start <= 1);
17455 assert(overmap_end <= 1);
17456 if (!overmap_start && !overmap_end && !misalignments) {
17457 /* copy_map is properly aligned for target_map ... */
17458 if (*trimmed_start_p) {
17459 /* ... but we trimmed it, so still need to adjust */
17460 } else {
17461 /* ... and we didn't trim anything: we're done */
17462 if (target_copy_map == VM_MAP_COPY_NULL) {
17463 target_copy_map = copy_map;
17464 }
17465 *target_copy_map_p = target_copy_map;
17466 *overmap_start_p = 0;
17467 *overmap_end_p = 0;
17468 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17469 return KERN_SUCCESS;
17470 }
17471 } else if (misalignments && !copy) {
17472 /* can't "share" if misaligned */
17473 DEBUG4K_ADJUST("unsupported sharing\n");
17474 #if MACH_ASSERT
17475 if (debug4k_panic_on_misaligned_sharing) {
17476 panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
17477 }
17478 #endif /* MACH_ASSERT */
17479 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17480 return KERN_NOT_SUPPORTED;
17481 } else {
17482 /* can't virtual-copy if misaligned (but can physical-copy) */
17483 DEBUG4K_ADJUST("mis-aligned copying\n");
17484 }
17485
17486 /* get a "target_copy_map" if needed and switch to it */
17487 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17488 copy_map = target_copy_map;
17489
17490 if (misalignments && copy) {
17491 vm_map_size_t target_copy_map_size;
17492
17493 /*
17494 * Can't do copy-on-write with misaligned mappings.
17495 * Replace the mappings with a physical copy of the original
17496 * mappings' contents.
17497 */
17498 target_copy_map_size = target_copy_map->size;
17499 vm_map_copy_to_physcopy(target_copy_map, target_map);
17500 *target_copy_map_p = target_copy_map;
17501 *overmap_start_p = 0;
17502 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17503 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17504 return KERN_SUCCESS;
17505 }
17506
17507 /* apply the adjustments */
17508 misalignments = 0;
17509 overmap_start = 0;
17510 overmap_end = 0;
17511 /* remove copy_map->offset, so that everything starts at offset 0 */
17512 addr_adjustment = copy_map->offset;
17513 /* also remove whatever we trimmed from the start */
17514 addr_adjustment += *trimmed_start_p;
17515 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17516 target_entry != vm_map_copy_to_entry(target_copy_map);
17517 target_entry = target_entry->vme_next) {
17518 vm_object_offset_t object_offset_start, object_offset_end;
17519
17520 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17521 object_offset_start = VME_OFFSET(target_entry);
17522 if (object_offset_start & target_page_mask) {
17523 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17524 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17525 /*
17526 * start of 1st entry is mis-aligned:
17527 * re-adjust by over-mapping.
17528 */
17529 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17530 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17531 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17532 } else {
17533 misalignments++;
17534 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17535 assert(copy);
17536 }
17537 }
17538
17539 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17540 target_size += overmap_start;
17541 } else {
17542 target_entry->vme_start += overmap_start;
17543 }
17544 target_entry->vme_end += overmap_start;
17545
17546 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17547 if (object_offset_end & target_page_mask) {
17548 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17549 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17550 /*
17551 * end of last entry is mis-aligned: re-adjust by over-mapping.
17552 */
17553 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17554 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17555 target_entry->vme_end += overmap_end;
17556 target_size += overmap_end;
17557 } else {
17558 misalignments++;
17559 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17560 assert(copy);
17561 }
17562 }
17563 target_entry->vme_start -= addr_adjustment;
17564 target_entry->vme_end -= addr_adjustment;
17565 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17566 }
17567
17568 target_copy_map->size = target_size;
17569 target_copy_map->offset += overmap_start;
17570 target_copy_map->offset -= addr_adjustment;
17571 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17572
17573 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17574 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17575 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17576 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17577
17578 *target_copy_map_p = target_copy_map;
17579 *overmap_start_p = overmap_start;
17580 *overmap_end_p = overmap_end;
17581
17582 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17583 return KERN_SUCCESS;
17584 }
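/*
 * Worked example (added for exposition; not in the original source),
 * assuming a 4K-page copy map being adjusted for a 16K-page target map
 * (target_page_mask = 0x3fff) and a first entry whose object offset is
 * VME_OFFSET(entry) = 0x5000:
 *
 *   overmap_start = 0x5000 - trunc_page_mask_64(0x5000, 0x3fff)
 *                 = 0x5000 - 0x4000
 *                 = 0x1000
 *
 * so the entry's object offset is pulled back to 0x4000 and the mapping
 * grows by 0x1000 at the front; overmap_end is computed symmetrically at
 * the tail with round_page_mask_64().
 */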
17585
17586 kern_return_t
17587 vm_map_range_physical_size(
17588 vm_map_t map,
17589 vm_map_address_t start,
17590 mach_vm_size_t size,
17591 mach_vm_size_t * phys_size)
17592 {
17593 kern_return_t kr;
17594 vm_map_copy_t copy_map, target_copy_map;
17595 vm_map_offset_t adjusted_start, adjusted_end;
17596 vm_map_size_t adjusted_size;
17597 vm_prot_t cur_prot, max_prot;
17598 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17599 vm_map_kernel_flags_t vmk_flags;
17600
17601 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17602 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17603 adjusted_size = adjusted_end - adjusted_start;
17604 *phys_size = adjusted_size;
17605 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17606 return KERN_SUCCESS;
17607 }
17608 if (start == 0) {
17609 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17610 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17611 adjusted_size = adjusted_end - adjusted_start;
17612 *phys_size = adjusted_size;
17613 return KERN_SUCCESS;
17614 }
17615 if (adjusted_size == 0) {
17616 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17617 *phys_size = 0;
17618 return KERN_SUCCESS;
17619 }
17620
17621 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17622 vmk_flags.vmkf_copy_pageable = TRUE;
17623 vmk_flags.vmkf_copy_same_map = TRUE;
17624 assert(adjusted_size != 0);
17625 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17626 VM_PROT_NONE, /* required_protection: no check here */
17627 FALSE /* copy */,
17628 &copy_map,
17629 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17630 vmk_flags);
17631 if (kr != KERN_SUCCESS) {
17632 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17633 //assert(0);
17634 *phys_size = 0;
17635 return kr;
17636 }
17637 assert(copy_map != VM_MAP_COPY_NULL);
17638 target_copy_map = copy_map;
17639 DEBUG4K_ADJUST("adjusting...\n");
17640 kr = vm_map_copy_adjust_to_target(
17641 copy_map,
17642 start - adjusted_start, /* offset */
17643 size, /* size */
17644 kernel_map,
17645 FALSE, /* copy */
17646 &target_copy_map,
17647 &overmap_start,
17648 &overmap_end,
17649 &trimmed_start);
17650 if (kr == KERN_SUCCESS) {
17651 if (target_copy_map->size != *phys_size) {
17652 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17653 }
17654 *phys_size = target_copy_map->size;
17655 } else {
17656 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17657 //assert(0);
17658 *phys_size = 0;
17659 }
17660 vm_map_copy_discard(copy_map);
17661 copy_map = VM_MAP_COPY_NULL;
17662
17663 return kr;
17664 }
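/*
 * Illustrative sketch (added for exposition; not in the original source):
 * how a kernel caller might use vm_map_range_physical_size() to learn how
 * much kernel-page-sized backing a user range needs. The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static mach_vm_size_t
example_phys_size(vm_map_t user_map, vm_map_address_t addr, mach_vm_size_t len)
{
        mach_vm_size_t phys_size = 0;

        if (vm_map_range_physical_size(user_map, addr, len,
            &phys_size) != KERN_SUCCESS) {
                return 0;
        }
        /* phys_size covers [addr, addr + len) rounded to kernel pages */
        return phys_size;
}
#endif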
17665
17666
17667 kern_return_t
17668 memory_entry_check_for_adjustment(
17669 vm_map_t src_map,
17670 ipc_port_t port,
17671 vm_map_offset_t *overmap_start,
17672 vm_map_offset_t *overmap_end)
17673 {
17674 kern_return_t kr = KERN_SUCCESS;
17675 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17676
17677 assert(port);
17678 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17679
17680 vm_named_entry_t named_entry;
17681
17682 named_entry = (vm_named_entry_t) port->ip_kobject;
17683 named_entry_lock(named_entry);
17684 copy_map = named_entry->backing.copy;
17685 target_copy_map = copy_map;
17686
17687 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17688 vm_map_offset_t trimmed_start;
17689
17690 trimmed_start = 0;
17691 DEBUG4K_ADJUST("adjusting...\n");
17692 kr = vm_map_copy_adjust_to_target(
17693 copy_map,
17694 0, /* offset */
17695 copy_map->size, /* size */
17696 src_map,
17697 FALSE, /* copy */
17698 &target_copy_map,
17699 overmap_start,
17700 overmap_end,
17701 &trimmed_start);
17702 assert(trimmed_start == 0);
17703 }
17704 named_entry_unlock(named_entry);
17705
17706 return kr;
17707 }
17708
17709
17710 /*
17711 * Routine: vm_remap
17712 *
17713 * Map a portion of a task's address space.
17714 * Mapped region must not overlap more than
17715 * one vm memory object. Protections and
17716 * inheritance attributes remain the same
17717 * as in the original task and are out parameters.
17718 * Source and Target task can be identical.
17719 * Other attributes are identical to those for vm_map().
17720 */
17721 kern_return_t
17722 vm_map_remap(
17723 vm_map_t target_map,
17724 vm_map_address_t *address,
17725 vm_map_size_t size,
17726 vm_map_offset_t mask,
17727 int flags,
17728 vm_map_kernel_flags_t vmk_flags,
17729 vm_tag_t tag,
17730 vm_map_t src_map,
17731 vm_map_offset_t memory_address,
17732 boolean_t copy,
17733 vm_prot_t *cur_protection,
17734 vm_prot_t *max_protection,
17735 vm_inherit_t inheritance)
17736 {
17737 kern_return_t result;
17738 vm_map_entry_t entry;
17739 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
17740 vm_map_entry_t new_entry;
17741 vm_map_copy_t copy_map;
17742 vm_map_offset_t offset_in_mapping;
17743 vm_map_size_t target_size = 0;
17744 vm_map_size_t src_page_mask, target_page_mask;
17745 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17746 vm_map_offset_t initial_memory_address;
17747 vm_map_size_t initial_size;
17748
17749 if (target_map == VM_MAP_NULL) {
17750 return KERN_INVALID_ARGUMENT;
17751 }
17752
17753 initial_memory_address = memory_address;
17754 initial_size = size;
17755 src_page_mask = VM_MAP_PAGE_MASK(src_map);
17756 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17757
17758 switch (inheritance) {
17759 case VM_INHERIT_NONE:
17760 case VM_INHERIT_COPY:
17761 case VM_INHERIT_SHARE:
17762 if (size != 0 && src_map != VM_MAP_NULL) {
17763 break;
17764 }
17765 OS_FALLTHROUGH;
17766 default:
17767 return KERN_INVALID_ARGUMENT;
17768 }
17769
17770 if (src_page_mask != target_page_mask) {
17771 if (copy) {
17772 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17773 } else {
17774 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17775 }
17776 }
17777
17778 /*
17779 * If the user is requesting that we return the address of the
17780 * first byte of the data (rather than the base of the page),
17781 * then we use different rounding semantics: specifically,
17782 * we assume that (memory_address, size) describes a region
17783 * all of whose pages we must cover, rather than a base to be truncated
17784 * down and a size to be added to that base. So we figure out
17785 * the highest page that the requested region includes and make
17786 * sure that the size will cover it.
17787 *
17788 * The key example we're worried about is of the form:
17789 *
17790 * memory_address = 0x1ff0, size = 0x20
17791 *
17792 * With the old semantics, we round down the memory_address to 0x1000
17793 * and round up the size to 0x1000, resulting in our covering *only*
17794 * page 0x1000. With the new semantics, we'd realize that the region covers
17795 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
17796 * 0x1000 and page 0x2000 in the region we remap.
17797 */
17798 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
17799 vm_map_offset_t range_start, range_end;
17800
17801 range_start = vm_map_trunc_page(memory_address, src_page_mask);
17802 range_end = vm_map_round_page(memory_address + size, src_page_mask);
17803 memory_address = range_start;
17804 size = range_end - range_start;
17805 offset_in_mapping = initial_memory_address - memory_address;
17806 } else {
17807 /*
17808 * IMPORTANT:
17809 * This legacy code path is broken: for the range mentioned
17810 * above [ memory_address = 0x1ff0, size = 0x20 ], which spans
17811 * two 4k pages, it yields [ memory_address = 0x1000,
17812 * size = 0x1000 ], which covers only the first 4k page.
17813 * BUT some code unfortunately depends on this bug, so we
17814 * can't fix it without breaking something.
17815 * New code is automatically opted into the new behavior by
17816 * passing the new VM_FLAGS_RETURN_DATA_ADDR flag.
17817 */
17818 offset_in_mapping = 0;
17819 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
17820 size = vm_map_round_page(size, src_page_mask);
17821 initial_memory_address = memory_address;
17822 initial_size = size;
17823 }
17824
17825
17826 if (size == 0) {
17827 return KERN_INVALID_ARGUMENT;
17828 }
17829
17830 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
17831 /* must be copy-on-write to be "media resilient" */
17832 if (!copy) {
17833 return KERN_INVALID_ARGUMENT;
17834 }
17835 }
17836
17837 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
17838 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
17839
17840 assert(size != 0);
17841 result = vm_map_copy_extract(src_map,
17842 memory_address,
17843 size,
17844 VM_PROT_NONE, /* required_protection: no check here */
17845 copy, &copy_map,
17846 cur_protection,
17847 max_protection,
17848 inheritance,
17849 vmk_flags);
17850 if (result != KERN_SUCCESS) {
17851 return result;
17852 }
17853 assert(copy_map != VM_MAP_COPY_NULL);
17854
17855 overmap_start = 0;
17856 overmap_end = 0;
17857 trimmed_start = 0;
17858 target_size = size;
17859 if (src_page_mask != target_page_mask) {
17860 vm_map_copy_t target_copy_map;
17861
17862 target_copy_map = copy_map; /* can modify "copy_map" itself */
17863 DEBUG4K_ADJUST("adjusting...\n");
17864 result = vm_map_copy_adjust_to_target(
17865 copy_map,
17866 offset_in_mapping, /* offset */
17867 initial_size,
17868 target_map,
17869 copy,
17870 &target_copy_map,
17871 &overmap_start,
17872 &overmap_end,
17873 &trimmed_start);
17874 if (result != KERN_SUCCESS) {
17875 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
17876 vm_map_copy_discard(copy_map);
17877 return result;
17878 }
17879 if (trimmed_start == 0) {
17880 /* nothing trimmed: no adjustment needed */
17881 } else if (trimmed_start >= offset_in_mapping) {
17882 /* trimmed more than offset_in_mapping: nothing left */
17883 assert(overmap_start == 0);
17884 assert(overmap_end == 0);
17885 offset_in_mapping = 0;
17886 } else {
17887 /* trimmed some of offset_in_mapping: adjust */
17888 assert(overmap_start == 0);
17889 assert(overmap_end == 0);
17890 offset_in_mapping -= trimmed_start;
17891 }
17892 offset_in_mapping += overmap_start;
17893 target_size = target_copy_map->size;
17894 }
17895
17896 /*
17897 * Allocate/check a range of free virtual address
17898 * space for the target
17899 */
17900 *address = vm_map_trunc_page(*address, target_page_mask);
17901 vm_map_lock(target_map);
17902 target_size = vm_map_round_page(target_size, target_page_mask);
17903 result = vm_map_remap_range_allocate(target_map, address,
17904 target_size,
17905 mask, flags, vmk_flags, tag,
17906 &insp_entry);
17907
17908 for (entry = vm_map_copy_first_entry(copy_map);
17909 entry != vm_map_copy_to_entry(copy_map);
17910 entry = new_entry) {
17911 new_entry = entry->vme_next;
17912 vm_map_copy_entry_unlink(copy_map, entry);
17913 if (result == KERN_SUCCESS) {
17914 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17915 /* no codesigning -> read-only access */
17916 entry->max_protection = VM_PROT_READ;
17917 entry->protection = VM_PROT_READ;
17918 entry->vme_resilient_codesign = TRUE;
17919 }
17920 entry->vme_start += *address;
17921 entry->vme_end += *address;
17922 assert(!entry->map_aligned);
17923 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
17924 !entry->is_sub_map &&
17925 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
17926 VME_OBJECT(entry)->internal)) {
17927 entry->vme_resilient_media = TRUE;
17928 }
17929 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
17930 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
17931 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
17932 vm_map_store_entry_link(target_map, insp_entry, entry,
17933 vmk_flags);
17934 insp_entry = entry;
17935 } else {
17936 if (!entry->is_sub_map) {
17937 vm_object_deallocate(VME_OBJECT(entry));
17938 } else {
17939 vm_map_deallocate(VME_SUBMAP(entry));
17940 }
17941 vm_map_copy_entry_dispose(copy_map, entry);
17942 }
17943 }
17944
17945 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17946 *cur_protection = VM_PROT_READ;
17947 *max_protection = VM_PROT_READ;
17948 }
17949
17950 if (target_map->disable_vmentry_reuse == TRUE) {
17951 assert(!target_map->is_nested_map);
17952 if (target_map->highest_entry_end < insp_entry->vme_end) {
17953 target_map->highest_entry_end = insp_entry->vme_end;
17954 }
17955 }
17956
17957 if (result == KERN_SUCCESS) {
17958 target_map->size += target_size;
17959 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
17960
17961 }
17962 vm_map_unlock(target_map);
17963
17964 if (result == KERN_SUCCESS && target_map->wiring_required) {
17965 result = vm_map_wire_kernel(target_map, *address,
17966 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
17967 TRUE);
17968 }
17969
17970 /*
17971 * If requested, return the address of the data pointed to by the
17972 * request, rather than the base of the resulting page.
17973 */
17974 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
17975 *address += offset_in_mapping;
17976 }
17977
17978 if (src_page_mask != target_page_mask) {
17979 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
17980 }
17981 vm_map_copy_discard(copy_map);
17982 copy_map = VM_MAP_COPY_NULL;
17983
17984 return result;
17985 }
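/*
 * Illustrative sketch (added for exposition; not in the original source):
 * sharing a range of one map into another with vm_map_remap(). The helper
 * name is hypothetical; protections and inheritance are returned by the
 * call, as described in the routine's comment above.
 */
#if 0 /* example only */
static kern_return_t
example_remap_share(
        vm_map_t                target_map,
        vm_map_t                src_map,
        vm_map_offset_t         src_addr,
        vm_map_size_t           len,
        vm_map_address_t        *dst_addr)      /* OUT */
{
        vm_prot_t cur_prot, max_prot;

        *dst_addr = 0;
        return vm_map_remap(target_map,
            dst_addr,
            len,
            0,                          /* mask: no extra alignment */
            VM_FLAGS_ANYWHERE,
            VM_MAP_KERNEL_FLAGS_NONE,
            VM_KERN_MEMORY_NONE,        /* tag */
            src_map,
            src_addr,
            FALSE,                      /* copy: share, don't copy */
            &cur_prot,                  /* OUT */
            &max_prot,                  /* OUT */
            VM_INHERIT_DEFAULT);
}
#endif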
17986
17987 /*
17988 * Routine: vm_map_remap_range_allocate
17989 *
17990 * Description:
17991 * Allocate a range in the specified virtual address map.
17992 * returns the address and the map entry just before the allocated
17993 * range
17994 *
17995 * Map must be locked.
17996 */
17997
17998 static kern_return_t
17999 vm_map_remap_range_allocate(
18000 vm_map_t map,
18001 vm_map_address_t *address, /* IN/OUT */
18002 vm_map_size_t size,
18003 vm_map_offset_t mask,
18004 int flags,
18005 vm_map_kernel_flags_t vmk_flags,
18006 __unused vm_tag_t tag,
18007 vm_map_entry_t *map_entry) /* OUT */
18008 {
18009 vm_map_entry_t entry;
18010 vm_map_offset_t start;
18011 vm_map_offset_t end;
18012 vm_map_offset_t desired_empty_end;
18013 kern_return_t kr;
18014 vm_map_entry_t hole_entry;
18015
18016 StartAgain:;
18017
18018 start = *address;
18019
18020 if (flags & VM_FLAGS_ANYWHERE) {
18021 if (flags & VM_FLAGS_RANDOM_ADDR) {
18022 /*
18023 * Get a random start address.
18024 */
18025 kr = vm_map_random_address_for_size(map, address, size);
18026 if (kr != KERN_SUCCESS) {
18027 return kr;
18028 }
18029 start = *address;
18030 }
18031
18032 /*
18033 * Calculate the first possible address.
18034 */
18035
18036 if (start < map->min_offset) {
18037 start = map->min_offset;
18038 }
18039 if (start > map->max_offset) {
18040 return KERN_NO_SPACE;
18041 }
18042
18043 /*
18044 * Look for the first possible address;
18045 * if there's already something at this
18046 * address, we have to start after it.
18047 */
18048
18049 if (map->disable_vmentry_reuse == TRUE) {
18050 VM_MAP_HIGHEST_ENTRY(map, entry, start);
18051 } else {
18052 if (map->holelistenabled) {
18053 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
18054
18055 if (hole_entry == NULL) {
18056 /*
18057 * No more space in the map?
18058 */
18059 return KERN_NO_SPACE;
18060 } else {
18061 boolean_t found_hole = FALSE;
18062
18063 do {
18064 if (hole_entry->vme_start >= start) {
18065 start = hole_entry->vme_start;
18066 found_hole = TRUE;
18067 break;
18068 }
18069
18070 if (hole_entry->vme_end > start) {
18071 found_hole = TRUE;
18072 break;
18073 }
18074 hole_entry = hole_entry->vme_next;
18075 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
18076
18077 if (found_hole == FALSE) {
18078 return KERN_NO_SPACE;
18079 }
18080
18081 entry = hole_entry;
18082 }
18083 } else {
18084 assert(first_free_is_valid(map));
18085 if (start == map->min_offset) {
18086 if ((entry = map->first_free) != vm_map_to_entry(map)) {
18087 start = entry->vme_end;
18088 }
18089 } else {
18090 vm_map_entry_t tmp_entry;
18091 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
18092 start = tmp_entry->vme_end;
18093 }
18094 entry = tmp_entry;
18095 }
18096 }
18097 start = vm_map_round_page(start,
18098 VM_MAP_PAGE_MASK(map));
18099 }
18100
18101 /*
18102 * In any case, the "entry" always precedes
18103 * the proposed new region throughout the
18104 * loop:
18105 */
18106
18107 while (TRUE) {
18108 vm_map_entry_t next;
18109
18110 /*
18111 * Find the end of the proposed new region.
18112 * Be sure we didn't go beyond the end, or
18113 * wrap around the address.
18114 */
18115
18116 end = ((start + mask) & ~mask);
18117 end = vm_map_round_page(end,
18118 VM_MAP_PAGE_MASK(map));
18119 if (end < start) {
18120 return KERN_NO_SPACE;
18121 }
18122 start = end;
18123 end += size;
18124
18125 /* We want an entire page of empty space, but don't increase the allocation size. */
18126 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18127
18128 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
18129 if (map->wait_for_space) {
18130 if (size <= (map->max_offset -
18131 map->min_offset)) {
18132 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18133 vm_map_unlock(map);
18134 thread_block(THREAD_CONTINUE_NULL);
18135 vm_map_lock(map);
18136 goto StartAgain;
18137 }
18138 }
18139
18140 return KERN_NO_SPACE;
18141 }
18142
18143 next = entry->vme_next;
18144
18145 if (map->holelistenabled) {
18146 if (entry->vme_end >= desired_empty_end) {
18147 break;
18148 }
18149 } else {
18150 /*
18151 * If there are no more entries, we must win.
18152 *
18153 * OR
18154 *
18155 * If there is another entry, it must be
18156 * after the end of the potential new region.
18157 */
18158
18159 if (next == vm_map_to_entry(map)) {
18160 break;
18161 }
18162
18163 if (next->vme_start >= desired_empty_end) {
18164 break;
18165 }
18166 }
18167
18168 /*
18169 * Didn't fit -- move to the next entry.
18170 */
18171
18172 entry = next;
18173
18174 if (map->holelistenabled) {
18175 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
18176 /*
18177 * Wrapped around
18178 */
18179 return KERN_NO_SPACE;
18180 }
18181 start = entry->vme_start;
18182 } else {
18183 start = entry->vme_end;
18184 }
18185 }
18186
18187 if (map->holelistenabled) {
18188 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18189 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
18190 }
18191 }
18192
18193 *address = start;
18194 } else {
18195 vm_map_entry_t temp_entry;
18196
18197 /*
18198 * Verify that:
18199 * the address doesn't itself violate
18200 * the mask requirement.
18201 */
18202
18203 if ((start & mask) != 0) {
18204 return KERN_NO_SPACE;
18205 }
18206
18207
18208 /*
18209 * ... the address is within bounds
18210 */
18211
18212 end = start + size;
18213
18214 if ((start < map->min_offset) ||
18215 (end > map->max_offset) ||
18216 (start >= end)) {
18217 return KERN_INVALID_ADDRESS;
18218 }
18219
18220 /*
18221 * If we're asked to overwrite whatever was mapped in that
18222 * range, first deallocate that range.
18223 */
18224 if (flags & VM_FLAGS_OVERWRITE) {
18225 vm_map_t zap_map;
18226 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
18227
18228 /*
18229 * We use a "zap_map" to avoid having to unlock
18230 * the "map" in vm_map_delete(), which would compromise
18231 * the atomicity of the "deallocate" and then "remap"
18232 * combination.
18233 */
18234 zap_map = vm_map_create(PMAP_NULL,
18235 start,
18236 end,
18237 map->hdr.entries_pageable);
18238 if (zap_map == VM_MAP_NULL) {
18239 return KERN_RESOURCE_SHORTAGE;
18240 }
18241 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
18242 vm_map_disable_hole_optimization(zap_map);
18243
18244 if (vmk_flags.vmkf_overwrite_immutable) {
18245 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18246 }
18247 kr = vm_map_delete(map, start, end,
18248 remove_flags,
18249 zap_map);
18250 if (kr == KERN_SUCCESS) {
18251 vm_map_destroy(zap_map,
18252 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18253 zap_map = VM_MAP_NULL;
18254 }
18255 }
18256
18257 /*
18258 * ... the starting address isn't allocated
18259 */
18260
18261 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18262 return KERN_NO_SPACE;
18263 }
18264
18265 entry = temp_entry;
18266
18267 /*
18268 * ... the next region doesn't overlap the
18269 * end point.
18270 */
18271
18272 if ((entry->vme_next != vm_map_to_entry(map)) &&
18273 (entry->vme_next->vme_start < end)) {
18274 return KERN_NO_SPACE;
18275 }
18276 }
18277 *map_entry = entry;
18278 return KERN_SUCCESS;
18279 }
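/*
 * Worked example for the VM_FLAGS_ANYWHERE alignment step above (added
 * for exposition; not in the original source): with start = 0x5000 and
 * mask = 0x3fff (16K alignment),
 *
 *   end   = ((0x5000 + 0x3fff) & ~0x3fff) = 0x8000
 *   start = 0x8000, end = 0x8000 + size
 *
 * i.e. the candidate region is pushed up to the next 16K boundary before
 * being checked against the map's existing entries or hole list.
 */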
18280
18281 /*
18282 * vm_map_switch:
18283 *
18284 * Set the address map for the current thread to the specified map
18285 */
18286
18287 vm_map_t
18288 vm_map_switch(
18289 vm_map_t map)
18290 {
18291 int mycpu;
18292 thread_t thread = current_thread();
18293 vm_map_t oldmap = thread->map;
18294
18295 mp_disable_preemption();
18296 mycpu = cpu_number();
18297
18298 /*
18299 * Deactivate the current map and activate the requested map
18300 */
18301 PMAP_SWITCH_USER(thread, map, mycpu);
18302
18303 mp_enable_preemption();
18304 return oldmap;
18305 }
18306
18307
18308 /*
18309 * Routine: vm_map_write_user
18310 *
18311 * Description:
18312 * Copy out data from a kernel space into space in the
18313 * destination map. The space must already exist in the
18314 * destination map.
18315 * NOTE: This routine should only be called by threads
18316 * which can block on a page fault. i.e. kernel mode user
18317 * threads.
18318 *
18319 */
18320 kern_return_t
18321 vm_map_write_user(
18322 vm_map_t map,
18323 void *src_p,
18324 vm_map_address_t dst_addr,
18325 vm_size_t size)
18326 {
18327 kern_return_t kr = KERN_SUCCESS;
18328
18329 if (current_map() == map) {
18330 if (copyout(src_p, dst_addr, size)) {
18331 kr = KERN_INVALID_ADDRESS;
18332 }
18333 } else {
18334 vm_map_t oldmap;
18335
18336 /* take on the identity of the target map while doing */
18337 /* the transfer */
18338
18339 vm_map_reference(map);
18340 oldmap = vm_map_switch(map);
18341 if (copyout(src_p, dst_addr, size)) {
18342 kr = KERN_INVALID_ADDRESS;
18343 }
18344 vm_map_switch(oldmap);
18345 vm_map_deallocate(map);
18346 }
18347 return kr;
18348 }
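/*
 * Illustrative sketch (added for exposition; not in the original source):
 * copying a small kernel value out to an already-mapped address in a user
 * map with vm_map_write_user(). The helper name is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_write_user_u32(vm_map_t user_map, vm_map_address_t dst_addr, uint32_t value)
{
        /* may fault, so only call from a thread that can block */
        return vm_map_write_user(user_map, &value, dst_addr, sizeof(value));
}
#endif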
18349
18350 /*
18351 * Routine: vm_map_read_user
18352 *
18353 * Description:
18354 * Copy in data from a user space source map into the
18355 * kernel map. The space must already exist in the
18356 * kernel map.
18357 * NOTE: This routine should only be called by threads
18358 * which can block on a page fault. i.e. kernel mode user
18359 * threads.
18360 *
18361 */
18362 kern_return_t
18363 vm_map_read_user(
18364 vm_map_t map,
18365 vm_map_address_t src_addr,
18366 void *dst_p,
18367 vm_size_t size)
18368 {
18369 kern_return_t kr = KERN_SUCCESS;
18370
18371 if (current_map() == map) {
18372 if (copyin(src_addr, dst_p, size)) {
18373 kr = KERN_INVALID_ADDRESS;
18374 }
18375 } else {
18376 vm_map_t oldmap;
18377
18378 /* take on the identity of the target map while doing */
18379 /* the transfer */
18380
18381 vm_map_reference(map);
18382 oldmap = vm_map_switch(map);
18383 if (copyin(src_addr, dst_p, size)) {
18384 kr = KERN_INVALID_ADDRESS;
18385 }
18386 vm_map_switch(oldmap);
18387 vm_map_deallocate(map);
18388 }
18389 return kr;
18390 }
18391
18392
18393 /*
18394 * vm_map_check_protection:
18395 *
18396 * Assert that the target map allows the specified
18397 * privilege on the entire address region given.
18398 * The entire region must be allocated.
18399 */
18400 boolean_t
18401 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18402 vm_map_offset_t end, vm_prot_t protection)
18403 {
18404 vm_map_entry_t entry;
18405 vm_map_entry_t tmp_entry;
18406
18407 vm_map_lock(map);
18408
18409 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18410 vm_map_unlock(map);
18411 return FALSE;
18412 }
18413
18414 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18415 vm_map_unlock(map);
18416 return FALSE;
18417 }
18418
18419 entry = tmp_entry;
18420
18421 while (start < end) {
18422 if (entry == vm_map_to_entry(map)) {
18423 vm_map_unlock(map);
18424 return FALSE;
18425 }
18426
18427 /*
18428 * No holes allowed!
18429 */
18430
18431 if (start < entry->vme_start) {
18432 vm_map_unlock(map);
18433 return FALSE;
18434 }
18435
18436 /*
18437 * Check protection associated with entry.
18438 */
18439
18440 if ((entry->protection & protection) != protection) {
18441 vm_map_unlock(map);
18442 return FALSE;
18443 }
18444
18445 /* go to next entry */
18446
18447 start = entry->vme_end;
18448 entry = entry->vme_next;
18449 }
18450 vm_map_unlock(map);
18451 return TRUE;
18452 }
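/*
 * Illustrative sketch (added for exposition; not in the original source):
 * verifying that an entire user range is readable before acting on it.
 * The helper name is hypothetical.
 */
#if 0 /* example only */
static boolean_t
example_range_is_readable(vm_map_t map, vm_map_offset_t start, vm_map_size_t len)
{
        return vm_map_check_protection(map, start, start + len, VM_PROT_READ);
}
#endif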
18453
18454 kern_return_t
18455 vm_map_purgable_control(
18456 vm_map_t map,
18457 vm_map_offset_t address,
18458 vm_purgable_t control,
18459 int *state)
18460 {
18461 vm_map_entry_t entry;
18462 vm_object_t object;
18463 kern_return_t kr;
18464 boolean_t was_nonvolatile;
18465
18466 /*
18467 * Vet all the input parameters and current type and state of the
18468 * underlying object. Return with an error if anything is amiss.
18469 */
18470 if (map == VM_MAP_NULL) {
18471 return KERN_INVALID_ARGUMENT;
18472 }
18473
18474 if (control != VM_PURGABLE_SET_STATE &&
18475 control != VM_PURGABLE_GET_STATE &&
18476 control != VM_PURGABLE_PURGE_ALL &&
18477 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18478 return KERN_INVALID_ARGUMENT;
18479 }
18480
18481 if (control == VM_PURGABLE_PURGE_ALL) {
18482 vm_purgeable_object_purge_all();
18483 return KERN_SUCCESS;
18484 }
18485
18486 if ((control == VM_PURGABLE_SET_STATE ||
18487 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
18488 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
18489 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18490 return KERN_INVALID_ARGUMENT;
18491 }
18492
18493 vm_map_lock_read(map);
18494
18495 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
18496 /*
18497 * Must pass a valid non-submap address.
18498 */
18499 vm_map_unlock_read(map);
18500 return KERN_INVALID_ADDRESS;
18501 }
18502
18503 if ((entry->protection & VM_PROT_WRITE) == 0) {
18504 /*
18505 * Can't apply purgable controls to something you can't write.
18506 */
18507 vm_map_unlock_read(map);
18508 return KERN_PROTECTION_FAILURE;
18509 }
18510
18511 object = VME_OBJECT(entry);
18512 if (object == VM_OBJECT_NULL ||
18513 object->purgable == VM_PURGABLE_DENY) {
18514 /*
18515 * Object must already be present and be purgeable.
18516 */
18517 vm_map_unlock_read(map);
18518 return KERN_INVALID_ARGUMENT;
18519 }
18520
18521 vm_object_lock(object);
18522
18523 #if 00
18524 if (VME_OFFSET(entry) != 0 ||
18525 entry->vme_end - entry->vme_start != object->vo_size) {
18526 /*
18527 * Can only apply purgable controls to the whole (existing)
18528 * object at once.
18529 */
18530 vm_map_unlock_read(map);
18531 vm_object_unlock(object);
18532 return KERN_INVALID_ARGUMENT;
18533 }
18534 #endif
18535
18536 assert(!entry->is_sub_map);
18537 assert(!entry->use_pmap); /* purgeable has its own accounting */
18538
18539 vm_map_unlock_read(map);
18540
18541 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18542
18543 kr = vm_object_purgable_control(object, control, state);
18544
18545 if (was_nonvolatile &&
18546 object->purgable != VM_PURGABLE_NONVOLATILE &&
18547 map->pmap == kernel_pmap) {
18548 #if DEBUG
18549 object->vo_purgeable_volatilizer = kernel_task;
18550 #endif /* DEBUG */
18551 }
18552
18553 vm_object_unlock(object);
18554
18555 return kr;
18556 }
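/*
 * Illustrative sketch (added for exposition; not in the original source):
 * marking the purgeable object backing an address volatile so the system
 * may reclaim it under memory pressure. The helper name is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_make_volatile(vm_map_t map, vm_map_offset_t addr)
{
        int state = VM_PURGABLE_VOLATILE;

        /* the entry must be writable and backed by a purgeable object */
        return vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
}
#endif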
18557
18558 void
18559 vm_map_footprint_query_page_info(
18560 vm_map_t map,
18561 vm_map_entry_t map_entry,
18562 vm_map_offset_t curr_s_offset,
18563 int *disposition_p)
18564 {
18565 int pmap_disp;
18566 vm_object_t object;
18567 int disposition;
18568 int effective_page_size;
18569
18570 vm_map_lock_assert_held(map);
18571 assert(!map->has_corpse_footprint);
18572 assert(curr_s_offset >= map_entry->vme_start);
18573 assert(curr_s_offset < map_entry->vme_end);
18574
18575 object = VME_OBJECT(map_entry);
18576 if (object == VM_OBJECT_NULL) {
18577 *disposition_p = 0;
18578 return;
18579 }
18580
18581 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18582
18583 pmap_disp = 0;
18584 if (object == VM_OBJECT_NULL) {
18585 /* nothing mapped here: no need to ask */
18586 *disposition_p = 0;
18587 return;
18588 } else if (map_entry->is_sub_map &&
18589 !map_entry->use_pmap) {
18590 /* nested pmap: no footprint */
18591 *disposition_p = 0;
18592 return;
18593 }
18594
18595 /*
18596 * Query the pmap.
18597 */
18598 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18599
18600 /*
18601 * Compute this page's disposition.
18602 */
18603 disposition = 0;
18604
18605 /* deal with "alternate accounting" first */
18606 if (!map_entry->is_sub_map &&
18607 object->vo_no_footprint) {
18608 /* does not count in footprint */
18609 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18610 } else if (!map_entry->is_sub_map &&
18611 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18612 (object->purgable == VM_PURGABLE_DENY &&
18613 object->vo_ledger_tag)) &&
18614 VM_OBJECT_OWNER(object) != NULL &&
18615 VM_OBJECT_OWNER(object)->map == map) {
18616 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18617 if ((((curr_s_offset
18618 - map_entry->vme_start
18619 + VME_OFFSET(map_entry))
18620 / effective_page_size) <
18621 (object->resident_page_count +
18622 vm_compressor_pager_get_count(object->pager)))) {
18623 /*
18624 * Non-volatile purgeable object owned
18625 * by this task: report the first
18626 * "#resident + #compressed" pages as
18627 * "resident" (to show that they
18628 * contribute to the footprint) but not
18629 * "dirty" (to avoid double-counting
18630 * with the fake "non-volatile" region
18631 * we'll report at the end of the
18632 * address space to account for all
18633 * (mapped or not) non-volatile memory
18634 * owned by this task).
18635 */
18636 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18637 }
18638 } else if (!map_entry->is_sub_map &&
18639 (object->purgable == VM_PURGABLE_VOLATILE ||
18640 object->purgable == VM_PURGABLE_EMPTY) &&
18641 VM_OBJECT_OWNER(object) != NULL &&
18642 VM_OBJECT_OWNER(object)->map == map) {
18643 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18644 if ((((curr_s_offset
18645 - map_entry->vme_start
18646 + VME_OFFSET(map_entry))
18647 / effective_page_size) <
18648 object->wired_page_count)) {
18649 /*
18650 * Volatile|empty purgeable object owned
18651 * by this task: report the first
18652 * "#wired" pages as "resident" (to
18653 * show that they contribute to the
18654 * footprint) but not "dirty" (to avoid
18655 * double-counting with the fake
18656 * "non-volatile" region we'll report
18657 * at the end of the address space to
18658 * account for all (mapped or not)
18659 * non-volatile memory owned by this
18660 * task).
18661 */
18662 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18663 }
18664 } else if (!map_entry->is_sub_map &&
18665 map_entry->iokit_acct &&
18666 object->internal &&
18667 object->purgable == VM_PURGABLE_DENY) {
18668 /*
18669 * Non-purgeable IOKit memory: phys_footprint
18670 * includes the entire virtual mapping.
18671 */
18672 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18673 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18674 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18675 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18676 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18677 /* alternate accounting */
18678 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18679 if (map->pmap->footprint_was_suspended) {
18680 /*
18681 * The assertion below can fail if dyld
18682 * suspended footprint accounting
18683 * while doing some adjustments to
18684 * this page; the mapping would say
18685 * "use pmap accounting" but the page
18686 * would be marked "alternate
18687 * accounting".
18688 */
18689 } else
18690 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18691 {
18692 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18693 }
18694 disposition = 0;
18695 } else {
18696 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18697 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18698 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18699 disposition |= VM_PAGE_QUERY_PAGE_REF;
18700 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18701 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18702 } else {
18703 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18704 }
18705 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18706 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18707 }
18708 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18709 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18710 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18711 }
18712 }
18713
18714 *disposition_p = disposition;
18715 }
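/*
 * Illustrative sketch (added for exposition; not in the original source):
 * querying the footprint disposition of a single page, as
 * vm_map_page_range_info_internal() does below. The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static int
example_footprint_disposition(vm_map_t map, vm_map_offset_t va)
{
        vm_map_entry_t entry;
        int disposition = 0;

        vm_map_lock_read(map);
        if (!map->has_corpse_footprint &&
            vm_map_lookup_entry(map, va, &entry)) {
                vm_map_footprint_query_page_info(map, entry, va, &disposition);
        }
        vm_map_unlock_read(map);
        return disposition;
}
#endif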
18716
18717 kern_return_t
18718 vm_map_page_query_internal(
18719 vm_map_t target_map,
18720 vm_map_offset_t offset,
18721 int *disposition,
18722 int *ref_count)
18723 {
18724 kern_return_t kr;
18725 vm_page_info_basic_data_t info;
18726 mach_msg_type_number_t count;
18727
18728 count = VM_PAGE_INFO_BASIC_COUNT;
18729 kr = vm_map_page_info(target_map,
18730 offset,
18731 VM_PAGE_INFO_BASIC,
18732 (vm_page_info_t) &info,
18733 &count);
18734 if (kr == KERN_SUCCESS) {
18735 *disposition = info.disposition;
18736 *ref_count = info.ref_count;
18737 } else {
18738 *disposition = 0;
18739 *ref_count = 0;
18740 }
18741
18742 return kr;
18743 }
18744
18745 kern_return_t
18746 vm_map_page_info(
18747 vm_map_t map,
18748 vm_map_offset_t offset,
18749 vm_page_info_flavor_t flavor,
18750 vm_page_info_t info,
18751 mach_msg_type_number_t *count)
18752 {
18753 return vm_map_page_range_info_internal(map,
18754 offset, /* start of range */
18755 (offset + 1), /* this will get rounded in the call to the page boundary */
18756 (int)-1, /* effective_page_shift: unspecified */
18757 flavor,
18758 info,
18759 count);
18760 }
18761
18762 kern_return_t
18763 vm_map_page_range_info_internal(
18764 vm_map_t map,
18765 vm_map_offset_t start_offset,
18766 vm_map_offset_t end_offset,
18767 int effective_page_shift,
18768 vm_page_info_flavor_t flavor,
18769 vm_page_info_t info,
18770 mach_msg_type_number_t *count)
18771 {
18772 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
18773 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
18774 vm_page_t m = VM_PAGE_NULL;
18775 kern_return_t retval = KERN_SUCCESS;
18776 int disposition = 0;
18777 int ref_count = 0;
18778 int depth = 0, info_idx = 0;
18779 vm_page_info_basic_t basic_info = 0;
18780 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
18781 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
18782 boolean_t do_region_footprint;
18783 ledger_amount_t ledger_resident, ledger_compressed;
18784 int effective_page_size;
18785 vm_map_offset_t effective_page_mask;
18786
18787 switch (flavor) {
18788 case VM_PAGE_INFO_BASIC:
18789 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
18790 /*
18791 * The "vm_page_info_basic_data" structure was not
18792 * properly padded, so allow the size to be off by
18793 * one to maintain backwards binary compatibility...
18794 */
18795 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
18796 return KERN_INVALID_ARGUMENT;
18797 }
18798 }
18799 break;
18800 default:
18801 return KERN_INVALID_ARGUMENT;
18802 }
18803
18804 if (effective_page_shift == -1) {
18805 effective_page_shift = vm_self_region_page_shift_safely(map);
18806 if (effective_page_shift == -1) {
18807 return KERN_INVALID_ARGUMENT;
18808 }
18809 }
18810 effective_page_size = (1 << effective_page_shift);
18811 effective_page_mask = effective_page_size - 1;
18812
18813 do_region_footprint = task_self_region_footprint();
18814 disposition = 0;
18815 ref_count = 0;
18816 depth = 0;
18817 info_idx = 0; /* Tracks the next index within the info structure to be filled. */
18818 retval = KERN_SUCCESS;
18819
18820 offset_in_page = start_offset & effective_page_mask;
18821 start = vm_map_trunc_page(start_offset, effective_page_mask);
18822 end = vm_map_round_page(end_offset, effective_page_mask);
18823
18824 if (end < start) {
18825 return KERN_INVALID_ARGUMENT;
18826 }
18827
18828 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
18829
18830 vm_map_lock_read(map);
18831
18832 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
18833
18834 for (curr_s_offset = start; curr_s_offset < end;) {
18835 /*
18836 * New lookup needs reset of these variables.
18837 */
18838 curr_object = object = VM_OBJECT_NULL;
18839 offset_in_object = 0;
18840 ref_count = 0;
18841 depth = 0;
18842
18843 if (do_region_footprint &&
18844 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
18845 /*
18846 * Request for "footprint" info about a page beyond
18847 * the end of address space: this must be for
18848 * the fake region vm_map_region_recurse_64()
18849 * reported to account for non-volatile purgeable
18850 * memory owned by this task.
18851 */
18852 disposition = 0;
18853
18854 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
18855 (unsigned) ledger_compressed) {
18856 /*
18857 * We haven't reported all the "non-volatile
18858 * compressed" pages yet, so report this fake
18859 * page as "compressed".
18860 */
18861 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18862 } else {
18863 /*
18864 * We've reported all the non-volatile
18865 * compressed pages but not all the non-volatile
18866 * pages, so report this fake page as
18867 * "resident dirty".
18868 */
18869 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18870 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18871 disposition |= VM_PAGE_QUERY_PAGE_REF;
18872 }
18873 switch (flavor) {
18874 case VM_PAGE_INFO_BASIC:
18875 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18876 basic_info->disposition = disposition;
18877 basic_info->ref_count = 1;
18878 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
18879 basic_info->offset = 0;
18880 basic_info->depth = 0;
18881
18882 info_idx++;
18883 break;
18884 }
18885 curr_s_offset += effective_page_size;
18886 continue;
18887 }
18888
18889 /*
18890 * First, find the map entry covering "curr_s_offset", going down
18891 * submaps if necessary.
18892 */
18893 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
18894 /* no entry -> no object -> no page */
18895
18896 if (curr_s_offset < vm_map_min(map)) {
18897 /*
18898 * Illegal address that falls below map min.
18899 */
18900 curr_e_offset = MIN(end, vm_map_min(map));
18901 } else if (curr_s_offset >= vm_map_max(map)) {
18902 /*
18903 * Illegal address that falls on/after map max.
18904 */
18905 curr_e_offset = end;
18906 } else if (map_entry == vm_map_to_entry(map)) {
18907 /*
18908 * Hit a hole.
18909 */
18910 if (map_entry->vme_next == vm_map_to_entry(map)) {
18911 /*
18912 * Empty map.
18913 */
18914 curr_e_offset = MIN(map->max_offset, end);
18915 } else {
18916 /*
18917 * Hole at start of the map.
18918 */
18919 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
18920 }
18921 } else {
18922 if (map_entry->vme_next == vm_map_to_entry(map)) {
18923 /*
18924 * Hole at the end of the map.
18925 */
18926 curr_e_offset = MIN(map->max_offset, end);
18927 } else {
18928 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
18929 }
18930 }
18931
18932 assert(curr_e_offset >= curr_s_offset);
18933
18934 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
18935
18936 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18937
18938 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
18939
18940 curr_s_offset = curr_e_offset;
18941
18942 info_idx += num_pages;
18943
18944 continue;
18945 }
18946
18947 /* compute offset from this map entry's start */
18948 offset_in_object = curr_s_offset - map_entry->vme_start;
18949
18950 /* compute offset into this map entry's object (or submap) */
18951 offset_in_object += VME_OFFSET(map_entry);
18952
18953 if (map_entry->is_sub_map) {
18954 vm_map_t sub_map = VM_MAP_NULL;
18955 vm_page_info_t submap_info = 0;
18956 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
18957
18958 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
18959
18960 submap_s_offset = offset_in_object;
18961 submap_e_offset = submap_s_offset + range_len;
18962
18963 sub_map = VME_SUBMAP(map_entry);
18964
18965 vm_map_reference(sub_map);
18966 vm_map_unlock_read(map);
18967
18968 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18969
18970 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
18971 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
18972
18973 retval = vm_map_page_range_info_internal(sub_map,
18974 submap_s_offset,
18975 submap_e_offset,
18976 effective_page_shift,
18977 VM_PAGE_INFO_BASIC,
18978 (vm_page_info_t) submap_info,
18979 count);
18980
18981 assert(retval == KERN_SUCCESS);
18982
18983 vm_map_lock_read(map);
18984 vm_map_deallocate(sub_map);
18985
18986 /* Move the "info" index by the number of pages we inspected.*/
18987 info_idx += range_len >> effective_page_shift;
18988
18989 /* Move our current offset by the size of the range we inspected.*/
18990 curr_s_offset += range_len;
18991
18992 continue;
18993 }
18994
18995 object = VME_OBJECT(map_entry);
18996
18997 if (object == VM_OBJECT_NULL) {
18998 /*
18999 * We don't have an object here and, hence,
19000 * no pages to inspect. We'll fill up the
19001 * info structure appropriately.
19002 */
19003
19004 curr_e_offset = MIN(map_entry->vme_end, end);
19005
19006 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19007
19008 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19009
19010 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19011
19012 curr_s_offset = curr_e_offset;
19013
19014 info_idx += num_pages;
19015
19016 continue;
19017 }
19018
19019 if (do_region_footprint) {
19020 disposition = 0;
19021 if (map->has_corpse_footprint) {
19022 /*
19023 * Query the page info data we saved
19024 * while forking the corpse.
19025 */
19026 vm_map_corpse_footprint_query_page_info(
19027 map,
19028 curr_s_offset,
19029 &disposition);
19030 } else {
19031 /*
19032 * Query the live pmap for footprint info
19033 * about this page.
19034 */
19035 vm_map_footprint_query_page_info(
19036 map,
19037 map_entry,
19038 curr_s_offset,
19039 &disposition);
19040 }
19041 switch (flavor) {
19042 case VM_PAGE_INFO_BASIC:
19043 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19044 basic_info->disposition = disposition;
19045 basic_info->ref_count = 1;
19046 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19047 basic_info->offset = 0;
19048 basic_info->depth = 0;
19049
19050 info_idx++;
19051 break;
19052 }
19053 curr_s_offset += effective_page_size;
19054 continue;
19055 }
19056
19057 vm_object_reference(object);
19058 /*
19059 * Shared mode -- so we can allow other readers
19060 * to grab the lock too.
19061 */
19062 vm_object_lock_shared(object);
19063
19064 curr_e_offset = MIN(map_entry->vme_end, end);
19065
19066 vm_map_unlock_read(map);
19067
19068 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19069
19070 curr_object = object;
19071
19072 for (; curr_s_offset < curr_e_offset;) {
19073 if (object == curr_object) {
19074 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19075 } else {
19076 ref_count = curr_object->ref_count;
19077 }
19078
19079 curr_offset_in_object = offset_in_object;
19080
19081 for (;;) {
19082 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19083
19084 if (m != VM_PAGE_NULL) {
19085 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19086 break;
19087 } else {
19088 if (curr_object->internal &&
19089 curr_object->alive &&
19090 !curr_object->terminating &&
19091 curr_object->pager_ready) {
19092 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19093 == VM_EXTERNAL_STATE_EXISTS) {
19094 /* the pager has that page */
19095 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19096 break;
19097 }
19098 }
19099
19100 /*
19101 * Go down the VM object shadow chain until we find the page
19102 * we're looking for.
19103 */
19104
19105 if (curr_object->shadow != VM_OBJECT_NULL) {
19106 vm_object_t shadow = VM_OBJECT_NULL;
19107
19108 curr_offset_in_object += curr_object->vo_shadow_offset;
19109 shadow = curr_object->shadow;
19110
19111 vm_object_lock_shared(shadow);
19112 vm_object_unlock(curr_object);
19113
19114 curr_object = shadow;
19115 depth++;
19116 continue;
19117 } else {
19118 break;
19119 }
19120 }
19121 }
19122
19123 /* The ref_count is not strictly accurate: it measures the number */
19124 /* of entities holding a ref on the object; they may not be mapping */
19125 /* the object, or may not be mapping the section holding the */
19126 /* target page, but it's still a ballpark number and, though an */
19127 /* over-count, it picks up the copy-on-write cases. */
19128
19129 /* We could also get a picture of page sharing from pmap_attributes, */
19130 /* but this would under-count as only faulted-in mappings would */
19131 /* show up. */
19132
19133 if ((curr_object == object) && curr_object->shadow) {
19134 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19135 }
19136
19137 if (!curr_object->internal) {
19138 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19139 }
19140
19141 if (m != VM_PAGE_NULL) {
19142 if (m->vmp_fictitious) {
19143 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19144 } else {
19145 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19146 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19147 }
19148
19149 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19150 disposition |= VM_PAGE_QUERY_PAGE_REF;
19151 }
19152
19153 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19154 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19155 }
19156
19157 /*
19158 * XXX TODO4K:
19159 * when this routine deals with 4k
19160 * pages, check the appropriate CS bit
19161 * here.
19162 */
19163 if (m->vmp_cs_validated) {
19164 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19165 }
19166 if (m->vmp_cs_tainted) {
19167 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19168 }
19169 if (m->vmp_cs_nx) {
19170 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19171 }
19172 if (m->vmp_reusable || curr_object->all_reusable) {
19173 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19174 }
19175 }
19176 }
19177
19178 switch (flavor) {
19179 case VM_PAGE_INFO_BASIC:
19180 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19181 basic_info->disposition = disposition;
19182 basic_info->ref_count = ref_count;
19183 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19184 VM_KERNEL_ADDRPERM(curr_object);
19185 basic_info->offset =
19186 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19187 basic_info->depth = depth;
19188
19189 info_idx++;
19190 break;
19191 }
19192
19193 disposition = 0;
19194 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19195
19196 /*
19197 * Move to next offset in the range and in our object.
19198 */
19199 curr_s_offset += effective_page_size;
19200 offset_in_object += effective_page_size;
19201 curr_offset_in_object = offset_in_object;
19202
19203 if (curr_object != object) {
19204 vm_object_unlock(curr_object);
19205
19206 curr_object = object;
19207
19208 vm_object_lock_shared(curr_object);
19209 } else {
19210 vm_object_lock_yield_shared(curr_object);
19211 }
19212 }
19213
19214 vm_object_unlock(curr_object);
19215 vm_object_deallocate(curr_object);
19216
19217 vm_map_lock_read(map);
19218 }
19219
19220 vm_map_unlock_read(map);
19221 return retval;
19222 }
19223
19224 /*
19225 * vm_map_msync
19226 *
19227 * Synchronises the memory range specified with its backing store
19228 * image by either flushing or cleaning the contents to the appropriate
19229 * memory manager, engaging in a memory object synchronize dialog with
19230 * the manager. The client doesn't return until the manager issues
19231 * the m_o_s_completed message. MIG magically converts the user task
19232 * parameter to the task's address map.
19233 *
19234 * interpretation of sync_flags
19235 * VM_SYNC_INVALIDATE - discard pages, only return precious
19236 * pages to manager.
19237 *
19238 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19239 * - discard pages, write dirty or precious
19240 * pages back to memory manager.
19241 *
19242 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19243 * - write dirty or precious pages back to
19244 * the memory manager.
19245 *
19246 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19247 * is a hole in the region, and we would
19248 * have returned KERN_SUCCESS, return
19249 * KERN_INVALID_ADDRESS instead.
19250 *
19251 * NOTE
19252 * The memory object attributes have not yet been implemented; this
19253 * function will have to deal with the invalidate attribute.
19254 *
19255 * RETURNS
19256 * KERN_INVALID_TASK Bad task parameter
19257 * KERN_INVALID_ARGUMENT both sync and async were specified.
19258 * KERN_SUCCESS The usual.
19259 * KERN_INVALID_ADDRESS There was a hole in the region.
19260 */
19261
19262 kern_return_t
19263 vm_map_msync(
19264 vm_map_t map,
19265 vm_map_address_t address,
19266 vm_map_size_t size,
19267 vm_sync_t sync_flags)
19268 {
19269 vm_map_entry_t entry;
19270 vm_map_size_t amount_left;
19271 vm_object_offset_t offset;
19272 vm_object_offset_t start_offset, end_offset;
19273 boolean_t do_sync_req;
19274 boolean_t had_hole = FALSE;
19275 vm_map_offset_t pmap_offset;
19276
19277 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19278 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19279 return KERN_INVALID_ARGUMENT;
19280 }
19281
19282 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19283 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19284 }
19285
19286 /*
19287 * align address and size on page boundaries
19288 */
19289 size = (vm_map_round_page(address + size,
19290 VM_MAP_PAGE_MASK(map)) -
19291 vm_map_trunc_page(address,
19292 VM_MAP_PAGE_MASK(map)));
19293 address = vm_map_trunc_page(address,
19294 VM_MAP_PAGE_MASK(map));
19295
19296 if (map == VM_MAP_NULL) {
19297 return KERN_INVALID_TASK;
19298 }
19299
19300 if (size == 0) {
19301 return KERN_SUCCESS;
19302 }
19303
19304 amount_left = size;
19305
19306 while (amount_left > 0) {
19307 vm_object_size_t flush_size;
19308 vm_object_t object;
19309
19310 vm_map_lock(map);
19311 if (!vm_map_lookup_entry(map,
19312 address,
19313 &entry)) {
19314 vm_map_size_t skip;
19315
19316 /*
19317 * hole in the address map.
19318 */
19319 had_hole = TRUE;
19320
19321 if (sync_flags & VM_SYNC_KILLPAGES) {
19322 /*
19323 * For VM_SYNC_KILLPAGES, there should be
19324 * no holes in the range, since we couldn't
19325 * prevent someone else from allocating in
19326 * that hole and we wouldn't want to "kill"
19327 * their pages.
19328 */
19329 vm_map_unlock(map);
19330 break;
19331 }
19332
19333 /*
19334 * Check for empty map.
19335 */
19336 if (entry == vm_map_to_entry(map) &&
19337 entry->vme_next == entry) {
19338 vm_map_unlock(map);
19339 break;
19340 }
19341 /*
19342 * Check that we don't wrap and that
19343 * we have at least one real map entry.
19344 */
19345 if ((map->hdr.nentries == 0) ||
19346 (entry->vme_next->vme_start < address)) {
19347 vm_map_unlock(map);
19348 break;
19349 }
19350 /*
19351 * Move up to the next entry if needed
19352 */
19353 skip = (entry->vme_next->vme_start - address);
19354 if (skip >= amount_left) {
19355 amount_left = 0;
19356 } else {
19357 amount_left -= skip;
19358 }
19359 address = entry->vme_next->vme_start;
19360 vm_map_unlock(map);
19361 continue;
19362 }
19363
19364 offset = address - entry->vme_start;
19365 pmap_offset = address;
19366
19367 /*
19368 * do we have more to flush than is contained in this
19369 * entry ?
19370 */
19371 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19372 flush_size = entry->vme_end -
19373 (entry->vme_start + offset);
19374 } else {
19375 flush_size = amount_left;
19376 }
19377 amount_left -= flush_size;
19378 address += flush_size;
19379
19380 if (entry->is_sub_map == TRUE) {
19381 vm_map_t local_map;
19382 vm_map_offset_t local_offset;
19383
19384 local_map = VME_SUBMAP(entry);
19385 local_offset = VME_OFFSET(entry);
19386 vm_map_reference(local_map);
19387 vm_map_unlock(map);
19388 if (vm_map_msync(
19389 local_map,
19390 local_offset,
19391 flush_size,
19392 sync_flags) == KERN_INVALID_ADDRESS) {
19393 had_hole = TRUE;
19394 }
19395 vm_map_deallocate(local_map);
19396 continue;
19397 }
19398 object = VME_OBJECT(entry);
19399
19400 /*
19401 * We can't sync this object if the object has not been
19402 * created yet
19403 */
19404 if (object == VM_OBJECT_NULL) {
19405 vm_map_unlock(map);
19406 continue;
19407 }
19408 offset += VME_OFFSET(entry);
19409
19410 vm_object_lock(object);
19411
19412 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19413 int kill_pages = 0;
19414 boolean_t reusable_pages = FALSE;
19415
19416 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19417 /*
19418 * This is a destructive operation and so we
19419 * err on the side of limiting the range of
19420 * the operation.
19421 */
19422 start_offset = vm_object_round_page(offset);
19423 end_offset = vm_object_trunc_page(offset + flush_size);
19424
19425 if (end_offset <= start_offset) {
19426 vm_object_unlock(object);
19427 vm_map_unlock(map);
19428 continue;
19429 }
19430
19431 pmap_offset += start_offset - offset;
19432 } else {
19433 start_offset = offset;
19434 end_offset = offset + flush_size;
19435 }
19436
19437 if (sync_flags & VM_SYNC_KILLPAGES) {
19438 if (((object->ref_count == 1) ||
19439 ((object->copy_strategy !=
19440 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19441 (object->copy == VM_OBJECT_NULL))) &&
19442 (object->shadow == VM_OBJECT_NULL)) {
19443 if (object->ref_count != 1) {
19444 vm_page_stats_reusable.free_shared++;
19445 }
19446 kill_pages = 1;
19447 } else {
19448 kill_pages = -1;
19449 }
19450 }
19451 if (kill_pages != -1) {
19452 vm_object_deactivate_pages(
19453 object,
19454 start_offset,
19455 (vm_object_size_t) (end_offset - start_offset),
19456 kill_pages,
19457 reusable_pages,
19458 map->pmap,
19459 pmap_offset);
19460 }
19461 vm_object_unlock(object);
19462 vm_map_unlock(map);
19463 continue;
19464 }
19465 /*
19466 * We can't sync this object if there isn't a pager.
19467 * Don't bother to sync internal objects, since there can't
19468 * be any "permanent" storage for these objects anyway.
19469 */
19470 if ((object->pager == MEMORY_OBJECT_NULL) ||
19471 (object->internal) || (object->private)) {
19472 vm_object_unlock(object);
19473 vm_map_unlock(map);
19474 continue;
19475 }
19476 /*
19477 * keep reference on the object until syncing is done
19478 */
19479 vm_object_reference_locked(object);
19480 vm_object_unlock(object);
19481
19482 vm_map_unlock(map);
19483
19484 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19485 start_offset = vm_object_trunc_page(offset);
19486 end_offset = vm_object_round_page(offset + flush_size);
19487 } else {
19488 start_offset = offset;
19489 end_offset = offset + flush_size;
19490 }
19491
19492 do_sync_req = vm_object_sync(object,
19493 start_offset,
19494 (end_offset - start_offset),
19495 sync_flags & VM_SYNC_INVALIDATE,
19496 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19497 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19498 sync_flags & VM_SYNC_SYNCHRONOUS);
19499
19500 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19501 /*
19502 * clear out the clustering and read-ahead hints
19503 */
19504 vm_object_lock(object);
19505
19506 object->pages_created = 0;
19507 object->pages_used = 0;
19508 object->sequential = 0;
19509 object->last_alloc = 0;
19510
19511 vm_object_unlock(object);
19512 }
19513 vm_object_deallocate(object);
19514 } /* while */
19515
19516 /* for proper msync() behaviour */
19517 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19518 return KERN_INVALID_ADDRESS;
19519 }
19520
19521 return KERN_SUCCESS;
19522 }/* vm_msync */
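
/*
 * Illustrative usage sketch for vm_map_msync() -- not part of the original
 * file, compiled out. It shows how a hypothetical in-kernel caller could map
 * an msync(2)-style synchronous request onto the sync_flags documented above;
 * "example_map", "addr" and "len" are placeholder names.
 */
#if 0
static kern_return_t
example_msync_sync_contiguous(
        vm_map_t                example_map,
        vm_map_address_t        addr,
        vm_map_size_t           len)
{
        /*
         * Write dirty/precious pages back to the memory manager synchronously
         * and, thanks to VM_SYNC_CONTIGUOUS, report KERN_INVALID_ADDRESS if
         * the range contains a hole instead of silently skipping it.
         */
        return vm_map_msync(example_map, addr, len,
            VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif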
19523
19524 kern_return_t
19525 vm_named_entry_from_vm_object(
19526 vm_named_entry_t named_entry,
19527 vm_object_t object,
19528 vm_object_offset_t offset,
19529 vm_object_size_t size,
19530 vm_prot_t prot)
19531 {
19532 vm_map_copy_t copy;
19533 vm_map_entry_t copy_entry;
19534
19535 assert(!named_entry->is_sub_map);
19536 assert(!named_entry->is_copy);
19537 assert(!named_entry->is_object);
19538 assert(!named_entry->internal);
19539 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19540
19541 copy = vm_map_copy_allocate();
19542 copy->type = VM_MAP_COPY_ENTRY_LIST;
19543 copy->offset = offset;
19544 copy->size = size;
19545 copy->cpy_hdr.page_shift = PAGE_SHIFT;
19546 vm_map_store_init(&copy->cpy_hdr);
19547
19548 copy_entry = vm_map_copy_entry_create(copy, FALSE);
19549 copy_entry->protection = prot;
19550 copy_entry->max_protection = prot;
19551 copy_entry->use_pmap = TRUE;
19552 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19553 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19554 VME_OBJECT_SET(copy_entry, object);
19555 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19556 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19557
19558 named_entry->backing.copy = copy;
19559 named_entry->is_object = TRUE;
19560 if (object->internal) {
19561 named_entry->internal = TRUE;
19562 }
19563
19564 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);
19565
19566 return KERN_SUCCESS;
19567 }
19568
19569 vm_object_t
19570 vm_named_entry_to_vm_object(
19571 vm_named_entry_t named_entry)
19572 {
19573 vm_map_copy_t copy;
19574 vm_map_entry_t copy_entry;
19575 vm_object_t object;
19576
19577 assert(!named_entry->is_sub_map);
19578 assert(!named_entry->is_copy);
19579 assert(named_entry->is_object);
19580 copy = named_entry->backing.copy;
19581 assert(copy != VM_MAP_COPY_NULL);
19582 assert(copy->cpy_hdr.nentries == 1);
19583 copy_entry = vm_map_copy_first_entry(copy);
19584 assert(!copy_entry->is_sub_map);
19585 object = VME_OBJECT(copy_entry);
19586
19587 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19588
19589 return object;
19590 }
19591
19592 /*
19593 * Routine: convert_port_entry_to_map
19594 * Purpose:
19595 * Convert from a port specifying an entry or a task
19596 * to a map. Doesn't consume the port ref; produces a map ref,
19597 * which may be null. Unlike convert_port_to_map, the
19598 * port may be either task backed or named-entry backed.
19599 * Conditions:
19600 * Nothing locked.
19601 */
19602
19603
19604 vm_map_t
19605 convert_port_entry_to_map(
19606 ipc_port_t port)
19607 {
19608 vm_map_t map;
19609 vm_named_entry_t named_entry;
19610 uint32_t try_failed_count = 0;
19611
19612 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19613 while (TRUE) {
19614 ip_lock(port);
19615 if (ip_active(port) && (ip_kotype(port)
19616 == IKOT_NAMED_ENTRY)) {
19617 named_entry =
19618 (vm_named_entry_t) ip_get_kobject(port);
19619 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19620 ip_unlock(port);
19621
19622 try_failed_count++;
19623 mutex_pause(try_failed_count);
19624 continue;
19625 }
19626 named_entry->ref_count++;
19627 lck_mtx_unlock(&(named_entry)->Lock);
19628 ip_unlock(port);
19629 if ((named_entry->is_sub_map) &&
19630 (named_entry->protection
19631 & VM_PROT_WRITE)) {
19632 map = named_entry->backing.map;
19633 if (map->pmap != PMAP_NULL) {
19634 if (map->pmap == kernel_pmap) {
19635 panic("userspace has access "
19636 "to a kernel map %p", map);
19637 }
19638 pmap_require(map->pmap);
19639 }
19640 } else {
19641 mach_destroy_memory_entry(port);
19642 return VM_MAP_NULL;
19643 }
19644 vm_map_reference_swap(map);
19645 mach_destroy_memory_entry(port);
19646 break;
19647 } else {
19648 return VM_MAP_NULL;
19649 }
19650 }
19651 } else {
19652 map = convert_port_to_map(port);
19653 }
19654
19655 return map;
19656 }
19657
19658 /*
19659 * Routine: convert_port_entry_to_object
19660 * Purpose:
19661 * Convert from a port specifying a named entry to an
19662 * object. Doesn't consume the port ref; produces an object ref,
19663 * which may be null.
19664 * Conditions:
19665 * Nothing locked.
19666 */
19667
19668
19669 vm_object_t
19670 convert_port_entry_to_object(
19671 ipc_port_t port)
19672 {
19673 vm_object_t object = VM_OBJECT_NULL;
19674 vm_named_entry_t named_entry;
19675 uint32_t try_failed_count = 0;
19676
19677 if (IP_VALID(port) &&
19678 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19679 try_again:
19680 ip_lock(port);
19681 if (ip_active(port) &&
19682 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19683 named_entry = (vm_named_entry_t) ip_get_kobject(port);
19684 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19685 ip_unlock(port);
19686 try_failed_count++;
19687 mutex_pause(try_failed_count);
19688 goto try_again;
19689 }
19690 named_entry->ref_count++;
19691 lck_mtx_unlock(&(named_entry)->Lock);
19692 ip_unlock(port);
19693 if (!(named_entry->is_sub_map) &&
19694 !(named_entry->is_copy) &&
19695 (named_entry->is_object) &&
19696 (named_entry->protection & VM_PROT_WRITE)) {
19697 vm_map_copy_t copy;
19698 vm_map_entry_t copy_entry;
19699
19700 copy = named_entry->backing.copy;
19701 assert(copy->cpy_hdr.nentries == 1);
19702 copy_entry = vm_map_copy_first_entry(copy);
19703 assert(!copy_entry->is_sub_map);
19704 object = VME_OBJECT(copy_entry);
19705 assert(object != VM_OBJECT_NULL);
19706 vm_object_reference(object);
19707 }
19708 mach_destroy_memory_entry(port);
19709 }
19710 }
19711
19712 return object;
19713 }
19714
19715 /*
19716 * Export routines to other components for the things we access locally through
19717 * macros.
19718 */
19719 #undef current_map
19720 vm_map_t
19721 current_map(void)
19722 {
19723 return current_map_fast();
19724 }
19725
19726 /*
19727 * vm_map_reference:
19728 *
19729 * Most code internal to osfmk will go through a
19730 * macro defining this. This is always here for the
19731 * use of other kernel components.
19732 */
19733 #undef vm_map_reference
19734 void
19735 vm_map_reference(
19736 vm_map_t map)
19737 {
19738 if (map == VM_MAP_NULL) {
19739 return;
19740 }
19741
19742 lck_mtx_lock(&map->s_lock);
19743 #if TASK_SWAPPER
19744 assert(map->res_count > 0);
19745 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
19746 map->res_count++;
19747 #endif
19748 os_ref_retain_locked(&map->map_refcnt);
19749 lck_mtx_unlock(&map->s_lock);
19750 }
19751
19752 /*
19753 * vm_map_deallocate:
19754 *
19755 * Removes a reference from the specified map,
19756 * destroying it if no references remain.
19757 * The map should not be locked.
19758 */
19759 void
19760 vm_map_deallocate(
19761 vm_map_t map)
19762 {
19763 unsigned int ref;
19764
19765 if (map == VM_MAP_NULL) {
19766 return;
19767 }
19768
19769 lck_mtx_lock(&map->s_lock);
19770 ref = os_ref_release_locked(&map->map_refcnt);
19771 if (ref > 0) {
19772 vm_map_res_deallocate(map);
19773 lck_mtx_unlock(&map->s_lock);
19774 return;
19775 }
19776 assert(os_ref_get_count(&map->map_refcnt) == 0);
19777 lck_mtx_unlock(&map->s_lock);
19778
19779 #if TASK_SWAPPER
19780 /*
19781 * The map residence count isn't decremented here because
19782 * the vm_map_delete below will traverse the entire map,
19783 * deleting entries, and the residence counts on objects
19784 * and sharing maps will go away then.
19785 */
19786 #endif
19787
19788 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19789 }
19790
19791 void
19792 vm_map_inspect_deallocate(
19793 vm_map_inspect_t map)
19794 {
19795 vm_map_deallocate((vm_map_t)map);
19796 }
19797
19798 void
19799 vm_map_read_deallocate(
19800 vm_map_read_t map)
19801 {
19802 vm_map_deallocate((vm_map_t)map);
19803 }
19804
19805
19806 void
19807 vm_map_disable_NX(vm_map_t map)
19808 {
19809 if (map == NULL) {
19810 return;
19811 }
19812 if (map->pmap == NULL) {
19813 return;
19814 }
19815
19816 pmap_disable_NX(map->pmap);
19817 }
19818
19819 void
19820 vm_map_disallow_data_exec(vm_map_t map)
19821 {
19822 if (map == NULL) {
19823 return;
19824 }
19825
19826 map->map_disallow_data_exec = TRUE;
19827 }
19828
19829 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19830 * more descriptive.
19831 */
19832 void
19833 vm_map_set_32bit(vm_map_t map)
19834 {
19835 #if defined(__arm__) || defined(__arm64__)
19836 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19837 #else
19838 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
19839 #endif
19840 }
19841
19842
19843 void
19844 vm_map_set_64bit(vm_map_t map)
19845 {
19846 #if defined(__arm__) || defined(__arm64__)
19847 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19848 #else
19849 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
19850 #endif
19851 }
19852
19853 /*
19854 * Expand the maximum size of an existing map to the maximum supported.
19855 */
19856 void
19857 vm_map_set_jumbo(vm_map_t map)
19858 {
19859 #if defined (__arm64__) && !defined(CONFIG_ARROW)
19860 vm_map_set_max_addr(map, ~0);
19861 #else /* arm64 */
19862 (void) map;
19863 #endif
19864 }
19865
19866 /*
19867 * This map has a JIT entitlement
19868 */
19869 void
19870 vm_map_set_jit_entitled(vm_map_t map)
19871 {
19872 #if defined (__arm64__)
19873 pmap_set_jit_entitled(map->pmap);
19874 #else /* arm64 */
19875 (void) map;
19876 #endif
19877 }
19878
19879 /*
19880 * Expand the maximum size of an existing map.
19881 */
19882 void
19883 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
19884 {
19885 #if defined(__arm64__)
19886 vm_map_offset_t max_supported_offset = 0;
19887 vm_map_offset_t old_max_offset = map->max_offset;
19888 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
19889
19890 new_max_offset = trunc_page(new_max_offset);
19891
19892 /* The address space cannot be shrunk using this routine. */
19893 if (old_max_offset >= new_max_offset) {
19894 return;
19895 }
19896
19897 if (max_supported_offset < new_max_offset) {
19898 new_max_offset = max_supported_offset;
19899 }
19900
19901 map->max_offset = new_max_offset;
19902
19903 if (map->holes_list->prev->vme_end == old_max_offset) {
19904 /*
19905 * There is already a hole at the end of the map; simply make it bigger.
19906 */
19907 map->holes_list->prev->vme_end = map->max_offset;
19908 } else {
19909 /*
19910 * There is no hole at the end, so we need to create a new hole
19911 * for the new empty space we're creating.
19912 */
19913 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
19914 new_hole->start = old_max_offset;
19915 new_hole->end = map->max_offset;
19916 new_hole->prev = map->holes_list->prev;
19917 new_hole->next = (struct vm_map_entry *)map->holes_list;
19918 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
19919 map->holes_list->prev = (struct vm_map_entry *)new_hole;
19920 }
19921 #else
19922 (void)map;
19923 (void)new_max_offset;
19924 #endif
19925 }
19926
19927 vm_map_offset_t
19928 vm_compute_max_offset(boolean_t is64)
19929 {
19930 #if defined(__arm__) || defined(__arm64__)
19931 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
19932 #else
19933 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
19934 #endif
19935 }
19936
19937 void
19938 vm_map_get_max_aslr_slide_section(
19939 vm_map_t map __unused,
19940 int64_t *max_sections,
19941 int64_t *section_size)
19942 {
19943 #if defined(__arm64__)
19944 *max_sections = 3;
19945 *section_size = ARM_TT_TWIG_SIZE;
19946 #else
19947 *max_sections = 1;
19948 *section_size = 0;
19949 #endif
19950 }
19951
19952 uint64_t
19953 vm_map_get_max_aslr_slide_pages(vm_map_t map)
19954 {
19955 #if defined(__arm64__)
19956 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
19957 * limited embedded address space; this is also meant to minimize pmap
19958 * memory usage on 16KB page systems.
19959 */
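/*
 * Worked example (illustrative assumption): on a 16KB-page map,
 * VM_MAP_PAGE_SHIFT(map) == 14, so this returns 1 << (24 - 14) = 1024
 * pages, i.e. 1024 * 16KB = 16MB of slide range.
 */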
19960 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
19961 #else
19962 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
19963 #endif
19964 }
19965
19966 uint64_t
19967 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
19968 {
19969 #if defined(__arm64__)
19970 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
19971 * of independent entropy on 16KB page systems.
19972 */
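/*
 * Worked example (illustrative assumption): on a 16KB-page map,
 * VM_MAP_PAGE_SHIFT(map) == 14, so this returns 1 << (22 - 14) = 256
 * pages, i.e. 256 * 16KB = 4MB of loader slide range.
 */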
19973 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
19974 #else
19975 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
19976 #endif
19977 }
19978
19979 #ifndef __arm__
19980 boolean_t
19981 vm_map_is_64bit(
19982 vm_map_t map)
19983 {
19984 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
19985 }
19986 #endif
19987
19988 boolean_t
19989 vm_map_has_hard_pagezero(
19990 vm_map_t map,
19991 vm_map_offset_t pagezero_size)
19992 {
19993 /*
19994 * XXX FBDP
19995 * We should lock the VM map (for read) here but we can get away
19996 * with it for now because there can't really be any race condition:
19997 * the VM map's min_offset is changed only when the VM map is created
19998 * and when the zero page is established (when the binary gets loaded),
19999 * and this routine gets called only when the task terminates and the
20000 * VM map is being torn down, and when a new map is created via
20001 * load_machfile()/execve().
20002 */
20003 return map->min_offset >= pagezero_size;
20004 }
20005
20006 /*
20007 * Raise a VM map's maximum offset.
20008 */
20009 kern_return_t
20010 vm_map_raise_max_offset(
20011 vm_map_t map,
20012 vm_map_offset_t new_max_offset)
20013 {
20014 kern_return_t ret;
20015
20016 vm_map_lock(map);
20017 ret = KERN_INVALID_ADDRESS;
20018
20019 if (new_max_offset >= map->max_offset) {
20020 if (!vm_map_is_64bit(map)) {
20021 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20022 map->max_offset = new_max_offset;
20023 ret = KERN_SUCCESS;
20024 }
20025 } else {
20026 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20027 map->max_offset = new_max_offset;
20028 ret = KERN_SUCCESS;
20029 }
20030 }
20031 }
20032
20033 vm_map_unlock(map);
20034 return ret;
20035 }
20036
20037
20038 /*
20039 * Raise a VM map's minimum offset.
20040 * To strictly enforce "page zero" reservation.
20041 */
20042 kern_return_t
20043 vm_map_raise_min_offset(
20044 vm_map_t map,
20045 vm_map_offset_t new_min_offset)
20046 {
20047 vm_map_entry_t first_entry;
20048
20049 new_min_offset = vm_map_round_page(new_min_offset,
20050 VM_MAP_PAGE_MASK(map));
20051
20052 vm_map_lock(map);
20053
20054 if (new_min_offset < map->min_offset) {
20055 /*
20056 * Can't move min_offset backwards, as that would expose
20057 * a part of the address space that was previously, and for
20058 * possibly good reasons, inaccessible.
20059 */
20060 vm_map_unlock(map);
20061 return KERN_INVALID_ADDRESS;
20062 }
20063 if (new_min_offset >= map->max_offset) {
20064 /* can't go beyond the end of the address space */
20065 vm_map_unlock(map);
20066 return KERN_INVALID_ADDRESS;
20067 }
20068
20069 first_entry = vm_map_first_entry(map);
20070 if (first_entry != vm_map_to_entry(map) &&
20071 first_entry->vme_start < new_min_offset) {
20072 /*
20073 * Some memory was already allocated below the new
20074 * minimum offset. It's too late to change it now...
20075 */
20076 vm_map_unlock(map);
20077 return KERN_NO_SPACE;
20078 }
20079
20080 map->min_offset = new_min_offset;
20081
20082 assert(map->holes_list);
20083 map->holes_list->start = new_min_offset;
20084 assert(new_min_offset < map->holes_list->end);
20085
20086 vm_map_unlock(map);
20087
20088 return KERN_SUCCESS;
20089 }
20090
20091 /*
20092 * Set the limit on the maximum amount of user wired memory allowed for this map.
20093 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
20094 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
20095 * don't have to reach over to the BSD data structures.
20096 */
20097
20098 void
20099 vm_map_set_user_wire_limit(vm_map_t map,
20100 vm_size_t limit)
20101 {
20102 map->user_wire_limit = limit;
20103 }
20104
20105
20106 void
20107 vm_map_switch_protect(vm_map_t map,
20108 boolean_t val)
20109 {
20110 vm_map_lock(map);
20111 map->switch_protect = val;
20112 vm_map_unlock(map);
20113 }
20114
20115 extern int cs_process_enforcement_enable;
20116 boolean_t
20117 vm_map_cs_enforcement(
20118 vm_map_t map)
20119 {
20120 if (cs_process_enforcement_enable) {
20121 return TRUE;
20122 }
20123 return map->cs_enforcement;
20124 }
20125
20126 void
20127 vm_map_cs_enforcement_set(
20128 vm_map_t map,
20129 boolean_t val)
20130 {
20131 vm_map_lock(map);
20132 map->cs_enforcement = val;
20133 pmap_set_vm_map_cs_enforced(map->pmap, val);
20134 vm_map_unlock(map);
20135 }
20136
20137 /*
20138 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20139 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20140 * bump both counters.
20141 */
20142 void
20143 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20144 {
20145 pmap_t pmap = vm_map_pmap(map);
20146
20147 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20148 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20149 }
20150
20151 void
20152 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20153 {
20154 pmap_t pmap = vm_map_pmap(map);
20155
20156 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20157 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20158 }
20159
20160 /* Add (generate) code signature for memory range */
20161 #if CONFIG_DYNAMIC_CODE_SIGNING
20162 kern_return_t
20163 vm_map_sign(vm_map_t map,
20164 vm_map_offset_t start,
20165 vm_map_offset_t end)
20166 {
20167 vm_map_entry_t entry;
20168 vm_page_t m;
20169 vm_object_t object;
20170
20171 /*
20172 * Vet all the input parameters and current type and state of the
20173 * underlying object. Return with an error if anything is amiss.
20174 */
20175 if (map == VM_MAP_NULL) {
20176 return KERN_INVALID_ARGUMENT;
20177 }
20178
20179 vm_map_lock_read(map);
20180
20181 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20182 /*
20183 * Must pass a valid non-submap address.
20184 */
20185 vm_map_unlock_read(map);
20186 return KERN_INVALID_ADDRESS;
20187 }
20188
20189 if ((entry->vme_start > start) || (entry->vme_end < end)) {
20190 /*
20191 * Map entry doesn't cover the requested range. Not handling
20192 * this situation currently.
20193 */
20194 vm_map_unlock_read(map);
20195 return KERN_INVALID_ARGUMENT;
20196 }
20197
20198 object = VME_OBJECT(entry);
20199 if (object == VM_OBJECT_NULL) {
20200 /*
20201 * Object must already be present or we can't sign.
20202 */
20203 vm_map_unlock_read(map);
20204 return KERN_INVALID_ARGUMENT;
20205 }
20206
20207 vm_object_lock(object);
20208 vm_map_unlock_read(map);
20209
20210 while (start < end) {
20211 uint32_t refmod;
20212
20213 m = vm_page_lookup(object,
20214 start - entry->vme_start + VME_OFFSET(entry));
20215 if (m == VM_PAGE_NULL) {
20216 /* should we try to fault a page here? We can probably
20217 * demand that it exists and is locked for this request. */
20218 vm_object_unlock(object);
20219 return KERN_FAILURE;
20220 }
20221 /* deal with special page status */
20222 if (m->vmp_busy ||
20223 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20224 vm_object_unlock(object);
20225 return KERN_FAILURE;
20226 }
20227
20228 /* Page is OK... now "validate" it */
20229 /* This is the place where we'll call out to create a code
20230 * directory, later */
20231 /* XXX TODO4K: deal with 4k subpages individually? */
20232 m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20233
20234 /* The page is now "clean" for codesigning purposes. That means
20235 * we don't consider it as modified (wpmapped) anymore. But
20236 * we'll disconnect the page so we note any future modification
20237 * attempts. */
20238 m->vmp_wpmapped = FALSE;
20239 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20240
20241 /* Pull the dirty status from the pmap, since we cleared the
20242 * wpmapped bit */
20243 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20244 SET_PAGE_DIRTY(m, FALSE);
20245 }
20246
20247 /* On to the next page */
20248 start += PAGE_SIZE;
20249 }
20250 vm_object_unlock(object);
20251
20252 return KERN_SUCCESS;
20253 }
20254 #endif
20255
20256 kern_return_t
20257 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20258 {
20259 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20260 vm_map_entry_t next_entry;
20261 kern_return_t kr = KERN_SUCCESS;
20262 vm_map_t zap_map;
20263
20264 vm_map_lock(map);
20265
20266 /*
20267 * We use a "zap_map" to avoid having to unlock
20268 * the "map" in vm_map_delete().
20269 */
20270 zap_map = vm_map_create(PMAP_NULL,
20271 map->min_offset,
20272 map->max_offset,
20273 map->hdr.entries_pageable);
20274
20275 if (zap_map == VM_MAP_NULL) {
20276 vm_map_unlock(map);
20277 return KERN_RESOURCE_SHORTAGE;
20278 }
20278
20279 vm_map_set_page_shift(zap_map,
20280 VM_MAP_PAGE_SHIFT(map));
20281 vm_map_disable_hole_optimization(zap_map);
20282
20283 for (entry = vm_map_first_entry(map);
20284 entry != vm_map_to_entry(map);
20285 entry = next_entry) {
20286 next_entry = entry->vme_next;
20287
20288 if (VME_OBJECT(entry) &&
20289 !entry->is_sub_map &&
20290 (VME_OBJECT(entry)->internal == TRUE) &&
20291 (VME_OBJECT(entry)->ref_count == 1)) {
20292 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20293 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20294
20295 (void)vm_map_delete(map,
20296 entry->vme_start,
20297 entry->vme_end,
20298 VM_MAP_REMOVE_SAVE_ENTRIES,
20299 zap_map);
20300 }
20301 }
20302
20303 vm_map_unlock(map);
20304
20305 /*
20306 * Get rid of the "zap_map" and all the map entries that
20307 * they may still contain.
20308 */
20309 if (zap_map != VM_MAP_NULL) {
20310 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20311 zap_map = VM_MAP_NULL;
20312 }
20313
20314 return kr;
20315 }
20316
20317
20318 #if DEVELOPMENT || DEBUG
20319
20320 int
20321 vm_map_disconnect_page_mappings(
20322 vm_map_t map,
20323 boolean_t do_unnest)
20324 {
20325 vm_map_entry_t entry;
20326 int page_count = 0;
20327
20328 if (do_unnest == TRUE) {
20329 #ifndef NO_NESTED_PMAP
20330 vm_map_lock(map);
20331
20332 for (entry = vm_map_first_entry(map);
20333 entry != vm_map_to_entry(map);
20334 entry = entry->vme_next) {
20335 if (entry->is_sub_map && entry->use_pmap) {
20336 /*
20337 * Make sure the range between the start of this entry and
20338 * the end of this entry is no longer nested, so that
20339 * we will only remove mappings from the pmap in use by
20340 * this task.
20341 */
20342 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20343 }
20344 }
20345 vm_map_unlock(map);
20346 #endif
20347 }
20348 vm_map_lock_read(map);
20349
20350 page_count = map->pmap->stats.resident_count;
20351
20352 for (entry = vm_map_first_entry(map);
20353 entry != vm_map_to_entry(map);
20354 entry = entry->vme_next) {
20355 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
20356 (VME_OBJECT(entry)->phys_contiguous))) {
20357 continue;
20358 }
20359 if (entry->is_sub_map) {
20360 assert(!entry->use_pmap);
20361 }
20362
20363 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
20364 }
20365 vm_map_unlock_read(map);
20366
20367 return page_count;
20368 }
20369
20370 kern_return_t
20371 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20372 {
20373 vm_object_t object = NULL;
20374 vm_object_offset_t offset;
20375 vm_prot_t prot;
20376 boolean_t wired;
20377 vm_map_version_t version;
20378 vm_map_t real_map;
20379 int result = KERN_FAILURE;
20380
20381 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20382 vm_map_lock(map);
20383
20384 result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20385 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20386 NULL, &real_map, NULL);
20387 if (object == NULL) {
20388 result = KERN_MEMORY_ERROR;
20389 } else if (object->pager) {
20390 result = vm_compressor_pager_inject_error(object->pager,
20391 offset);
20392 } else {
20393 result = KERN_MEMORY_PRESENT;
20394 }
20395
20396 if (object != NULL) {
20397 vm_object_unlock(object);
20398 }
20399
20400 if (real_map != map) {
20401 vm_map_unlock(real_map);
20402 }
20403 vm_map_unlock(map);
20404
20405 return result;
20406 }
20407
20408 #endif
20409
20410
20411 #if CONFIG_FREEZE
20412
20413
20414 extern struct freezer_context freezer_context_global;
20415 AbsoluteTime c_freezer_last_yield_ts = 0;
20416
20417 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20418 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20419
20420 kern_return_t
20421 vm_map_freeze(
20422 task_t task,
20423 unsigned int *purgeable_count,
20424 unsigned int *wired_count,
20425 unsigned int *clean_count,
20426 unsigned int *dirty_count,
20427 unsigned int dirty_budget,
20428 unsigned int *shared_count,
20429 int *freezer_error_code,
20430 boolean_t eval_only)
20431 {
20432 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
20433 kern_return_t kr = KERN_SUCCESS;
20434 boolean_t evaluation_phase = TRUE;
20435 vm_object_t cur_shared_object = NULL;
20436 int cur_shared_obj_ref_cnt = 0;
20437 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
20438
20439 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
20440
20441 /*
20442 * We need the exclusive lock here so that we can
20443 * block any page faults or lookups while we are
20444 * in the middle of freezing this vm map.
20445 */
20446 vm_map_t map = task->map;
20447
20448 vm_map_lock(map);
20449
20450 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20451
20452 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20453 if (vm_compressor_low_on_space()) {
20454 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20455 }
20456
20457 if (vm_swap_low_on_space()) {
20458 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20459 }
20460
20461 kr = KERN_NO_SPACE;
20462 goto done;
20463 }
20464
20465 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20466 /*
20467 * In-memory compressor backing the freezer. No disk.
20468 * So no need to do the evaluation phase.
20469 */
20470 evaluation_phase = FALSE;
20471
20472 if (eval_only == TRUE) {
20473 /*
20474 * We don't support 'eval_only' mode
20475 * in this non-swap config.
20476 */
20477 *freezer_error_code = FREEZER_ERROR_GENERIC;
20478 kr = KERN_INVALID_ARGUMENT;
20479 goto done;
20480 }
20481
20482 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20483 clock_get_uptime(&c_freezer_last_yield_ts);
20484 }
20485 again:
20486
20487 for (entry2 = vm_map_first_entry(map);
20488 entry2 != vm_map_to_entry(map);
20489 entry2 = entry2->vme_next) {
20490 vm_object_t src_object = VME_OBJECT(entry2);
20491
20492 if (src_object &&
20493 !entry2->is_sub_map &&
20494 !src_object->phys_contiguous) {
20495 /* If eligible, scan the entry, moving eligible pages over to our parent object */
20496
20497 if (src_object->internal == TRUE) {
20498 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
20499 /*
20500 * We skip purgeable objects during evaluation phase only.
20501 * If we decide to freeze this process, we'll explicitly
20502 * purge these objects before we go around again with
20503 * 'evaluation_phase' set to FALSE.
20504 */
20505
20506 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20507 /*
20508 * We want to purge objects that may not belong to this task but are mapped
20509 * in this task alone. Since we already purged this task's purgeable memory
20510 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20511 * on this task's purgeable objects. Hence the check for only volatile objects.
20512 */
20513 if (evaluation_phase == FALSE &&
20514 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20515 (src_object->ref_count == 1)) {
20516 vm_object_lock(src_object);
20517 vm_object_purge(src_object, 0);
20518 vm_object_unlock(src_object);
20519 }
20520 continue;
20521 }
20522
20523 /*
20524 * Pages belonging to this object could be swapped to disk.
20525 * Make sure it's not a shared object because we could end
20526 * up just bringing it back in again.
20527 *
20528 * We try to optimize somewhat by checking for objects that are mapped
20529 * more than once within our own map. But we don't do full searches;
20530 * we just look at the entries following our current entry.
20531 */
20532
20533 if (src_object->ref_count > 1) {
20534 if (src_object != cur_shared_object) {
20535 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20536 dirty_shared_count += obj_pages_snapshot;
20537
20538 cur_shared_object = src_object;
20539 cur_shared_obj_ref_cnt = 1;
20540 continue;
20541 } else {
20542 cur_shared_obj_ref_cnt++;
20543 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20544 /*
20545 * Fall through to below and treat this object as private.
20546 * So deduct its pages from our shared total and add it to the
20547 * private total.
20548 */
20549
20550 dirty_shared_count -= obj_pages_snapshot;
20551 dirty_private_count += obj_pages_snapshot;
20552 } else {
20553 continue;
20554 }
20555 }
20556 }
20557
20558
20559 if (src_object->ref_count == 1) {
20560 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20561 }
20562
20563 if (evaluation_phase == TRUE) {
20564 continue;
20565 }
20566 }
20567
20568 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
20569 *wired_count += src_object->wired_page_count;
20570
20571 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20572 if (vm_compressor_low_on_space()) {
20573 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20574 }
20575
20576 if (vm_swap_low_on_space()) {
20577 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20578 }
20579
20580 kr = KERN_NO_SPACE;
20581 break;
20582 }
20583 if (paged_out_count >= dirty_budget) {
20584 break;
20585 }
20586 dirty_budget -= paged_out_count;
20587 }
20588 }
20589 }
20590
20591 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
20592 if (evaluation_phase) {
20593 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
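/*
 * Worked example (hypothetical numbers, for illustration only): with a
 * 600MB per-process shared cap and 16KB pages, shared_pages_threshold
 * would be (600 * 1024 * 1024) / 16384 = 38400 pages.
 */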
20594
20595 if (dirty_shared_count > shared_pages_threshold) {
20596 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20597 kr = KERN_FAILURE;
20598 goto done;
20599 }
20600
20601 if (dirty_shared_count &&
20602 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
20603 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20604 kr = KERN_FAILURE;
20605 goto done;
20606 }
20607
20608 evaluation_phase = FALSE;
20609 dirty_shared_count = dirty_private_count = 0;
20610
20611 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20612 clock_get_uptime(&c_freezer_last_yield_ts);
20613
20614 if (eval_only) {
20615 kr = KERN_SUCCESS;
20616 goto done;
20617 }
20618
20619 vm_purgeable_purge_task_owned(task);
20620
20621 goto again;
20622 } else {
20623 kr = KERN_SUCCESS;
20624 }
20625
20626 done:
20627 vm_map_unlock(map);
20628
20629 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20630 vm_object_compressed_freezer_done();
20631 }
20632 return kr;
20633 }
20634
20635 #endif
20636
20637 /*
20638 * vm_map_entry_should_cow_for_true_share:
20639 *
20640 * Determines if the map entry should be clipped and set up for copy-on-write
20641 * to avoid applying "true_share" to a large VM object when only a subset is
20642 * targeted.
20643 *
20644 * For now, we target only the map entries created for the Objective C
20645 * Garbage Collector, which initially have the following properties:
20646 * - alias == VM_MEMORY_MALLOC
20647 * - wired_count == 0
20648 * - !needs_copy
20649 * and a VM object with:
20650 * - internal
20651 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20652 * - !true_share
20653 * - vo_size == ANON_CHUNK_SIZE
20654 *
20655 * Only non-kernel map entries.
20656 */
20657 boolean_t
20658 vm_map_entry_should_cow_for_true_share(
20659 vm_map_entry_t entry)
20660 {
20661 vm_object_t object;
20662
20663 if (entry->is_sub_map) {
20664 /* entry does not point at a VM object */
20665 return FALSE;
20666 }
20667
20668 if (entry->needs_copy) {
20669 /* already set for copy_on_write: done! */
20670 return FALSE;
20671 }
20672
20673 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20674 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20675 /* not a malloc heap or Obj-C Garbage Collector heap */
20676 return FALSE;
20677 }
20678
20679 if (entry->wired_count) {
20680 /* wired: can't change the map entry... */
20681 vm_counters.should_cow_but_wired++;
20682 return FALSE;
20683 }
20684
20685 object = VME_OBJECT(entry);
20686
20687 if (object == VM_OBJECT_NULL) {
20688 /* no object yet... */
20689 return FALSE;
20690 }
20691
20692 if (!object->internal) {
20693 /* not an internal object */
20694 return FALSE;
20695 }
20696
20697 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20698 /* not the default copy strategy */
20699 return FALSE;
20700 }
20701
20702 if (object->true_share) {
20703 /* already true_share: too late to avoid it */
20704 return FALSE;
20705 }
20706
20707 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20708 object->vo_size != ANON_CHUNK_SIZE) {
20709 /* ... not an object created for the ObjC Garbage Collector */
20710 return FALSE;
20711 }
20712
20713 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20714 object->vo_size != 2048 * 4096) {
20715 /* ... not a "MALLOC_SMALL" heap */
20716 return FALSE;
20717 }
20718
20719 /*
20720 * All the criteria match: we have a large object being targeted for "true_share".
20721 * To limit the adverse side-effects linked with "true_share", tell the caller to
20722 * try and avoid setting up the entire object for "true_share" by clipping the
20723 * targeted range and setting it up for copy-on-write.
20724 */
20725 return TRUE;
20726 }
20727
20728 vm_map_offset_t
20729 vm_map_round_page_mask(
20730 vm_map_offset_t offset,
20731 vm_map_offset_t mask)
20732 {
20733 return VM_MAP_ROUND_PAGE(offset, mask);
20734 }
20735
20736 vm_map_offset_t
20737 vm_map_trunc_page_mask(
20738 vm_map_offset_t offset,
20739 vm_map_offset_t mask)
20740 {
20741 return VM_MAP_TRUNC_PAGE(offset, mask);
20742 }
20743
20744 boolean_t
20745 vm_map_page_aligned(
20746 vm_map_offset_t offset,
20747 vm_map_offset_t mask)
20748 {
20749 return ((offset) & mask) == 0;
20750 }
20751
20752 int
20753 vm_map_page_shift(
20754 vm_map_t map)
20755 {
20756 return VM_MAP_PAGE_SHIFT(map);
20757 }
20758
20759 int
20760 vm_map_page_size(
20761 vm_map_t map)
20762 {
20763 return VM_MAP_PAGE_SIZE(map);
20764 }
20765
20766 vm_map_offset_t
20767 vm_map_page_mask(
20768 vm_map_t map)
20769 {
20770 return VM_MAP_PAGE_MASK(map);
20771 }
20772
20773 kern_return_t
20774 vm_map_set_page_shift(
20775 vm_map_t map,
20776 int pageshift)
20777 {
20778 if (map->hdr.nentries != 0) {
20779 /* too late to change page size */
20780 return KERN_FAILURE;
20781 }
20782
20783 map->hdr.page_shift = pageshift;
20784
20785 return KERN_SUCCESS;
20786 }
20787
20788 kern_return_t
20789 vm_map_query_volatile(
20790 vm_map_t map,
20791 mach_vm_size_t *volatile_virtual_size_p,
20792 mach_vm_size_t *volatile_resident_size_p,
20793 mach_vm_size_t *volatile_compressed_size_p,
20794 mach_vm_size_t *volatile_pmap_size_p,
20795 mach_vm_size_t *volatile_compressed_pmap_size_p)
20796 {
20797 mach_vm_size_t volatile_virtual_size;
20798 mach_vm_size_t volatile_resident_count;
20799 mach_vm_size_t volatile_compressed_count;
20800 mach_vm_size_t volatile_pmap_count;
20801 mach_vm_size_t volatile_compressed_pmap_count;
20802 mach_vm_size_t resident_count;
20803 vm_map_entry_t entry;
20804 vm_object_t object;
20805
20806 /* map should be locked by caller */
20807
20808 volatile_virtual_size = 0;
20809 volatile_resident_count = 0;
20810 volatile_compressed_count = 0;
20811 volatile_pmap_count = 0;
20812 volatile_compressed_pmap_count = 0;
20813
20814 for (entry = vm_map_first_entry(map);
20815 entry != vm_map_to_entry(map);
20816 entry = entry->vme_next) {
20817 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
20818
20819 if (entry->is_sub_map) {
20820 continue;
20821 }
20822 if (!(entry->protection & VM_PROT_WRITE)) {
20823 continue;
20824 }
20825 object = VME_OBJECT(entry);
20826 if (object == VM_OBJECT_NULL) {
20827 continue;
20828 }
20829 if (object->purgable != VM_PURGABLE_VOLATILE &&
20830 object->purgable != VM_PURGABLE_EMPTY) {
20831 continue;
20832 }
20833 if (VME_OFFSET(entry)) {
20834 /*
20835 * If the map entry has been split and the object now
20836 * appears several times in the VM map, we don't want
20837 * to count the object's resident_page_count more than
20838 * once. We count it only for the first one, starting
20839 * at offset 0 and ignore the other VM map entries.
20840 */
20841 continue;
20842 }
20843 resident_count = object->resident_page_count;
20844 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
20845 resident_count = 0;
20846 } else {
20847 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
20848 }
20849
20850 volatile_virtual_size += entry->vme_end - entry->vme_start;
20851 volatile_resident_count += resident_count;
20852 if (object->pager) {
20853 volatile_compressed_count +=
20854 vm_compressor_pager_get_count(object->pager);
20855 }
20856 pmap_compressed_bytes = 0;
20857 pmap_resident_bytes =
20858 pmap_query_resident(map->pmap,
20859 entry->vme_start,
20860 entry->vme_end,
20861 &pmap_compressed_bytes);
20862 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
20863 volatile_compressed_pmap_count += (pmap_compressed_bytes
20864 / PAGE_SIZE);
20865 }
20866
20867 /* map is still locked on return */
20868
20869 *volatile_virtual_size_p = volatile_virtual_size;
20870 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
20871 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
20872 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
20873 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
20874
20875 return KERN_SUCCESS;
20876 }
20877
20878 void
20879 vm_map_sizes(vm_map_t map,
20880 vm_map_size_t * psize,
20881 vm_map_size_t * pfree,
20882 vm_map_size_t * plargest_free)
20883 {
20884 vm_map_entry_t entry;
20885 vm_map_offset_t prev;
20886 vm_map_size_t free, total_free, largest_free;
20887 boolean_t end;
20888
20889 if (!map) {
20890 *psize = *pfree = *plargest_free = 0;
20891 return;
20892 }
20893 total_free = largest_free = 0;
20894
20895 vm_map_lock_read(map);
20896 if (psize) {
20897 *psize = map->max_offset - map->min_offset;
20898 }
20899
20900 prev = map->min_offset;
20901 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
20902 end = (entry == vm_map_to_entry(map));
20903
20904 if (end) {
20905 free = entry->vme_end - prev;
20906 } else {
20907 free = entry->vme_start - prev;
20908 }
20909
20910 total_free += free;
20911 if (free > largest_free) {
20912 largest_free = free;
20913 }
20914
20915 if (end) {
20916 break;
20917 }
20918 prev = entry->vme_end;
20919 }
20920 vm_map_unlock_read(map);
20921 if (pfree) {
20922 *pfree = total_free;
20923 }
20924 if (plargest_free) {
20925 *plargest_free = largest_free;
20926 }
20927 }
20928
20929 #if VM_SCAN_FOR_SHADOW_CHAIN
20930 int vm_map_shadow_max(vm_map_t map);
20931 int
20932 vm_map_shadow_max(
20933 vm_map_t map)
20934 {
20935 int shadows, shadows_max;
20936 vm_map_entry_t entry;
20937 vm_object_t object, next_object;
20938
20939 if (map == NULL) {
20940 return 0;
20941 }
20942
20943 shadows_max = 0;
20944
20945 vm_map_lock_read(map);
20946
20947 for (entry = vm_map_first_entry(map);
20948 entry != vm_map_to_entry(map);
20949 entry = entry->vme_next) {
20950 if (entry->is_sub_map) {
20951 continue;
20952 }
20953 object = VME_OBJECT(entry);
20954 if (object == NULL) {
20955 continue;
20956 }
20957 vm_object_lock_shared(object);
20958 for (shadows = 0;
20959 object->shadow != NULL;
20960 shadows++, object = next_object) {
20961 next_object = object->shadow;
20962 vm_object_lock_shared(next_object);
20963 vm_object_unlock(object);
20964 }
20965 vm_object_unlock(object);
20966 if (shadows > shadows_max) {
20967 shadows_max = shadows;
20968 }
20969 }
20970
20971 vm_map_unlock_read(map);
20972
20973 return shadows_max;
20974 }
20975 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
20976
20977 void
20978 vm_commit_pagezero_status(vm_map_t lmap)
20979 {
20980 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
20981 }
20982
20983 #if XNU_TARGET_OS_OSX
20984 void
20985 vm_map_set_high_start(
20986 vm_map_t map,
20987 vm_map_offset_t high_start)
20988 {
20989 map->vmmap_high_start = high_start;
20990 }
20991 #endif /* XNU_TARGET_OS_OSX */
20992
20993
20994 /*
20995 * FORKED CORPSE FOOTPRINT
20996 *
20997 * A forked corpse gets a copy of the original VM map but its pmap is mostly
20998 * empty since it never ran and never got to fault in any pages.
20999 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21000 * a forked corpse would therefore return very little information.
21001 *
21002 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21003 * to vm_map_fork() to collect footprint information from the original VM map
21004 * and its pmap, and store it in the forked corpse's VM map. That information
21005 * is stored in place of the VM map's "hole list" since we'll never need to
21006 * lookup for holes in the corpse's map.
21007 *
21008 * The corpse's footprint info looks like this:
21009 *
21010 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21011 * as follows:
21012 * +---------------------------------------+
21013 * header-> | cf_size |
21014 * +-------------------+-------------------+
21015 * | cf_last_region | cf_last_zeroes |
21016 * +-------------------+-------------------+
21017 * region1-> | cfr_vaddr |
21018 * +-------------------+-------------------+
21019 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21020 * +---------------------------------------+
21021 * | d4 | d5 | ... |
21022 * +---------------------------------------+
21023 * | ... |
21024 * +-------------------+-------------------+
21025 * | dy | dz | na | na | cfr_vaddr... | <-region2
21026 * +-------------------+-------------------+
21027 * | cfr_vaddr (ctd) | cfr_num_pages |
21028 * +---------------------------------------+
21029 * | d0 | d1 ... |
21030 * +---------------------------------------+
21031 * ...
21032 * +---------------------------------------+
21033 * last region-> | cfr_vaddr |
21034 * +---------------------------------------+
21035 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21036 * +---------------------------------------+
21037 * ...
21038 * +---------------------------------------+
21039 * | dx | dy | dz | na | na | na | na | na |
21040 * +---------------------------------------+
21041 *
21042 * where:
21043 * cf_size: total size of the buffer (rounded to page size)
21044 * cf_last_region: offset in the buffer of the last "region" sub-header
21045 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21046 * of last region
21047 * cfr_vaddr: virtual address of the start of the covered "region"
21048 * cfr_num_pages: number of pages in the covered "region"
21049 * d*: disposition of the page at that virtual address
21050 * Regions in the buffer are word-aligned.
21051 *
21052 * We estimate the size of the buffer based on the number of memory regions
21053 * and the virtual size of the address space. While copying each memory region
21054 * during vm_map_fork(), we also collect the footprint info for that region
21055 * and store it in the buffer, packing it as much as possible (coalescing
21056 * contiguous memory regions to avoid having too many region headers and
21057 * avoiding long streaks of "zero" page dispositions by splitting footprint
21058 * "regions", so the number of regions in the footprint buffer might not match
21059 * the number of memory regions in the address space.
21060 *
21061 * We also have to copy the original task's "nonvolatile" ledgers since that's
21062 * part of the footprint and will need to be reported to any tool asking for
21063 * the footprint information of the forked corpse.
21064 */
21065
21066 uint64_t vm_map_corpse_footprint_count = 0;
21067 uint64_t vm_map_corpse_footprint_size_avg = 0;
21068 uint64_t vm_map_corpse_footprint_size_max = 0;
21069 uint64_t vm_map_corpse_footprint_full = 0;
21070 uint64_t vm_map_corpse_footprint_no_buf = 0;
21071
21072 struct vm_map_corpse_footprint_header {
21073 vm_size_t cf_size; /* allocated buffer size */
21074 uint32_t cf_last_region; /* offset of last region in buffer */
21075 union {
21076 uint32_t cfu_last_zeroes; /* during creation:
21077 * number of "zero" dispositions at
21078 * end of last region */
21079 uint32_t cfu_hint_region; /* during lookup:
21080 * offset of last looked up region */
21081 #define cf_last_zeroes cfu.cfu_last_zeroes
21082 #define cf_hint_region cfu.cfu_hint_region
21083 } cfu;
21084 };
21085 typedef uint8_t cf_disp_t;
21086 struct vm_map_corpse_footprint_region {
21087 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21088 uint32_t cfr_num_pages; /* number of pages in this "region" */
21089 cf_disp_t cfr_disposition[0]; /* disposition of each page */
21090 } __attribute__((packed));
21091
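/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): one way to step
 * from one packed "region" to the next in a corpse footprint buffer, using
 * the header/region layout documented above.  The helper name is
 * hypothetical; the real code below (collect and query) performs the same
 * word-aligned stride inline.
 */
#if 0
static struct vm_map_corpse_footprint_region *
corpse_footprint_next_region(
	struct vm_map_corpse_footprint_header *hdr,
	struct vm_map_corpse_footprint_region *cur)
{
	uint32_t offset;

	/* offset of "cur" within the buffer */
	offset = (uint32_t)((uintptr_t)cur - (uintptr_t)hdr);
	if (offset >= hdr->cf_last_region) {
		/* "cur" was the last region */
		return NULL;
	}
	/* skip the region header and its per-page dispositions... */
	offset += sizeof(*cur);
	offset += (uint32_t)(cur->cfr_num_pages * sizeof(cf_disp_t));
	/* ...and align to the next word boundary */
	offset = roundup(offset, sizeof(int));
	return (struct vm_map_corpse_footprint_region *)((char *)hdr + offset);
}
#endif
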
21092 static cf_disp_t
21093 vm_page_disposition_to_cf_disp(
21094 int disposition)
21095 {
21096 assert(sizeof(cf_disp_t) == 1);
21097 /* relocate bits that don't fit in a "uint8_t" */
21098 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21099 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21100 }
21101 /* cast gets rid of extra bits */
21102 return (cf_disp_t) disposition;
21103 }
21104
21105 static int
21106 vm_page_cf_disp_to_disposition(
21107 cf_disp_t cf_disp)
21108 {
21109 int disposition;
21110
21111 assert(sizeof(cf_disp_t) == 1);
21112 disposition = (int) cf_disp;
21113 /* move relocated bits back in place */
21114 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21115 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21116 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21117 }
21118 return disposition;
21119 }
21120
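/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): round-tripping a
 * disposition through the 8-bit corpse encoding.  VM_PAGE_QUERY_PAGE_REUSABLE
 * does not fit in a uint8_t, so it is parked in the FICTITIOUS bit, which the
 * reverse conversion clears (footprint dispositions are not expected to carry
 * FICTITIOUS themselves).
 */
#if 0
static void
cf_disp_roundtrip_example(void)
{
	int disp = VM_PAGE_QUERY_PAGE_PRESENT | VM_PAGE_QUERY_PAGE_REUSABLE;
	cf_disp_t enc = vm_page_disposition_to_cf_disp(disp);

	assert(vm_page_cf_disp_to_disposition(enc) == disp);
}
#endif
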
21121 /*
21122 * vm_map_corpse_footprint_new_region:
21123 * closes the current footprint "region" and creates a new one
21124 *
21125 * Returns NULL if there's not enough space in the buffer for a new region.
21126 */
21127 static struct vm_map_corpse_footprint_region *
21128 vm_map_corpse_footprint_new_region(
21129 struct vm_map_corpse_footprint_header *footprint_header)
21130 {
21131 uintptr_t footprint_edge;
21132 uint32_t new_region_offset;
21133 struct vm_map_corpse_footprint_region *footprint_region;
21134 struct vm_map_corpse_footprint_region *new_footprint_region;
21135
21136 footprint_edge = ((uintptr_t)footprint_header +
21137 footprint_header->cf_size);
21138 footprint_region = ((struct vm_map_corpse_footprint_region *)
21139 ((char *)footprint_header +
21140 footprint_header->cf_last_region));
21141 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21142 footprint_edge);
21143
21144 /* get rid of trailing zeroes in the last region */
21145 assert(footprint_region->cfr_num_pages >=
21146 footprint_header->cf_last_zeroes);
21147 footprint_region->cfr_num_pages -=
21148 footprint_header->cf_last_zeroes;
21149 footprint_header->cf_last_zeroes = 0;
21150
21151 /* reuse this region if it's now empty */
21152 if (footprint_region->cfr_num_pages == 0) {
21153 return footprint_region;
21154 }
21155
21156 /* compute offset of new region */
21157 new_region_offset = footprint_header->cf_last_region;
21158 new_region_offset += sizeof(*footprint_region);
21159 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21160 new_region_offset = roundup(new_region_offset, sizeof(int));
21161
21162 /* check if we're going over the edge */
21163 if (((uintptr_t)footprint_header +
21164 new_region_offset +
21165 sizeof(*footprint_region)) >=
21166 footprint_edge) {
21167 /* over the edge: no new region */
21168 return NULL;
21169 }
21170
21171 /* adjust offset of last region in header */
21172 footprint_header->cf_last_region = new_region_offset;
21173
21174 new_footprint_region = (struct vm_map_corpse_footprint_region *)
21175 ((char *)footprint_header +
21176 footprint_header->cf_last_region);
21177 new_footprint_region->cfr_vaddr = 0;
21178 new_footprint_region->cfr_num_pages = 0;
21179 /* caller needs to initialize new region */
21180
21181 return new_footprint_region;
21182 }
21183
21184 /*
21185 * vm_map_corpse_footprint_collect:
21186 * collects footprint information for "old_entry" in "old_map" and
21187 * stores it in "new_map"'s vmmap_corpse_footprint buffer.
21188 */
21189 kern_return_t
21190 vm_map_corpse_footprint_collect(
21191 vm_map_t old_map,
21192 vm_map_entry_t old_entry,
21193 vm_map_t new_map)
21194 {
21195 vm_map_offset_t va;
21196 kern_return_t kr;
21197 struct vm_map_corpse_footprint_header *footprint_header;
21198 struct vm_map_corpse_footprint_region *footprint_region;
21199 struct vm_map_corpse_footprint_region *new_footprint_region;
21200 cf_disp_t *next_disp_p;
21201 uintptr_t footprint_edge;
21202 uint32_t num_pages_tmp;
21203 int effective_page_size;
21204
21205 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21206
21207 va = old_entry->vme_start;
21208
21209 vm_map_lock_assert_exclusive(old_map);
21210 vm_map_lock_assert_exclusive(new_map);
21211
21212 assert(new_map->has_corpse_footprint);
21213 assert(!old_map->has_corpse_footprint);
21214 if (!new_map->has_corpse_footprint ||
21215 old_map->has_corpse_footprint) {
21216 /*
21217 * This can only transfer footprint info from a
21218 * map with a live pmap to a map with a corpse footprint.
21219 */
21220 return KERN_NOT_SUPPORTED;
21221 }
21222
21223 if (new_map->vmmap_corpse_footprint == NULL) {
21224 vm_offset_t buf;
21225 vm_size_t buf_size;
21226
21227 buf = 0;
21228 buf_size = (sizeof(*footprint_header) +
21229 (old_map->hdr.nentries
21230 *
21231 (sizeof(*footprint_region) +
21232 3)) /* potential alignment for each region */
21233 +
21234 ((old_map->size / effective_page_size)
21235 *
21236 sizeof(cf_disp_t))); /* disposition for each page */
21237 // printf("FBDP corpse map %p guesstimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21238 buf_size = round_page(buf_size);
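		/*
		 * Rough worked example (illustrative numbers only): a map with
		 * 1,000 entries and 1 GB of virtual size at 16 KB pages needs
		 * sizeof(*footprint_header) + 1,000 * (sizeof(*footprint_region) + 3)
		 * bytes of header/region overhead plus 65,536 one-byte dispositions,
		 * i.e. roughly 80 KB before rounding up to whole pages.
		 */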
21239
21240 /* limit buffer to 1 page to validate overflow detection */
21241 // buf_size = PAGE_SIZE;
21242
21243 /* limit size to a somewhat sane amount */
21244 #if XNU_TARGET_OS_OSX
21245 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
21246 #else /* XNU_TARGET_OS_OSX */
21247 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21248 #endif /* XNU_TARGET_OS_OSX */
21249 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21250 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21251 }
21252
21253 /*
21254 * Allocate the pageable buffer (with a trailing guard page).
21255 * It will be zero-filled on demand.
21256 */
21257 kr = kernel_memory_allocate(kernel_map,
21258 &buf,
21259 (buf_size
21260 + PAGE_SIZE), /* trailing guard page */
21261 0, /* mask */
21262 KMA_PAGEABLE | KMA_GUARD_LAST,
21263 VM_KERN_MEMORY_DIAG);
21264 if (kr != KERN_SUCCESS) {
21265 vm_map_corpse_footprint_no_buf++;
21266 return kr;
21267 }
21268
21269 /* initialize header and 1st region */
21270 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21271 new_map->vmmap_corpse_footprint = footprint_header;
21272
21273 footprint_header->cf_size = buf_size;
21274 footprint_header->cf_last_region =
21275 sizeof(*footprint_header);
21276 footprint_header->cf_last_zeroes = 0;
21277
21278 footprint_region = (struct vm_map_corpse_footprint_region *)
21279 ((char *)footprint_header +
21280 footprint_header->cf_last_region);
21281 footprint_region->cfr_vaddr = 0;
21282 footprint_region->cfr_num_pages = 0;
21283 } else {
21284 /* retrieve header and last region */
21285 footprint_header = (struct vm_map_corpse_footprint_header *)
21286 new_map->vmmap_corpse_footprint;
21287 footprint_region = (struct vm_map_corpse_footprint_region *)
21288 ((char *)footprint_header +
21289 footprint_header->cf_last_region);
21290 }
21291 footprint_edge = ((uintptr_t)footprint_header +
21292 footprint_header->cf_size);
21293
21294 if ((footprint_region->cfr_vaddr +
21295 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21296 effective_page_size))
21297 != old_entry->vme_start) {
21298 uint64_t num_pages_delta, num_pages_delta_size;
21299 uint32_t region_offset_delta_size;
21300
21301 /*
21302 * Not the next contiguous virtual address:
21303 * start a new region or store "zero" dispositions for
21304 * the missing pages?
21305 */
21306 /* size of gap in actual page dispositions */
21307 num_pages_delta = ((old_entry->vme_start -
21308 footprint_region->cfr_vaddr) / effective_page_size)
21309 - footprint_region->cfr_num_pages;
21310 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21311 /* size of gap as a new footprint region header */
21312 region_offset_delta_size =
21313 (sizeof(*footprint_region) +
21314 roundup(((footprint_region->cfr_num_pages -
21315 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21316 sizeof(int)) -
21317 ((footprint_region->cfr_num_pages -
21318 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21319 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
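		/*
		 * Cost comparison: filling the gap costs one "zero" disposition
		 * byte per missing page, while starting a new region costs a
		 * region header plus the padding needed to word-align the
		 * current one.
		 */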
21320 if (region_offset_delta_size < num_pages_delta_size ||
21321 os_add3_overflow(footprint_region->cfr_num_pages,
21322 (uint32_t) num_pages_delta,
21323 1,
21324 &num_pages_tmp)) {
21325 /*
21326 * Storing data for this gap would take more space
21327 * than inserting a new footprint region header:
21328 * let's start a new region and save space. If it's a
21329 * tie, let's avoid using a new region, since that
21330 * would require more region hops to find the right
21331 * range during lookups.
21332 *
21333 * If the current region's cfr_num_pages would overflow
21334 * if we added "zero" page dispositions for the gap,
21335 * no choice but to start a new region.
21336 */
21337 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21338 new_footprint_region =
21339 vm_map_corpse_footprint_new_region(footprint_header);
21340 /* check that we're not going over the edge */
21341 if (new_footprint_region == NULL) {
21342 goto over_the_edge;
21343 }
21344 footprint_region = new_footprint_region;
21345 /* initialize new region as empty */
21346 footprint_region->cfr_vaddr = old_entry->vme_start;
21347 footprint_region->cfr_num_pages = 0;
21348 } else {
21349 /*
21350 * Store "zero" page dispositions for the missing
21351 * pages.
21352 */
21353 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21354 for (; num_pages_delta > 0; num_pages_delta--) {
21355 next_disp_p = (cf_disp_t *)
21356 ((uintptr_t) footprint_region +
21357 sizeof(*footprint_region));
21358 next_disp_p += footprint_region->cfr_num_pages;
21359 /* check that we're not going over the edge */
21360 if ((uintptr_t)next_disp_p >= footprint_edge) {
21361 goto over_the_edge;
21362 }
21363 /* store "zero" disposition for this gap page */
21364 footprint_region->cfr_num_pages++;
21365 *next_disp_p = (cf_disp_t) 0;
21366 footprint_header->cf_last_zeroes++;
21367 }
21368 }
21369 }
21370
21371 for (va = old_entry->vme_start;
21372 va < old_entry->vme_end;
21373 va += effective_page_size) {
21374 int disposition;
21375 cf_disp_t cf_disp;
21376
21377 vm_map_footprint_query_page_info(old_map,
21378 old_entry,
21379 va,
21380 &disposition);
21381 cf_disp = vm_page_disposition_to_cf_disp(disposition);
21382
21383 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21384
21385 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21386 /*
21387 * Ignore "zero" dispositions at start of
21388 * region: just move start of region.
21389 */
21390 footprint_region->cfr_vaddr += effective_page_size;
21391 continue;
21392 }
21393
21394 /* would region's cfr_num_pages overflow? */
21395 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21396 &num_pages_tmp)) {
21397 /* overflow: create a new region */
21398 new_footprint_region =
21399 vm_map_corpse_footprint_new_region(
21400 footprint_header);
21401 if (new_footprint_region == NULL) {
21402 goto over_the_edge;
21403 }
21404 footprint_region = new_footprint_region;
21405 footprint_region->cfr_vaddr = va;
21406 footprint_region->cfr_num_pages = 0;
21407 }
21408
21409 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21410 sizeof(*footprint_region));
21411 next_disp_p += footprint_region->cfr_num_pages;
21412 /* check that we're not going over the edge */
21413 if ((uintptr_t)next_disp_p >= footprint_edge) {
21414 goto over_the_edge;
21415 }
21416 /* store this disposition */
21417 *next_disp_p = cf_disp;
21418 footprint_region->cfr_num_pages++;
21419
21420 if (cf_disp != 0) {
21421 /* non-zero disp: break the current zero streak */
21422 footprint_header->cf_last_zeroes = 0;
21423 /* done */
21424 continue;
21425 }
21426
21427 /* zero disp: add to the current streak of zeroes */
21428 footprint_header->cf_last_zeroes++;
21429 if ((footprint_header->cf_last_zeroes +
21430 roundup(((footprint_region->cfr_num_pages -
21431 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21432 (sizeof(int) - 1),
21433 sizeof(int))) <
21434 (sizeof(*footprint_header))) {
21435 /*
21436 * There are not enough trailing "zero" dispositions
21437 * (+ the extra padding we would need for the previous
21438 * region); creating a new region would not save space
21439 * at this point, so let's keep this "zero" disposition
21440 * in this region and reconsider later.
21441 */
21442 continue;
21443 }
21444 /*
21445 * Create a new region to avoid having too many consecutive
21446 * "zero" dispositions.
21447 */
21448 new_footprint_region =
21449 vm_map_corpse_footprint_new_region(footprint_header);
21450 if (new_footprint_region == NULL) {
21451 goto over_the_edge;
21452 }
21453 footprint_region = new_footprint_region;
21454 /* initialize the new region as empty ... */
21455 footprint_region->cfr_num_pages = 0;
21456 /* ... and skip this "zero" disp */
21457 footprint_region->cfr_vaddr = va + effective_page_size;
21458 }
21459
21460 return KERN_SUCCESS;
21461
21462 over_the_edge:
21463 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21464 vm_map_corpse_footprint_full++;
21465 return KERN_RESOURCE_SHORTAGE;
21466 }
21467
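/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): the expected
 * calling sequence when populating a corpse footprint.  Locking and entry
 * iteration are simplified here; the real call sites live in the
 * vm_map_fork() / corpse creation path.
 */
#if 0
static kern_return_t
corpse_footprint_collect_example(
	vm_map_t old_map,
	vm_map_t new_map)
{
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = KERN_SUCCESS;
	vm_map_lock(old_map);
	vm_map_lock(new_map);
	for (entry = vm_map_first_entry(old_map);
	    entry != vm_map_to_entry(old_map);
	    entry = entry->vme_next) {
		kr = vm_map_corpse_footprint_collect(old_map, entry, new_map);
		if (kr != KERN_SUCCESS) {
			/* e.g. KERN_RESOURCE_SHORTAGE: footprint buffer is full */
			break;
		}
	}
	/* trim trailing "zero" dispositions and the unused tail of the buffer */
	vm_map_corpse_footprint_collect_done(new_map);
	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	return kr;
}
#endif
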
21468 /*
21469 * vm_map_corpse_footprint_collect_done:
21470 * completes the footprint collection by getting rid of any remaining
21471 * trailing "zero" dispositions and trimming the unused part of the
21472 * kernel buffer
21473 */
21474 void
21475 vm_map_corpse_footprint_collect_done(
21476 vm_map_t new_map)
21477 {
21478 struct vm_map_corpse_footprint_header *footprint_header;
21479 struct vm_map_corpse_footprint_region *footprint_region;
21480 vm_size_t buf_size, actual_size;
21481 kern_return_t kr;
21482
21483 assert(new_map->has_corpse_footprint);
21484 if (!new_map->has_corpse_footprint ||
21485 new_map->vmmap_corpse_footprint == NULL) {
21486 return;
21487 }
21488
21489 footprint_header = (struct vm_map_corpse_footprint_header *)
21490 new_map->vmmap_corpse_footprint;
21491 buf_size = footprint_header->cf_size;
21492
21493 footprint_region = (struct vm_map_corpse_footprint_region *)
21494 ((char *)footprint_header +
21495 footprint_header->cf_last_region);
21496
21497 /* get rid of trailing zeroes in last region */
21498 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
21499 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
21500 footprint_header->cf_last_zeroes = 0;
21501
21502 actual_size = (vm_size_t)(footprint_header->cf_last_region +
21503 sizeof(*footprint_region) +
21504 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
21505
21506 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
21507 vm_map_corpse_footprint_size_avg =
21508 (((vm_map_corpse_footprint_size_avg *
21509 vm_map_corpse_footprint_count) +
21510 actual_size) /
21511 (vm_map_corpse_footprint_count + 1));
21512 vm_map_corpse_footprint_count++;
21513 if (actual_size > vm_map_corpse_footprint_size_max) {
21514 vm_map_corpse_footprint_size_max = actual_size;
21515 }
21516
21517 actual_size = round_page(actual_size);
21518 if (buf_size > actual_size) {
21519 kr = vm_deallocate(kernel_map,
21520 ((vm_address_t)footprint_header +
21521 actual_size +
21522 PAGE_SIZE), /* trailing guard page */
21523 (buf_size - actual_size));
21524 assertf(kr == KERN_SUCCESS,
21525 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21526 footprint_header,
21527 (uint64_t) buf_size,
21528 (uint64_t) actual_size,
21529 kr);
21530 kr = vm_protect(kernel_map,
21531 ((vm_address_t)footprint_header +
21532 actual_size),
21533 PAGE_SIZE,
21534 FALSE, /* set_maximum */
21535 VM_PROT_NONE);
21536 assertf(kr == KERN_SUCCESS,
21537 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21538 footprint_header,
21539 (uint64_t) buf_size,
21540 (uint64_t) actual_size,
21541 kr);
21542 }
21543
21544 footprint_header->cf_size = actual_size;
21545 }
21546
21547 /*
21548 * vm_map_corpse_footprint_query_page_info:
21549 * retrieves the disposition of the page at virtual address "va"
21550 * in the forked corpse's VM map
21551 *
21552 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21553 */
21554 kern_return_t
21555 vm_map_corpse_footprint_query_page_info(
21556 vm_map_t map,
21557 vm_map_offset_t va,
21558 int *disposition_p)
21559 {
21560 struct vm_map_corpse_footprint_header *footprint_header;
21561 struct vm_map_corpse_footprint_region *footprint_region;
21562 uint32_t footprint_region_offset;
21563 vm_map_offset_t region_start, region_end;
21564 int disp_idx;
21565 kern_return_t kr;
21566 int effective_page_size;
21567 cf_disp_t cf_disp;
21568
21569 if (!map->has_corpse_footprint) {
21570 *disposition_p = 0;
21571 kr = KERN_INVALID_ARGUMENT;
21572 goto done;
21573 }
21574
21575 footprint_header = map->vmmap_corpse_footprint;
21576 if (footprint_header == NULL) {
21577 *disposition_p = 0;
21578 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21579 kr = KERN_INVALID_ARGUMENT;
21580 goto done;
21581 }
21582
21583 /* start looking at the hint ("cf_hint_region") */
21584 footprint_region_offset = footprint_header->cf_hint_region;
21585
21586 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
21587
21588 lookup_again:
21589 if (footprint_region_offset < sizeof(*footprint_header)) {
21590 /* hint too low: start from 1st region */
21591 footprint_region_offset = sizeof(*footprint_header);
21592 }
21593 if (footprint_region_offset >= footprint_header->cf_last_region) {
21594 /* hint too high: re-start from 1st region */
21595 footprint_region_offset = sizeof(*footprint_header);
21596 }
21597 footprint_region = (struct vm_map_corpse_footprint_region *)
21598 ((char *)footprint_header + footprint_region_offset);
21599 region_start = footprint_region->cfr_vaddr;
21600 region_end = (region_start +
21601 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21602 effective_page_size));
21603 if (va < region_start &&
21604 footprint_region_offset != sizeof(*footprint_header)) {
21605 /* our range starts before the hint region */
21606
21607 /* reset the hint (in a racy way...) */
21608 footprint_header->cf_hint_region = sizeof(*footprint_header);
21609 /* lookup "va" again from 1st region */
21610 footprint_region_offset = sizeof(*footprint_header);
21611 goto lookup_again;
21612 }
21613
21614 while (va >= region_end) {
21615 if (footprint_region_offset >= footprint_header->cf_last_region) {
21616 break;
21617 }
21618 /* skip the region's header */
21619 footprint_region_offset += sizeof(*footprint_region);
21620 /* skip the region's page dispositions */
21621 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21622 /* align to next word boundary */
21623 footprint_region_offset =
21624 roundup(footprint_region_offset,
21625 sizeof(int));
21626 footprint_region = (struct vm_map_corpse_footprint_region *)
21627 ((char *)footprint_header + footprint_region_offset);
21628 region_start = footprint_region->cfr_vaddr;
21629 region_end = (region_start +
21630 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21631 effective_page_size));
21632 }
21633 if (va < region_start || va >= region_end) {
21634 /* page not found */
21635 *disposition_p = 0;
21636 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21637 kr = KERN_SUCCESS;
21638 goto done;
21639 }
21640
21641 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21642 footprint_header->cf_hint_region = footprint_region_offset;
21643
21644 /* get page disposition for "va" in this region */
21645 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
21646 cf_disp = footprint_region->cfr_disposition[disp_idx];
21647 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
21648 kr = KERN_SUCCESS;
21649 done:
21650 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21651 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21652 DTRACE_VM4(footprint_query_page_info,
21653 vm_map_t, map,
21654 vm_map_offset_t, va,
21655 int, *disposition_p,
21656 kern_return_t, kr);
21657
21658 return kr;
21659 }
21660
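/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): querying the
 * recorded disposition of one page in a corpse map and testing a bit of
 * the result.  "map" and "va" are placeholders for whatever the caller
 * is inspecting.
 */
#if 0
static boolean_t
corpse_page_is_present(
	vm_map_t map,
	vm_map_offset_t va)
{
	int disp = 0;

	if (vm_map_corpse_footprint_query_page_info(map, va, &disp) !=
	    KERN_SUCCESS) {
		return FALSE;
	}
	return (disp & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
#endif
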
21661 void
21662 vm_map_corpse_footprint_destroy(
21663 vm_map_t map)
21664 {
21665 if (map->has_corpse_footprint &&
21666 map->vmmap_corpse_footprint != 0) {
21667 struct vm_map_corpse_footprint_header *footprint_header;
21668 vm_size_t buf_size;
21669 kern_return_t kr;
21670
21671 footprint_header = map->vmmap_corpse_footprint;
21672 buf_size = footprint_header->cf_size;
21673 kr = vm_deallocate(kernel_map,
21674 (vm_offset_t) map->vmmap_corpse_footprint,
21675 ((vm_size_t) buf_size
21676 + PAGE_SIZE)); /* trailing guard page */
21677 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21678 map->vmmap_corpse_footprint = 0;
21679 map->has_corpse_footprint = FALSE;
21680 }
21681 }
21682
21683 /*
21684 * vm_map_copy_footprint_ledgers:
21685 * copies any ledger that's relevant to the memory footprint of "old_task"
21686 * into the forked corpse's task ("new_task")
21687 */
21688 void
21689 vm_map_copy_footprint_ledgers(
21690 task_t old_task,
21691 task_t new_task)
21692 {
21693 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21694 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21695 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21696 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21697 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21698 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21699 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21700 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21701 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21702 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21703 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21704 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21705 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21706 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21707 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21708 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21709 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21710 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21711 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21712 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21713 }
21714
21715 /*
21716 * vm_map_copy_ledger:
21717 * copies a single ledger entry's balance from "old_task" to "new_task"
21718 */
21719 void
21720 vm_map_copy_ledger(
21721 task_t old_task,
21722 task_t new_task,
21723 int ledger_entry)
21724 {
21725 ledger_amount_t old_balance, new_balance, delta;
21726
21727 assert(new_task->map->has_corpse_footprint);
21728 if (!new_task->map->has_corpse_footprint) {
21729 return;
21730 }
21731
21732 /* turn off sanity checks for the ledger we're about to mess with */
21733 ledger_disable_panic_on_negative(new_task->ledger,
21734 ledger_entry);
21735
21736 /* adjust "new_task" to match "old_task" */
21737 ledger_get_balance(old_task->ledger,
21738 ledger_entry,
21739 &old_balance);
21740 ledger_get_balance(new_task->ledger,
21741 ledger_entry,
21742 &new_balance);
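	/*
	 * Example: if old_balance is 5 MB and new_balance is 2 MB, the
	 * corpse's ledger gets a 3 MB credit below so it reports the
	 * original task's footprint.
	 */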
21743 if (new_balance == old_balance) {
21744 /* new == old: done */
21745 } else if (new_balance > old_balance) {
21746 /* new > old ==> new -= new - old */
21747 delta = new_balance - old_balance;
21748 ledger_debit(new_task->ledger,
21749 ledger_entry,
21750 delta);
21751 } else {
21752 /* new < old ==> new += old - new */
21753 delta = old_balance - new_balance;
21754 ledger_credit(new_task->ledger,
21755 ledger_entry,
21756 delta);
21757 }
21758 }
21759
21760 #if MACH_ASSERT
21761
21762 extern int pmap_ledgers_panic;
21763 extern int pmap_ledgers_panic_leeway;
21764
21765 #define LEDGER_DRIFT(__LEDGER) \
21766 int __LEDGER##_over; \
21767 ledger_amount_t __LEDGER##_over_total; \
21768 ledger_amount_t __LEDGER##_over_max; \
21769 int __LEDGER##_under; \
21770 ledger_amount_t __LEDGER##_under_total; \
21771 ledger_amount_t __LEDGER##_under_max
21772
21773 struct {
21774 uint64_t num_pmaps_checked;
21775
21776 LEDGER_DRIFT(phys_footprint);
21777 LEDGER_DRIFT(internal);
21778 LEDGER_DRIFT(internal_compressed);
21779 LEDGER_DRIFT(iokit_mapped);
21780 LEDGER_DRIFT(alternate_accounting);
21781 LEDGER_DRIFT(alternate_accounting_compressed);
21782 LEDGER_DRIFT(page_table);
21783 LEDGER_DRIFT(purgeable_volatile);
21784 LEDGER_DRIFT(purgeable_nonvolatile);
21785 LEDGER_DRIFT(purgeable_volatile_compressed);
21786 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
21787 LEDGER_DRIFT(tagged_nofootprint);
21788 LEDGER_DRIFT(tagged_footprint);
21789 LEDGER_DRIFT(tagged_nofootprint_compressed);
21790 LEDGER_DRIFT(tagged_footprint_compressed);
21791 LEDGER_DRIFT(network_volatile);
21792 LEDGER_DRIFT(network_nonvolatile);
21793 LEDGER_DRIFT(network_volatile_compressed);
21794 LEDGER_DRIFT(network_nonvolatile_compressed);
21795 LEDGER_DRIFT(media_nofootprint);
21796 LEDGER_DRIFT(media_footprint);
21797 LEDGER_DRIFT(media_nofootprint_compressed);
21798 LEDGER_DRIFT(media_footprint_compressed);
21799 LEDGER_DRIFT(graphics_nofootprint);
21800 LEDGER_DRIFT(graphics_footprint);
21801 LEDGER_DRIFT(graphics_nofootprint_compressed);
21802 LEDGER_DRIFT(graphics_footprint_compressed);
21803 LEDGER_DRIFT(neural_nofootprint);
21804 LEDGER_DRIFT(neural_footprint);
21805 LEDGER_DRIFT(neural_nofootprint_compressed);
21806 LEDGER_DRIFT(neural_footprint_compressed);
21807 } pmap_ledgers_drift;
21808
21809 void
21810 vm_map_pmap_check_ledgers(
21811 pmap_t pmap,
21812 ledger_t ledger,
21813 int pid,
21814 char *procname)
21815 {
21816 ledger_amount_t bal;
21817 boolean_t do_panic;
21818
21819 do_panic = FALSE;
21820
21821 pmap_ledgers_drift.num_pmaps_checked++;
21822
21823 #define LEDGER_CHECK_BALANCE(__LEDGER) \
21824 MACRO_BEGIN \
21825 int panic_on_negative = TRUE; \
21826 ledger_get_balance(ledger, \
21827 task_ledgers.__LEDGER, \
21828 &bal); \
21829 ledger_get_panic_on_negative(ledger, \
21830 task_ledgers.__LEDGER, \
21831 &panic_on_negative); \
21832 if (bal != 0) { \
21833 if (panic_on_negative || \
21834 (pmap_ledgers_panic && \
21835 pmap_ledgers_panic_leeway > 0 && \
21836 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
21837 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
21838 do_panic = TRUE; \
21839 } \
21840 printf("LEDGER BALANCE proc %d (%s) " \
21841 "\"%s\" = %lld\n", \
21842 pid, procname, #__LEDGER, bal); \
21843 if (bal > 0) { \
21844 pmap_ledgers_drift.__LEDGER##_over++; \
21845 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
21846 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
21847 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
21848 } \
21849 } else if (bal < 0) { \
21850 pmap_ledgers_drift.__LEDGER##_under++; \
21851 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
21852 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
21853 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
21854 } \
21855 } \
21856 } \
21857 MACRO_END
21858
21859 LEDGER_CHECK_BALANCE(phys_footprint);
21860 LEDGER_CHECK_BALANCE(internal);
21861 LEDGER_CHECK_BALANCE(internal_compressed);
21862 LEDGER_CHECK_BALANCE(iokit_mapped);
21863 LEDGER_CHECK_BALANCE(alternate_accounting);
21864 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
21865 LEDGER_CHECK_BALANCE(page_table);
21866 LEDGER_CHECK_BALANCE(purgeable_volatile);
21867 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
21868 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
21869 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
21870 LEDGER_CHECK_BALANCE(tagged_nofootprint);
21871 LEDGER_CHECK_BALANCE(tagged_footprint);
21872 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
21873 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
21874 LEDGER_CHECK_BALANCE(network_volatile);
21875 LEDGER_CHECK_BALANCE(network_nonvolatile);
21876 LEDGER_CHECK_BALANCE(network_volatile_compressed);
21877 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
21878 LEDGER_CHECK_BALANCE(media_nofootprint);
21879 LEDGER_CHECK_BALANCE(media_footprint);
21880 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
21881 LEDGER_CHECK_BALANCE(media_footprint_compressed);
21882 LEDGER_CHECK_BALANCE(graphics_nofootprint);
21883 LEDGER_CHECK_BALANCE(graphics_footprint);
21884 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
21885 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
21886 LEDGER_CHECK_BALANCE(neural_nofootprint);
21887 LEDGER_CHECK_BALANCE(neural_footprint);
21888 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
21889 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
21890
21891 if (do_panic) {
21892 if (pmap_ledgers_panic) {
21893 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
21894 pmap, pid, procname);
21895 } else {
21896 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
21897 pmap, pid, procname);
21898 }
21899 }
21900 }
21901 #endif /* MACH_ASSERT */