osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68
  69 #include <vm/vm_options.h>
  70
  71 #include <libkern/OSAtomic.h>
  72
  73 #include <mach/kern_return.h>
  74 #include <mach/port.h>
  75 #include <mach/vm_attributes.h>
  76 #include <mach/vm_param.h>
  77 #include <mach/vm_behavior.h>
  78 #include <mach/vm_statistics.h>
  79 #include <mach/memory_object.h>
  80 #include <mach/mach_vm.h>
  81 #include <machine/cpu_capabilities.h>
  82 #include <mach/sdt.h>
  83
  84 #include <kern/assert.h>
  85 #include <kern/backtrace.h>
  86 #include <kern/counters.h>
  87 #include <kern/exc_guard.h>
  88 #include <kern/kalloc.h>
  89 #include <kern/zalloc.h>
  90
  91 #include <vm/cpm.h>
  92 #include <vm/vm_compressor.h>
  93 #include <vm/vm_compressor_pager.h>
  94 #include <vm/vm_init.h>
  95 #include <vm/vm_fault.h>
  96 #include <vm/vm_map.h>
  97 #include <vm/vm_object.h>
  98 #include <vm/vm_page.h>
  99 #include <vm/vm_pageout.h>
 100 #include <vm/pmap.h>
 101 #include <vm/vm_kern.h>
 102 #include <ipc/ipc_port.h>
 103 #include <kern/sched_prim.h>
 104 #include <kern/misc_protos.h>
 105
 106 #include <mach/vm_map_server.h>
 107 #include <mach/mach_host_server.h>
 108 #include <vm/vm_protos.h>
 109 #include <vm/vm_purgeable_internal.h>
 110
 111 #include <vm/vm_protos.h>
 112 #include <vm/vm_shared_region.h>
 113 #include <vm/vm_map_store.h>
 114
 115 #include <san/kasan.h>
 116
 117 #include <sys/codesign.h>
 118 #include <libkern/section_keywords.h>
 119 #if DEVELOPMENT || DEBUG
 120 extern int proc_selfcsflags(void);
 121 #if CONFIG_EMBEDDED
 122 extern int panic_on_unsigned_execute;
 123 #endif /* CONFIG_EMBEDDED */
 124 #endif /* DEVELOPMENT || DEBUG */
 125
 126 #if __arm64__
 127 extern const int fourk_binary_compatibility_unsafe;
 128 extern const int fourk_binary_compatibility_allow_wx;
 129 #endif /* __arm64__ */
 130 extern int proc_selfpid(void);
 131 extern char *proc_name_address(void *p);
 132
 133 #if VM_MAP_DEBUG_APPLE_PROTECT
 134 int vm_map_debug_apple_protect = 0;
 135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 136 #if VM_MAP_DEBUG_FOURK
 137 int vm_map_debug_fourk = 0;
 138 #endif /* VM_MAP_DEBUG_FOURK */
 139
 140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
 141 int vm_map_executable_immutable_verbose = 0;
 142
 143 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
 144
 145 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
 146 /* Internal prototypes
 147  */
 148
 149 static void vm_map_simplify_range(
 150         vm_map_t        map,
 151         vm_map_offset_t start,
 152         vm_map_offset_t end);   /* forward */
 153
 154 static boolean_t        vm_map_range_check(
 155         vm_map_t        map,
 156         vm_map_offset_t start,
 157         vm_map_offset_t end,
 158         vm_map_entry_t  *entry);
 159
 160 static vm_map_entry_t   _vm_map_entry_create(
 161         struct vm_map_header    *map_header, boolean_t map_locked);
 162
 163 static void             _vm_map_entry_dispose(
 164         struct vm_map_header    *map_header,
 165         vm_map_entry_t          entry);
 166
 167 static void             vm_map_pmap_enter(
 168         vm_map_t                map,
 169         vm_map_offset_t         addr,
 170         vm_map_offset_t         end_addr,
 171         vm_object_t             object,
 172         vm_object_offset_t      offset,
 173         vm_prot_t               protection);
 174
 175 static void             _vm_map_clip_end(
 176         struct vm_map_header    *map_header,
 177         vm_map_entry_t          entry,
 178         vm_map_offset_t         end);
 179
 180 static void             _vm_map_clip_start(
 181         struct vm_map_header    *map_header,
 182         vm_map_entry_t          entry,
 183         vm_map_offset_t         start);
 184
 185 static void             vm_map_entry_delete(
 186         vm_map_t        map,
 187         vm_map_entry_t  entry);
 188
 189 static kern_return_t    vm_map_delete(
 190         vm_map_t        map,
 191         vm_map_offset_t start,
 192         vm_map_offset_t end,
 193         int             flags,
 194         vm_map_t        zap_map);
 195
 196 static void             vm_map_copy_insert(
 197         vm_map_t        map,
 198         vm_map_entry_t  after_where,
 199         vm_map_copy_t   copy);
 200
 201 static kern_return_t    vm_map_copy_overwrite_unaligned(
 202         vm_map_t        dst_map,
 203         vm_map_entry_t  entry,
 204         vm_map_copy_t   copy,
 205         vm_map_address_t start,
 206         boolean_t       discard_on_success);
 207
 208 static kern_return_t    vm_map_copy_overwrite_aligned(
 209         vm_map_t        dst_map,
 210         vm_map_entry_t  tmp_entry,
 211         vm_map_copy_t   copy,
 212         vm_map_offset_t start,
 213         pmap_t          pmap);
 214
 215 static kern_return_t    vm_map_copyin_kernel_buffer(
 216         vm_map_t        src_map,
 217         vm_map_address_t src_addr,
 218         vm_map_size_t   len,
 219         boolean_t       src_destroy,
 220         vm_map_copy_t   *copy_result);  /* OUT */
 221
 222 static kern_return_t    vm_map_copyout_kernel_buffer(
 223         vm_map_t        map,
 224         vm_map_address_t *addr, /* IN/OUT */
 225         vm_map_copy_t   copy,
 226         vm_map_size_t   copy_size,
 227         boolean_t       overwrite,
 228         boolean_t       consume_on_success);
 229
 230 static void             vm_map_fork_share(
 231         vm_map_t        old_map,
 232         vm_map_entry_t  old_entry,
 233         vm_map_t        new_map);
 234
 235 static boolean_t        vm_map_fork_copy(
 236         vm_map_t        old_map,
 237         vm_map_entry_t  *old_entry_p,
 238         vm_map_t        new_map,
 239         int             vm_map_copyin_flags);
 240
 241 static kern_return_t    vm_map_wire_nested(
 242         vm_map_t                   map,
 243         vm_map_offset_t            start,
 244         vm_map_offset_t            end,
 245         vm_prot_t                  caller_prot,
 246         vm_tag_t                   tag,
 247         boolean_t                  user_wire,
 248         pmap_t                     map_pmap,
 249         vm_map_offset_t            pmap_addr,
 250         ppnum_t                    *physpage_p);
 251
 252 static kern_return_t    vm_map_unwire_nested(
 253         vm_map_t                   map,
 254         vm_map_offset_t            start,
 255         vm_map_offset_t            end,
 256         boolean_t                  user_wire,
 257         pmap_t                     map_pmap,
 258         vm_map_offset_t            pmap_addr);
 259
 260 static kern_return_t    vm_map_overwrite_submap_recurse(
 261         vm_map_t                   dst_map,
 262         vm_map_offset_t            dst_addr,
 263         vm_map_size_t              dst_size);
 264
 265 static kern_return_t    vm_map_copy_overwrite_nested(
 266         vm_map_t                   dst_map,
 267         vm_map_offset_t            dst_addr,
 268         vm_map_copy_t              copy,
 269         boolean_t                  interruptible,
 270         pmap_t                     pmap,
 271         boolean_t                  discard_on_success);
 272
 273 static kern_return_t    vm_map_remap_extract(
 274         vm_map_t                map,
 275         vm_map_offset_t         addr,
 276         vm_map_size_t           size,
 277         boolean_t               copy,
 278         struct vm_map_header    *map_header,
 279         vm_prot_t               *cur_protection,
 280         vm_prot_t               *max_protection,
 281         vm_inherit_t            inheritance,
 282         boolean_t               pageable,
 283         boolean_t               same_map,
 284         vm_map_kernel_flags_t   vmk_flags);
 285
 286 static kern_return_t    vm_map_remap_range_allocate(
 287         vm_map_t                map,
 288         vm_map_address_t        *address,
 289         vm_map_size_t           size,
 290         vm_map_offset_t         mask,
 291         int                     flags,
 292         vm_map_kernel_flags_t   vmk_flags,
 293         vm_tag_t                tag,
 294         vm_map_entry_t          *map_entry);
 295
 296 static void             vm_map_region_look_for_page(
 297         vm_map_t                   map,
 298         vm_map_offset_t            va,
 299         vm_object_t                object,
 300         vm_object_offset_t         offset,
 301         int                        max_refcnt,
 302         int                        depth,
 303         vm_region_extended_info_t  extended,
 304         mach_msg_type_number_t count);
 305
 306 static int              vm_map_region_count_obj_refs(
 307         vm_map_entry_t             entry,
 308         vm_object_t                object);
 309
 310
 311 static kern_return_t    vm_map_willneed(
 312         vm_map_t        map,
 313         vm_map_offset_t start,
 314         vm_map_offset_t end);
 315
 316 static kern_return_t    vm_map_reuse_pages(
 317         vm_map_t        map,
 318         vm_map_offset_t start,
 319         vm_map_offset_t end);
 320
 321 static kern_return_t    vm_map_reusable_pages(
 322         vm_map_t        map,
 323         vm_map_offset_t start,
 324         vm_map_offset_t end);
 325
 326 static kern_return_t    vm_map_can_reuse(
 327         vm_map_t        map,
 328         vm_map_offset_t start,
 329         vm_map_offset_t end);
 330
 331 #if MACH_ASSERT
 332 static kern_return_t    vm_map_pageout(
 333         vm_map_t        map,
 334         vm_map_offset_t start,
 335         vm_map_offset_t end);
 336 #endif /* MACH_ASSERT */
 337
 338 static void             vm_map_corpse_footprint_destroy(
 339         vm_map_t        map);
 340
 341 pid_t find_largest_process_vm_map_entries(void);
 342
 343 /*
 344  * Macros to copy a vm_map_entry. We must be careful to correctly
 345  * manage the wired page count. vm_map_entry_copy() creates a new
 346  * map entry to the same memory - the wired count in the new entry
 347  * must be set to zero. vm_map_entry_copy_full() creates a new
 348  * entry that is identical to the old entry.  This preserves the
 349  * wire count; it's used for map splitting and zone changing in
 350  * vm_map_copyout.
 351  */
 352
 353 #if CONFIG_EMBEDDED
 354
 355 /*
 356  * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 357  * But for security reasons on embedded platforms, we don't want the
 358  * new mapping to be "used for jit", so we always reset the flag here.
 359  * Same for "pmap_cs_associated".
 360  */
 361 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)         \
 362 MACRO_BEGIN                                             \
 363         (NEW)->used_for_jit = FALSE;                    \
 364         (NEW)->pmap_cs_associated = FALSE;                              \
 365 MACRO_END
 366
 367 #else /* CONFIG_EMBEDDED */
 368
 369 /*
 370  * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 371  * On macOS, the new mapping can be "used for jit".
 372  */
 373 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)                         \
 374 MACRO_BEGIN                                                             \
 375         assert((NEW)->used_for_jit == (OLD)->used_for_jit);             \
 376         assert((NEW)->pmap_cs_associated == FALSE);                             \
 377 MACRO_END
 378
 379 #endif /* CONFIG_EMBEDDED */
 380
 381 #define vm_map_entry_copy(NEW, OLD)      \
 382 MACRO_BEGIN                             \
 383 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
 384         *(NEW) = *(OLD);                \
 385         (NEW)->is_shared = FALSE;       \
 386         (NEW)->needs_wakeup = FALSE;    \
 387         (NEW)->in_transition = FALSE;   \
 388         (NEW)->wired_count = 0;         \
 389         (NEW)->user_wired_count = 0;    \
 390         (NEW)->permanent = FALSE;       \
 391         VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD));    \
 392         (NEW)->from_reserved_zone = _vmec_reserved;     \
 393         if ((NEW)->iokit_acct) {                        \
 394              assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
 395              (NEW)->iokit_acct = FALSE;                 \
 396              (NEW)->use_pmap = TRUE;                    \
 397         }                                               \
 398         (NEW)->vme_resilient_codesign = FALSE; \
 399         (NEW)->vme_resilient_media = FALSE;     \
 400         (NEW)->vme_atomic = FALSE;      \
 401         (NEW)->vme_no_copy_on_read = FALSE;     \
 402 MACRO_END
 403
 404 #define vm_map_entry_copy_full(NEW, OLD)                 \
 405 MACRO_BEGIN                                             \
 406 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
 407 (*(NEW) = *(OLD));                                      \
 408 (NEW)->from_reserved_zone = _vmecf_reserved;                    \
 409 MACRO_END
 410
 411 /*
 412  * Normal lock_read_to_write() returns FALSE/0 on failure.
 413  * These functions evaluate to zero on success and non-zero value on failure.
 414  */
 415 __attribute__((always_inline))
 416 int
 417 vm_map_lock_read_to_write(vm_map_t map)
 418 {
 419         if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
 420                 DTRACE_VM(vm_map_lock_upgrade);
 421                 return 0;
 422         }
 423         return 1;
 424 }
 425
 426 __attribute__((always_inline))
 427 boolean_t
 428 vm_map_try_lock(vm_map_t map)
 429 {
 430         if (lck_rw_try_lock_exclusive(&(map)->lock)) {
 431                 DTRACE_VM(vm_map_lock_w);
 432                 return TRUE;
 433         }
 434         return FALSE;
 435 }
 436
 437 __attribute__((always_inline))
 438 boolean_t
 439 vm_map_try_lock_read(vm_map_t map)
 440 {
 441         if (lck_rw_try_lock_shared(&(map)->lock)) {
 442                 DTRACE_VM(vm_map_lock_r);
 443                 return TRUE;
 444         }
 445         return FALSE;
 446 }
 447
 448 /*
 449  *      Decide if we want to allow processes to execute from their data or stack areas.
 450  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 451  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 452  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 453  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 454  *      specific pmap files since the default behavior varies according to architecture.  The
 455  *      main reason it varies is because of the need to provide binary compatibility with old
 456  *      applications that were written before these restrictions came into being.  In the old
 457  *      days, an app could execute anything it could read, but this has slowly been tightened
 458  *      up over time.  The default behavior is:
 459  *
 460  *      32-bit PPC apps         may execute from both stack and data areas
  461  *      32-bit Intel apps       may execute from data areas but not stack
 462  *      64-bit PPC/Intel apps   may not execute from either data or stack
 463  *
 464  *      An application on any architecture may override these defaults by explicitly
 465  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 466  *      system call.  This code here just determines what happens when an app tries to
 467  *      execute from a page that lacks execute permission.
 468  *
 469  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 470  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 471  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 472  *      execution from data areas for a particular binary even if the arch normally permits it. As
 473  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 474  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 475  *      are not all NX-safe.
 476  */
 477
 478 extern int allow_data_exec, allow_stack_exec;
 479
 480 int
 481 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 482 {
 483         int current_abi;
 484
 485         if (map->pmap == kernel_pmap) {
 486                 return FALSE;
 487         }
 488
 489         /*
 490          * Determine if the app is running in 32 or 64 bit mode.
 491          */
 492
 493         if (vm_map_is_64bit(map)) {
 494                 current_abi = VM_ABI_64;
 495         } else {
 496                 current_abi = VM_ABI_32;
 497         }
 498
 499         /*
 500          * Determine if we should allow the execution based on whether it's a
 501          * stack or data area and the current architecture.
 502          */
 503
 504         if (user_tag == VM_MEMORY_STACK) {
 505                 return allow_stack_exec & current_abi;
 506         }
 507
 508         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 509 }
 510
 511
 512 /*
 513  *      Virtual memory maps provide for the mapping, protection,
 514  *      and sharing of virtual memory objects.  In addition,
 515  *      this module provides for an efficient virtual copy of
 516  *      memory from one map to another.
 517  *
 518  *      Synchronization is required prior to most operations.
 519  *
 520  *      Maps consist of an ordered doubly-linked list of simple
 521  *      entries; a single hint is used to speed up lookups.
 522  *
 523  *      Sharing maps have been deleted from this version of Mach.
 524  *      All shared objects are now mapped directly into the respective
 525  *      maps.  This requires a change in the copy on write strategy;
 526  *      the asymmetric (delayed) strategy is used for shared temporary
 527  *      objects instead of the symmetric (shadow) strategy.  All maps
 528  *      are now "top level" maps (either task map, kernel map or submap
 529  *      of the kernel map).
 530  *
  531  *      Since portions of maps are specified by start/end addresses,
 532  *      which may not align with existing map entries, all
 533  *      routines merely "clip" entries to these start/end values.
 534  *      [That is, an entry is split into two, bordering at a
 535  *      start or end value.]  Note that these clippings may not
 536  *      always be necessary (as the two resulting entries are then
 537  *      not changed); however, the clipping is done for convenience.
 538  *      No attempt is currently made to "glue back together" two
 539  *      abutting entries.
 540  *
 541  *      The symmetric (shadow) copy strategy implements virtual copy
 542  *      by copying VM object references from one map to
 543  *      another, and then marking both regions as copy-on-write.
 544  *      It is important to note that only one writeable reference
 545  *      to a VM object region exists in any map when this strategy
 546  *      is used -- this means that shadow object creation can be
  547  *      delayed until a write operation occurs.  The asymmetric (delayed)
 548  *      strategy allows multiple maps to have writeable references to
 549  *      the same region of a vm object, and hence cannot delay creating
 550  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 551  *      Copying of permanent objects is completely different; see
 552  *      vm_object_copy_strategically() in vm_object.c.
 553  */
 554
 555 static zone_t   vm_map_zone;                            /* zone for vm_map structures */
 556 zone_t                  vm_map_entry_zone;                      /* zone for vm_map_entry structures */
 557 static zone_t   vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking allocations */
 558 static zone_t   vm_map_copy_zone;                       /* zone for vm_map_copy structures */
 559 zone_t                  vm_map_holes_zone;                      /* zone for vm map holes (vm_map_links) structures */
 560
 561
 562 /*
 563  *      Placeholder object for submap operations.  This object is dropped
 564  *      into the range by a call to vm_map_find, and removed when
 565  *      vm_map_submap creates the submap.
 566  */
 567
 568 vm_object_t     vm_submap_object;
 569
 570 static void             *map_data;
 571 static vm_size_t        map_data_size;
 572 static void             *kentry_data;
 573 static vm_size_t        kentry_data_size;
 574 static void             *map_holes_data;
 575 static vm_size_t        map_holes_data_size;
 576
 577 #if CONFIG_EMBEDDED
 578 #define         NO_COALESCE_LIMIT  0
 579 #else
 580 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 581 #endif
 582
 583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 584 unsigned int not_in_kdp = 1;
 585
 586 unsigned int vm_map_set_cache_attr_count = 0;
 587
 588 kern_return_t
 589 vm_map_set_cache_attr(
 590         vm_map_t        map,
 591         vm_map_offset_t va)
 592 {
 593         vm_map_entry_t  map_entry;
 594         vm_object_t     object;
 595         kern_return_t   kr = KERN_SUCCESS;
 596
 597         vm_map_lock_read(map);
 598
 599         if (!vm_map_lookup_entry(map, va, &map_entry) ||
 600             map_entry->is_sub_map) {
 601                 /*
 602                  * that memory is not properly mapped
 603                  */
 604                 kr = KERN_INVALID_ARGUMENT;
 605                 goto done;
 606         }
 607         object = VME_OBJECT(map_entry);
 608
 609         if (object == VM_OBJECT_NULL) {
 610                 /*
 611                  * there should be a VM object here at this point
 612                  */
 613                 kr = KERN_INVALID_ARGUMENT;
 614                 goto done;
 615         }
 616         vm_object_lock(object);
 617         object->set_cache_attr = TRUE;
 618         vm_object_unlock(object);
 619
 620         vm_map_set_cache_attr_count++;
 621 done:
 622         vm_map_unlock_read(map);
 623
 624         return kr;
 625 }
 626
 627
 628 #if CONFIG_CODE_DECRYPTION
 629 /*
 630  * vm_map_apple_protected:
 631  * This remaps the requested part of the object with an object backed by
 632  * the decrypting pager.
 633  * crypt_info contains entry points and session data for the crypt module.
 634  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 635  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 636  */
 637 kern_return_t
 638 vm_map_apple_protected(
 639         vm_map_t                map,
 640         vm_map_offset_t         start,
 641         vm_map_offset_t         end,
 642         vm_object_offset_t      crypto_backing_offset,
 643         struct pager_crypt_info *crypt_info)
 644 {
 645         boolean_t       map_locked;
 646         kern_return_t   kr;
 647         vm_map_entry_t  map_entry;
 648         struct vm_map_entry tmp_entry;
 649         memory_object_t unprotected_mem_obj;
 650         vm_object_t     protected_object;
 651         vm_map_offset_t map_addr;
 652         vm_map_offset_t start_aligned, end_aligned;
 653         vm_object_offset_t      crypto_start, crypto_end;
 654         int             vm_flags;
 655         vm_map_kernel_flags_t vmk_flags;
 656
 657         vm_flags = 0;
 658         vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 659
 660         map_locked = FALSE;
 661         unprotected_mem_obj = MEMORY_OBJECT_NULL;
 662
 663         start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
 664         end_aligned = vm_map_round_page(end, PAGE_MASK_64);
 665         start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
 666         end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
 667
 668 #if __arm64__
 669         /*
 670          * "start" and "end" might be 4K-aligned but not 16K-aligned,
 671          * so we might have to loop and establish up to 3 mappings:
 672          *
 673          * + the first 16K-page, which might overlap with the previous
 674          *   4K-aligned mapping,
 675          * + the center,
 676          * + the last 16K-page, which might overlap with the next
 677          *   4K-aligned mapping.
 678          * Each of these mapping might be backed by a vnode pager (if
 679          * properly page-aligned) or a "fourk_pager", itself backed by a
 680          * vnode pager (if 4K-aligned but not page-aligned).
 681          */
 682 #endif /* __arm64__ */
 683
 684         map_addr = start_aligned;
 685         for (map_addr = start_aligned;
 686             map_addr < end;
 687             map_addr = tmp_entry.vme_end) {
 688                 vm_map_lock(map);
 689                 map_locked = TRUE;
 690
 691                 /* lookup the protected VM object */
 692                 if (!vm_map_lookup_entry(map,
 693                     map_addr,
 694                     &map_entry) ||
 695                     map_entry->is_sub_map ||
 696                     VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
 697                     !(map_entry->protection & VM_PROT_EXECUTE)) {
 698                         /* that memory is not properly mapped */
 699                         kr = KERN_INVALID_ARGUMENT;
 700                         goto done;
 701                 }
 702
 703                 /* get the protected object to be decrypted */
 704                 protected_object = VME_OBJECT(map_entry);
 705                 if (protected_object == VM_OBJECT_NULL) {
 706                         /* there should be a VM object here at this point */
 707                         kr = KERN_INVALID_ARGUMENT;
 708                         goto done;
 709                 }
 710                 /* ensure protected object stays alive while map is unlocked */
 711                 vm_object_reference(protected_object);
 712
 713                 /* limit the map entry to the area we want to cover */
 714                 vm_map_clip_start(map, map_entry, start_aligned);
 715                 vm_map_clip_end(map, map_entry, end_aligned);
 716
 717                 tmp_entry = *map_entry;
 718                 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
 719                 vm_map_unlock(map);
 720                 map_locked = FALSE;
 721
 722                 /*
 723                  * This map entry might be only partially encrypted
 724                  * (if not fully "page-aligned").
 725                  */
 726                 crypto_start = 0;
 727                 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
 728                 if (tmp_entry.vme_start < start) {
 729                         if (tmp_entry.vme_start != start_aligned) {
 730                                 kr = KERN_INVALID_ADDRESS;
 731                         }
 732                         crypto_start += (start - tmp_entry.vme_start);
 733                 }
 734                 if (tmp_entry.vme_end > end) {
 735                         if (tmp_entry.vme_end != end_aligned) {
 736                                 kr = KERN_INVALID_ADDRESS;
 737                         }
 738                         crypto_end -= (tmp_entry.vme_end - end);
 739                 }
 740
 741                 /*
 742                  * This "extra backing offset" is needed to get the decryption
 743                  * routine to use the right key.  It adjusts for the possibly
 744                  * relative offset of an interposed "4K" pager...
 745                  */
 746                 if (crypto_backing_offset == (vm_object_offset_t) -1) {
 747                         crypto_backing_offset = VME_OFFSET(&tmp_entry);
 748                 }
 749
 750                 /*
 751                  * Lookup (and create if necessary) the protected memory object
 752                  * matching that VM object.
 753                  * If successful, this also grabs a reference on the memory object,
 754                  * to guarantee that it doesn't go away before we get a chance to map
 755                  * it.
 756                  */
 757                 unprotected_mem_obj = apple_protect_pager_setup(
 758                         protected_object,
 759                         VME_OFFSET(&tmp_entry),
 760                         crypto_backing_offset,
 761                         crypt_info,
 762                         crypto_start,
 763                         crypto_end);
 764
 765                 /* release extra ref on protected object */
 766                 vm_object_deallocate(protected_object);
 767
 768                 if (unprotected_mem_obj == NULL) {
 769                         kr = KERN_FAILURE;
 770                         goto done;
 771                 }
 772
 773                 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
 774                 /* can overwrite an immutable mapping */
 775                 vmk_flags.vmkf_overwrite_immutable = TRUE;
 776 #if __arm64__
 777                 if (tmp_entry.used_for_jit &&
 778                     (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
 779                     PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
 780                     fourk_binary_compatibility_unsafe &&
 781                     fourk_binary_compatibility_allow_wx) {
 782                         printf("** FOURK_COMPAT [%d]: "
 783                             "allowing write+execute at 0x%llx\n",
 784                             proc_selfpid(), tmp_entry.vme_start);
 785                         vmk_flags.vmkf_map_jit = TRUE;
 786                 }
 787 #endif /* __arm64__ */
 788
 789                 /* map this memory object in place of the current one */
 790                 map_addr = tmp_entry.vme_start;
 791                 kr = vm_map_enter_mem_object(map,
 792                     &map_addr,
 793                     (tmp_entry.vme_end -
 794                     tmp_entry.vme_start),
 795                     (mach_vm_offset_t) 0,
 796                     vm_flags,
 797                     vmk_flags,
 798                     VM_KERN_MEMORY_NONE,
 799                     (ipc_port_t)(uintptr_t) unprotected_mem_obj,
 800                     0,
 801                     TRUE,
 802                     tmp_entry.protection,
 803                     tmp_entry.max_protection,
 804                     tmp_entry.inheritance);
 805                 assertf(kr == KERN_SUCCESS,
 806                     "kr = 0x%x\n", kr);
 807                 assertf(map_addr == tmp_entry.vme_start,
 808                     "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
 809                     (uint64_t)map_addr,
 810                     (uint64_t) tmp_entry.vme_start,
 811                     &tmp_entry);
 812
 813 #if VM_MAP_DEBUG_APPLE_PROTECT
 814                 if (vm_map_debug_apple_protect) {
 815                         printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
 816                             " backing:[object:%p,offset:0x%llx,"
 817                             "crypto_backing_offset:0x%llx,"
 818                             "crypto_start:0x%llx,crypto_end:0x%llx]\n",
 819                             map,
 820                             (uint64_t) map_addr,
 821                             (uint64_t) (map_addr + (tmp_entry.vme_end -
 822                             tmp_entry.vme_start)),
 823                             unprotected_mem_obj,
 824                             protected_object,
 825                             VME_OFFSET(&tmp_entry),
 826                             crypto_backing_offset,
 827                             crypto_start,
 828                             crypto_end);
 829                 }
 830 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 831
 832                 /*
 833                  * Release the reference obtained by
 834                  * apple_protect_pager_setup().
 835                  * The mapping (if it succeeded) is now holding a reference on
 836                  * the memory object.
 837                  */
 838                 memory_object_deallocate(unprotected_mem_obj);
 839                 unprotected_mem_obj = MEMORY_OBJECT_NULL;
 840
 841                 /* continue with next map entry */
 842                 crypto_backing_offset += (tmp_entry.vme_end -
 843                     tmp_entry.vme_start);
 844                 crypto_backing_offset -= crypto_start;
 845         }
 846         kr = KERN_SUCCESS;
 847
 848 done:
 849         if (map_locked) {
 850                 vm_map_unlock(map);
 851         }
 852         return kr;
 853 }
 854 #endif  /* CONFIG_CODE_DECRYPTION */
 855
 856
/*
 * Lock group and lock attributes shared by all VM maps.
 * They are set up in vm_map_init(); vm_map_lck_grp and vm_map_lck_attr
 * are used when a map's s_lock mutex is initialized and destroyed.
 */
lck_grp_t               vm_map_lck_grp;
lck_grp_attr_t  vm_map_lck_grp_attr;
lck_attr_t              vm_map_lck_attr;
lck_attr_t              vm_map_lck_rw_attr;     /* vm_map_init() clears the debug attribute on this one */

/*
 * Platform-dependent defaults.
 * "malloc_no_cow" can be overridden with the boot-arg of the same name
 * (see vm_map_init()).
 */
#if CONFIG_EMBEDDED
int malloc_no_cow = 1;
#define VM_PROTECT_WX_FAIL 0
#else /* CONFIG_EMBEDDED */
int malloc_no_cow = 0;
#define VM_PROTECT_WX_FAIL 1
#endif /* CONFIG_EMBEDDED */
/*
 * Bit mask indexed by VM_MEMORY_MALLOC* tag; populated by vm_map_init()
 * when "malloc_no_cow" is non-zero.
 */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
#if DEBUG
/* Set from the "vm_check_map_sanity" boot-arg in vm_map_init(). */
int vm_check_map_sanity = 0;
#endif
 873
/*
 *      vm_map_init:
 *
 *      Initialize the vm_map module.  Must be called before
 *      any other vm_map routines.
 *
 *      Map and entry structures are allocated from zones -- we must
 *      initialize those zones.
 *
 *      The zones set up here are:
 *
 *      vm_map_zone:            used to allocate maps.
 *      vm_map_entry_zone:      used to allocate map entries.
 *      vm_map_entry_reserved_zone:     fallback zone for kernel map entries
 *      vm_map_copy_zone:       used to allocate map copies.
 *      vm_map_holes_zone:      used to allocate map hole list links.
 *
 *      The kernel allocates map entries from a special zone that is initially
 *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 *      the kernel to allocate more memory to an entry zone when it became
 *      empty since the very act of allocating memory implies the creation
 *      of a new entry.
 *
 *      Besides the zones, this routine initializes the vm_map lock group
 *      and lock attributes and reads the vm_map related boot-args.
 *      It relies on map_data, kentry_data and map_holes_data (and their
 *      sizes) having been set up by vm_map_steal_memory().
 */
void
vm_map_init(
        void)
{
        vm_size_t entry_zone_alloc_size;
        const char *mez_name = "VM map entries";

        /* zone for the maps themselves */
        vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
            PAGE_SIZE, "maps");
        zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
        /* the general map entry zone uses a different allocation size on LP64 */
#if     defined(__LP64__)
        entry_zone_alloc_size = PAGE_SIZE * 5;
#else
        entry_zone_alloc_size = PAGE_SIZE * 6;
#endif
        vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
            1024 * 1024, entry_zone_alloc_size,
            mez_name);
        zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
        zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
        zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);

        /* fallback zone for map entries; sized from the stolen kentry_data */
        vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
            kentry_data_size * 64, kentry_data_size,
            "Reserved VM map entries");
        zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
        /* Don't quarantine because we always need elements available */
        zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);

        vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
            16 * 1024, PAGE_SIZE, "VM map copies");
        zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);

        vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
            16 * 1024, PAGE_SIZE, "VM map holes");
        zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);

        /*
         *      Cram the map and kentry zones with initial data.
         *      Set reserved_zone non-collectible to aid zone_gc().
         */
        zone_change(vm_map_zone, Z_COLLECT, FALSE);
        zone_change(vm_map_zone, Z_FOREIGN, TRUE);
        zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);

        zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
        zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
        zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
        zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
        zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
        zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
        zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);

        zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
        zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
        zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
        zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
        zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
        zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);

        /*
         * Add the stolen memory to zones, adjust zone size and stolen counts.
         * zcram only up to the maximum number of pages for each zone chunk.
         */
        zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);

        /* largest piece handed to zcram() in one call for the two zones below */
        const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
        for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
                zcram(vm_map_entry_reserved_zone,
                    (vm_offset_t)kentry_data + off,
                    MIN(kentry_data_size - off, stride));
        }
        for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
                zcram(vm_map_holes_zone,
                    (vm_offset_t)map_holes_data + off,
                    MIN(map_holes_data_size - off, stride));
        }

        /*
         * Since these are covered by zones, remove them from stolen page accounting.
         */
        VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));

        /* lock group and attributes used for the locks of every map */
        lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
        lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
        lck_attr_setdefault(&vm_map_lck_attr);

        /* same defaults for the rw attribute, but with the debug attribute cleared */
        lck_attr_setdefault(&vm_map_lck_rw_attr);
        lck_attr_cleardebug(&vm_map_lck_rw_attr);

        /* boot-arg overrides for debugging knobs */
#if VM_MAP_DEBUG_APPLE_PROTECT
        PE_parse_boot_argn("vm_map_debug_apple_protect",
            &vm_map_debug_apple_protect,
            sizeof(vm_map_debug_apple_protect));
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_APPLE_FOURK
        PE_parse_boot_argn("vm_map_debug_fourk",
            &vm_map_debug_fourk,
            sizeof(vm_map_debug_fourk));
#endif /* VM_MAP_DEBUG_APPLE_FOURK */
        PE_parse_boot_argn("vm_map_executable_immutable",
            &vm_map_executable_immutable,
            sizeof(vm_map_executable_immutable));
        PE_parse_boot_argn("vm_map_executable_immutable_verbose",
            &vm_map_executable_immutable_verbose,
            sizeof(vm_map_executable_immutable_verbose));

        PE_parse_boot_argn("malloc_no_cow",
            &malloc_no_cow,
            sizeof(malloc_no_cow));
        if (malloc_no_cow) {
                /*
                 * Build the default mask, one bit per malloc memory tag.
                 * The tags on the commented-out lines are not part of the
                 * default mask.  The "vm_memory_malloc_no_cow_mask" boot-arg,
                 * when present, replaces the default computed here.
                 */
                vm_memory_malloc_no_cow_mask = 0ULL;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
//              vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
//              vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
                vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
//              vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
                PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
                    &vm_memory_malloc_no_cow_mask,
                    sizeof(vm_memory_malloc_no_cow_mask));
        }

#if DEBUG
        PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
        if (vm_check_map_sanity) {
                kprintf("VM sanity checking enabled\n");
        } else {
                kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
        }
#endif /* DEBUG */
}
1032
1033 void
1034 vm_map_steal_memory(
1035         void)
1036 {
1037         uint32_t kentry_initial_pages;
1038
1039         map_data_size = round_page(10 * sizeof(struct _vm_map));
1040         map_data = pmap_steal_memory(map_data_size);
1041
1042         /*
1043          * kentry_initial_pages corresponds to the number of kernel map entries
1044          * required during bootstrap until the asynchronous replenishment
1045          * scheme is activated and/or entries are available from the general
1046          * map entry pool.
1047          */
1048 #if     defined(__LP64__)
1049         kentry_initial_pages = 10;
1050 #else
1051         kentry_initial_pages = 6;
1052 #endif
1053
1054 #if CONFIG_GZALLOC
1055         /* If using the guard allocator, reserve more memory for the kernel
1056          * reserved map entry pool.
1057          */
1058         if (gzalloc_enabled()) {
1059                 kentry_initial_pages *= 1024;
1060         }
1061 #endif
1062
1063         kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1064         kentry_data = pmap_steal_memory(kentry_data_size);
1065
1066         map_holes_data_size = kentry_data_size;
1067         map_holes_data = pmap_steal_memory(map_holes_data_size);
1068 }
1069
1070 boolean_t vm_map_supports_hole_optimization = FALSE;
1071
1072 void
1073 vm_kernel_reserved_entry_init(void)
1074 {
1075         zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
1076
1077         /*
1078          * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1079          */
1080         zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
1081         vm_map_supports_hole_optimization = TRUE;
1082 }
1083
1084 void
1085 vm_map_disable_hole_optimization(vm_map_t map)
1086 {
1087         vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
1088
1089         if (map->holelistenabled) {
1090                 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1091
1092                 while (hole_entry != NULL) {
1093                         next_hole_entry = hole_entry->vme_next;
1094
1095                         hole_entry->vme_next = NULL;
1096                         hole_entry->vme_prev = NULL;
1097                         zfree(vm_map_holes_zone, hole_entry);
1098
1099                         if (next_hole_entry == head_entry) {
1100                                 hole_entry = NULL;
1101                         } else {
1102                                 hole_entry = next_hole_entry;
1103                         }
1104                 }
1105
1106                 map->holes_list = NULL;
1107                 map->holelistenabled = FALSE;
1108
1109                 map->first_free = vm_map_first_entry(map);
1110                 SAVE_HINT_HOLE_WRITE(map, NULL);
1111         }
1112 }
1113
1114 boolean_t
1115 vm_kernel_map_is_kernel(vm_map_t map)
1116 {
1117         return map->pmap == kernel_pmap;
1118 }
1119
1120 /*
1121  *      vm_map_create:
1122  *
1123  *      Creates and returns a new empty VM map with
1124  *      the given physical map structure, and having
1125  *      the given lower and upper address bounds.
1126  */
1127
1128 vm_map_t
1129 vm_map_create(
1130         pmap_t          pmap,
1131         vm_map_offset_t min,
1132         vm_map_offset_t max,
1133         boolean_t       pageable)
1134 {
1135         int options;
1136
1137         options = 0;
1138         if (pageable) {
1139                 options |= VM_MAP_CREATE_PAGEABLE;
1140         }
1141         return vm_map_create_options(pmap, min, max, options);
1142 }
1143
1144 vm_map_t
1145 vm_map_create_options(
1146         pmap_t          pmap,
1147         vm_map_offset_t min,
1148         vm_map_offset_t max,
1149         int             options)
1150 {
1151         vm_map_t        result;
1152         struct vm_map_links     *hole_entry = NULL;
1153
1154         if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1155                 /* unknown option */
1156                 return VM_MAP_NULL;
1157         }
1158
1159         result = (vm_map_t) zalloc(vm_map_zone);
1160         if (result == VM_MAP_NULL) {
1161                 panic("vm_map_create");
1162         }
1163
1164         vm_map_first_entry(result) = vm_map_to_entry(result);
1165         vm_map_last_entry(result)  = vm_map_to_entry(result);
1166         result->hdr.nentries = 0;
1167         if (options & VM_MAP_CREATE_PAGEABLE) {
1168                 result->hdr.entries_pageable = TRUE;
1169         } else {
1170                 result->hdr.entries_pageable = FALSE;
1171         }
1172
1173         vm_map_store_init( &(result->hdr));
1174
1175         result->hdr.page_shift = PAGE_SHIFT;
1176
1177         result->size = 0;
1178         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
1179         result->user_wire_size  = 0;
1180 #if !CONFIG_EMBEDDED
1181         result->vmmap_high_start = 0;
1182 #endif
1183         os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1184 #if     TASK_SWAPPER
1185         result->res_count = 1;
1186         result->sw_state = MAP_SW_IN;
1187 #endif  /* TASK_SWAPPER */
1188         result->pmap = pmap;
1189         result->min_offset = min;
1190         result->max_offset = max;
1191         result->wiring_required = FALSE;
1192         result->no_zero_fill = FALSE;
1193         result->mapped_in_other_pmaps = FALSE;
1194         result->wait_for_space = FALSE;
1195         result->switch_protect = FALSE;
1196         result->disable_vmentry_reuse = FALSE;
1197         result->map_disallow_data_exec = FALSE;
1198         result->is_nested_map = FALSE;
1199         result->map_disallow_new_exec = FALSE;
1200         result->terminated = FALSE;
1201         result->highest_entry_end = 0;
1202         result->first_free = vm_map_to_entry(result);
1203         result->hint = vm_map_to_entry(result);
1204         result->jit_entry_exists = FALSE;
1205
1206         /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1207         if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1208                 result->has_corpse_footprint = TRUE;
1209                 result->holelistenabled = FALSE;
1210                 result->vmmap_corpse_footprint = NULL;
1211         } else {
1212                 result->has_corpse_footprint = FALSE;
1213                 if (vm_map_supports_hole_optimization) {
1214                         hole_entry = zalloc(vm_map_holes_zone);
1215
1216                         hole_entry->start = min;
1217 #if defined(__arm__) || defined(__arm64__)
1218                         hole_entry->end = result->max_offset;
1219 #else
1220                         hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1221 #endif
1222                         result->holes_list = result->hole_hint = hole_entry;
1223                         hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1224                         result->holelistenabled = TRUE;
1225                 } else {
1226                         result->holelistenabled = FALSE;
1227                 }
1228         }
1229
1230         vm_map_lock_init(result);
1231         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1232
1233         return result;
1234 }
1235
/*
 *      vm_map_entry_create:    [ internal use only ]
 *
 *      Allocates a VM map entry for insertion in the
 *      given map (or map copy).  Apart from "from_reserved_zone"
 *      (and the creation debug info, when compiled in), no fields
 *      are filled.
 *
 *      Both macros forward to _vm_map_entry_create(), passing the
 *      header of the map ("hdr") or of the map copy ("cpy_hdr").
 */
#define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked)                                      \
        _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
/*
 * Counters of non-pageable entries currently allocated from the reserved
 * zone and from the regular entry zone; updated atomically on entry
 * creation and disposal.
 */
unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1247
1248 static vm_map_entry_t
1249 _vm_map_entry_create(
1250         struct vm_map_header    *map_header, boolean_t __unused map_locked)
1251 {
1252         zone_t  zone;
1253         vm_map_entry_t  entry;
1254
1255         zone = vm_map_entry_zone;
1256
1257         assert(map_header->entries_pageable ? !map_locked : TRUE);
1258
1259         if (map_header->entries_pageable) {
1260                 entry = (vm_map_entry_t) zalloc(zone);
1261         } else {
1262                 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1263
1264                 if (entry == VM_MAP_ENTRY_NULL) {
1265                         zone = vm_map_entry_reserved_zone;
1266                         entry = (vm_map_entry_t) zalloc(zone);
1267                         OSAddAtomic(1, &reserved_zalloc_count);
1268                 } else {
1269                         OSAddAtomic(1, &nonreserved_zalloc_count);
1270                 }
1271         }
1272
1273         if (entry == VM_MAP_ENTRY_NULL) {
1274                 panic("vm_map_entry_create");
1275         }
1276         entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1277
1278         vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1279 #if     MAP_ENTRY_CREATION_DEBUG
1280         entry->vme_creation_maphdr = map_header;
1281         backtrace(&entry->vme_creation_bt[0],
1282             (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1283 #endif
1284         return entry;
1285 }
1286
/*
 *      vm_map_entry_dispose:   [ internal use only ]
 *
 *      Inverse of vm_map_entry_create.
 *
 *      write map lock held so no need to
 *      do anything special to insure correctness
 *      of the stores
 *
 *      vm_map_entry_dispose() releases an entry that belonged to a map,
 *      vm_map_copy_entry_dispose() one that belonged to a map copy; both
 *      forward to _vm_map_entry_dispose() with the matching header.
 */
#define vm_map_entry_dispose(map, entry)                        \
        _vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 * The first parameter must be the one used in the expansion: it used to be
 * spelled "map" while the body referenced "copy", so the macro silently
 * picked up whatever "copy" happened to be in the caller's scope.
 */
#define vm_map_copy_entry_dispose(copy, entry) \
        _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1301
1302 static void
1303 _vm_map_entry_dispose(
1304         struct vm_map_header    *map_header,
1305         vm_map_entry_t          entry)
1306 {
1307         zone_t          zone;
1308
1309         if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1310                 zone = vm_map_entry_zone;
1311         } else {
1312                 zone = vm_map_entry_reserved_zone;
1313         }
1314
1315         if (!map_header->entries_pageable) {
1316                 if (zone == vm_map_entry_zone) {
1317                         OSAddAtomic(-1, &nonreserved_zalloc_count);
1318                 } else {
1319                         OSAddAtomic(-1, &reserved_zalloc_count);
1320                 }
1321         }
1322
1323         zfree(zone, entry);
1324 }
1325
#if MACH_ASSERT
/* The store-level check below only runs when this switch is TRUE. */
static boolean_t first_free_check = FALSE;

/*
 * Reports whether the map's "first_free" hint is valid, as determined by
 * first_free_is_valid_store().  Always TRUE while "first_free_check" is
 * off.
 */
boolean_t
first_free_is_valid(
        vm_map_t        map)
{
        return first_free_check ? first_free_is_valid_store( map ) : TRUE;
}
#endif /* MACH_ASSERT */
1339
1340
/*
 * Link "entry" into the entry list of a map copy, after "after_where";
 * forwards to _vm_map_store_entry_link() with the copy's header.
 */
#define vm_map_copy_entry_link(copy, after_where, entry)                \
        _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

/*
 * Unlink "entry" from the entry list of a map copy; forwards to
 * _vm_map_store_entry_unlink() with the copy's header.
 */
#define vm_map_copy_entry_unlink(copy, entry)                           \
        _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1346
1347 #if     MACH_ASSERT && TASK_SWAPPER
1348 /*
1349  *      vm_map_res_reference:
1350  *
1351  *      Adds another valid residence count to the given map.
1352  *
1353  *      Map is locked so this function can be called from
1354  *      vm_map_swapin.
1355  *
1356  */
1357 void
1358 vm_map_res_reference(vm_map_t map)
1359 {
1360         /* assert map is locked */
1361         assert(map->res_count >= 0);
1362         assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1363         if (map->res_count == 0) {
1364                 lck_mtx_unlock(&map->s_lock);
1365                 vm_map_lock(map);
1366                 vm_map_swapin(map);
1367                 lck_mtx_lock(&map->s_lock);
1368                 ++map->res_count;
1369                 vm_map_unlock(map);
1370         } else {
1371                 ++map->res_count;
1372         }
1373 }
1374
/*
 *      vm_map_reference_swap:
 *
 *      Adds valid reference and residence counts to the given map.
 *
 *      The map may not be in memory (i.e. zero residence count).
 *
 *      Takes the map's s_lock for the duration of the call; note that
 *      vm_map_res_reference() may drop and retake it when the map has
 *      to be swapped in.
 */
void
vm_map_reference_swap(vm_map_t map)
{
        assert(map != VM_MAP_NULL);
        lck_mtx_lock(&map->s_lock);
        assert(map->res_count >= 0);
        assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
        /* reference count first, then the residence count */
        os_ref_retain_locked(&map->map_refcnt);
        vm_map_res_reference(map);
        lck_mtx_unlock(&map->s_lock);
}
1394
1395 /*
1396  *      vm_map_res_deallocate:
1397  *
1398  *      Decrement residence count on a map; possibly causing swapout.
1399  *
1400  *      The map must be in memory (i.e. non-zero residence count).
1401  *
1402  *      The map is locked, so this function is callable from vm_map_deallocate.
1403  *
1404  */
1405 void
1406 vm_map_res_deallocate(vm_map_t map)
1407 {
1408         assert(map->res_count > 0);
1409         if (--map->res_count == 0) {
1410                 lck_mtx_unlock(&map->s_lock);
1411                 vm_map_lock(map);
1412                 vm_map_swapout(map);
1413                 vm_map_unlock(map);
1414                 lck_mtx_lock(&map->s_lock);
1415         }
1416         assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1417 }
1418 #endif  /* MACH_ASSERT && TASK_SWAPPER */
1419
1420 /*
1421  *      vm_map_destroy:
1422  *
1423  *      Actually destroy a map.
1424  */
1425 void
1426 vm_map_destroy(
1427         vm_map_t        map,
1428         int             flags)
1429 {
1430         vm_map_lock(map);
1431
1432         /* final cleanup: no need to unnest shared region */
1433         flags |= VM_MAP_REMOVE_NO_UNNESTING;
1434         /* final cleanup: ok to remove immutable mappings */
1435         flags |= VM_MAP_REMOVE_IMMUTABLE;
1436         /* final cleanup: allow gaps in range */
1437         flags |= VM_MAP_REMOVE_GAPS_OK;
1438
1439         /* clean up regular map entries */
1440         (void) vm_map_delete(map, map->min_offset, map->max_offset,
1441             flags, VM_MAP_NULL);
1442         /* clean up leftover special mappings (commpage, etc...) */
1443 #if     !defined(__arm__) && !defined(__arm64__)
1444         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1445             flags, VM_MAP_NULL);
1446 #endif /* !__arm__ && !__arm64__ */
1447
1448         vm_map_disable_hole_optimization(map);
1449         vm_map_corpse_footprint_destroy(map);
1450
1451         vm_map_unlock(map);
1452
1453         assert(map->hdr.nentries == 0);
1454
1455         if (map->pmap) {
1456                 pmap_destroy(map->pmap);
1457         }
1458
1459         if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1460                 /*
1461                  * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1462                  * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1463                  * structure or kalloc'ed via lck_mtx_init.
1464                  * An example is s_lock_ext within struct _vm_map.
1465                  *
1466                  * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1467                  * can add another tag to detect embedded vs alloc'ed indirect external
1468                  * mutexes but that'll be additional checks in the lock path and require
1469                  * updating dependencies for the old vs new tag.
1470                  *
1471                  * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1472                  * just when lock debugging is ON, we choose to forego explicitly destroying
1473                  * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1474                  * count on vm_map_lck_grp, which has no serious side-effect.
1475                  */
1476         } else {
1477                 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1478                 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1479         }
1480
1481         zfree(vm_map_zone, map);
1482 }
1483
1484 /*
1485  * Returns pid of the task with the largest number of VM map entries.
1486  * Used in the zone-map-exhaustion jetsam path.
1487  */
1488 pid_t
1489 find_largest_process_vm_map_entries(void)
1490 {
1491         pid_t victim_pid = -1;
1492         int max_vm_map_entries = 0;
1493         task_t task = TASK_NULL;
1494         queue_head_t *task_list = &tasks;
1495
1496         lck_mtx_lock(&tasks_threads_lock);
1497         queue_iterate(task_list, task, task_t, tasks) {
1498                 if (task == kernel_task || !task->active) {
1499                         continue;
1500                 }
1501
1502                 vm_map_t task_map = task->map;
1503                 if (task_map != VM_MAP_NULL) {
1504                         int task_vm_map_entries = task_map->hdr.nentries;
1505                         if (task_vm_map_entries > max_vm_map_entries) {
1506                                 max_vm_map_entries = task_vm_map_entries;
1507                                 victim_pid = pid_from_task(task);
1508                         }
1509                 }
1510         }
1511         lck_mtx_unlock(&tasks_threads_lock);
1512
1513         printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1514         return victim_pid;
1515 }
1516
1517 #if     TASK_SWAPPER
1518 /*
1519  * vm_map_swapin/vm_map_swapout
1520  *
1521  * Swap a map in and out, either referencing or releasing its resources.
1522  * These functions are internal use only; however, they must be exported
1523  * because they may be called from macros, which are exported.
1524  *
1525  * In the case of swapout, there could be races on the residence count,
1526  * so if the residence count is up, we return, assuming that a
1527  * vm_map_deallocate() call in the near future will bring us back.
1528  *
1529  * Locking:
1530  *      -- We use the map write lock for synchronization among races.
1531  *      -- The map write lock, and not the simple s_lock, protects the
1532  *         swap state of the map.
1533  *      -- If a map entry is a share map, then we hold both locks, in
1534  *         hierarchical order.
1535  *
1536  * Synchronization Notes:
1537  *      1) If a vm_map_swapin() call happens while swapout in progress, it
1538  *      will block on the map lock and proceed when swapout is through.
1539  *      2) A vm_map_reference() call at this time is illegal, and will
1540  *      cause a panic.  vm_map_reference() is only allowed on resident
1541  *      maps, since it refuses to block.
1542  *      3) A vm_map_swapin() call during a swapin will block, and
1543  *      proceed when the first swapin is done, turning into a nop.
1544  *      This is the reason the res_count is not incremented until
1545  *      after the swapin is complete.
1546  *      4) There is a timing hole after the checks of the res_count, before
1547  *      the map lock is taken, during which a swapin may get the lock
1548  *      before a swapout about to happen.  If this happens, the swapin
1549  *      will detect the state and increment the reference count, causing
1550  *      the swapout to be a nop, thereby delaying it until a later
1551  *      vm_map_deallocate.  If the swapout gets the lock first, then
1552  *      the swapin will simply block until the swapout is done, and
1553  *      then proceed.
1554  *
1555  * Because vm_map_swapin() is potentially an expensive operation, it
1556  * should be used with caution.
1557  *
1558  * Invariants:
1559  *      1) A map with a residence count of zero is either swapped, or
1560  *         being swapped.
1561  *      2) A map with a non-zero residence count is either resident,
1562  *         or being swapped in.
1563  */
1564
1565 int vm_map_swap_enable = 1;
1566
1567 void
1568 vm_map_swapin(vm_map_t map)
1569 {
1570         vm_map_entry_t entry;
1571
1572         if (!vm_map_swap_enable) {      /* debug */
1573                 return;
1574         }
1575
1576         /*
1577          * Map is locked
1578          * First deal with various races.
1579          */
1580         if (map->sw_state == MAP_SW_IN) {
1581                 /*
1582                  * we raced with swapout and won.  Returning will incr.
1583                  * the res_count, turning the swapout into a nop.
1584                  */
1585                 return;
1586         }
1587
1588         /*
1589          * The residence count must be zero.  If we raced with another
1590          * swapin, the state would have been IN; if we raced with a
1591          * swapout (after another competing swapin), we must have lost
1592          * the race to get here (see above comment), in which case
1593          * res_count is still 0.
1594          */
1595         assert(map->res_count == 0);
1596
1597         /*
1598          * There are no intermediate states of a map going out or
1599          * coming in, since the map is locked during the transition.
1600          */
1601         assert(map->sw_state == MAP_SW_OUT);
1602
1603         /*
1604          * We now operate upon each map entry.  If the entry is a sub-
1605          * or share-map, we call vm_map_res_reference upon it.
1606          * If the entry is an object, we call vm_object_res_reference
1607          * (this may iterate through the shadow chain).
1608          * Note that we hold the map locked the entire time,
1609          * even if we get back here via a recursive call in
1610          * vm_map_res_reference.
1611          */
1612         entry = vm_map_first_entry(map);
1613
1614         while (entry != vm_map_to_entry(map)) {
1615                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1616                         if (entry->is_sub_map) {
1617                                 vm_map_t lmap = VME_SUBMAP(entry);
1618                                 lck_mtx_lock(&lmap->s_lock);
1619                                 vm_map_res_reference(lmap);
1620                                 lck_mtx_unlock(&lmap->s_lock);
1621                         } else {
1622                                 vm_object_t object = VME_OBEJCT(entry);
1623                                 vm_object_lock(object);
1624                                 /*
1625                                  * This call may iterate through the
1626                                  * shadow chain.
1627                                  */
1628                                 vm_object_res_reference(object);
1629                                 vm_object_unlock(object);
1630                         }
1631                 }
1632                 entry = entry->vme_next;
1633         }
1634         assert(map->sw_state == MAP_SW_OUT);
1635         map->sw_state = MAP_SW_IN;
1636 }
1637
1638 void
1639 vm_map_swapout(vm_map_t map)
1640 {
1641         vm_map_entry_t entry;
1642
1643         /*
1644          * Map is locked
1645          * First deal with various races.
1646          * If we raced with a swapin and lost, the residence count
1647          * will have been incremented to 1, and we simply return.
1648          */
1649         lck_mtx_lock(&map->s_lock);
1650         if (map->res_count != 0) {
1651                 lck_mtx_unlock(&map->s_lock);
1652                 return;
1653         }
1654         lck_mtx_unlock(&map->s_lock);
1655
1656         /*
1657          * There are no intermediate states of a map going out or
1658          * coming in, since the map is locked during the transition.
1659          */
1660         assert(map->sw_state == MAP_SW_IN);
1661
1662         if (!vm_map_swap_enable) {
1663                 return;
1664         }
1665
1666         /*
1667          * We now operate upon each map entry.  If the entry is a sub-
1668          * or share-map, we call vm_map_res_deallocate upon it.
1669          * If the entry is an object, we call vm_object_res_deallocate
1670          * (this may iterate through the shadow chain).
1671          * Note that we hold the map locked the entire time,
1672          * even if we get back here via a recursive call in
1673          * vm_map_res_deallocate.
1674          */
1675         entry = vm_map_first_entry(map);
1676
1677         while (entry != vm_map_to_entry(map)) {
1678                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1679                         if (entry->is_sub_map) {
1680                                 vm_map_t lmap = VME_SUBMAP(entry);
1681                                 lck_mtx_lock(&lmap->s_lock);
1682                                 vm_map_res_deallocate(lmap);
1683                                 lck_mtx_unlock(&lmap->s_lock);
1684                         } else {
1685                                 vm_object_t object = VME_OBJECT(entry);
1686                                 vm_object_lock(object);
1687                                 /*
1688                                  * This call may take a long time,
1689                                  * since it could actively push
1690                                  * out pages (if we implement it
1691                                  * that way).
1692                                  */
1693                                 vm_object_res_deallocate(object);
1694                                 vm_object_unlock(object);
1695                         }
1696                 }
1697                 entry = entry->vme_next;
1698         }
1699         assert(map->sw_state == MAP_SW_IN);
1700         map->sw_state = MAP_SW_OUT;
1701 }
1702
1703 #endif  /* TASK_SWAPPER */
1704
1705 /*
1706  *      vm_map_lookup_entry:    [ internal use only ]
1707  *
1708  *      Calls into the vm map store layer to find the map
1709  *      entry containing (or immediately preceding) the
1710  *      specified address in the given map; the entry is returned
1711  *      in the "entry" parameter.  The boolean
1712  *      result indicates whether the address is
1713  *      actually contained in the map.
1714  */
1715 boolean_t
1716 vm_map_lookup_entry(
1717         vm_map_t                map,
1718         vm_map_offset_t address,
1719         vm_map_entry_t          *entry)         /* OUT */
1720 {
1721         return vm_map_store_lookup_entry( map, address, entry );
1722 }
1723
1724 /*
1725  *      Routine:        vm_map_find_space
1726  *      Purpose:
1727  *              Allocate a range in the specified virtual address map,
1728  *              returning the entry allocated for that range.
1729  *              Used by kmem_alloc, etc.
1730  *
1731  *              The map must be NOT be locked. It will be returned locked
1732  *              on KERN_SUCCESS, unlocked on failure.
1733  *
1734  *              If an entry is allocated, the object/offset fields
1735  *              are initialized to zero.
1736  */
1737 kern_return_t
1738 vm_map_find_space(
1739         vm_map_t        map,
1740         vm_map_offset_t         *address,       /* OUT */
1741         vm_map_size_t           size,
1742         vm_map_offset_t         mask,
1743         int                     flags __unused,
1744         vm_map_kernel_flags_t   vmk_flags,
1745         vm_tag_t                tag,
1746         vm_map_entry_t          *o_entry)       /* OUT */
1747 {
1748         vm_map_entry_t                  entry, new_entry;
1749         vm_map_offset_t start;
1750         vm_map_offset_t end;
1751         vm_map_entry_t                  hole_entry;
1752
1753         if (size == 0) {
1754                 *address = 0;
1755                 return KERN_INVALID_ARGUMENT;
1756         }
1757
1758         if (vmk_flags.vmkf_guard_after) {
1759                 /* account for the back guard page in the size */
1760                 size += VM_MAP_PAGE_SIZE(map);
1761         }
1762
1763         new_entry = vm_map_entry_create(map, FALSE);
1764
1765         /*
1766          *      Look for the first possible address; if there's already
1767          *      something at this address, we have to start after it.
1768          */
1769
1770         vm_map_lock(map);
1771
1772         if (map->disable_vmentry_reuse == TRUE) {
1773                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1774         } else {
1775                 if (map->holelistenabled) {
1776                         hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1777
1778                         if (hole_entry == NULL) {
1779                                 /*
1780                                  * No more space in the map?
1781                                  */
1782                                 vm_map_entry_dispose(map, new_entry);
1783                                 vm_map_unlock(map);
1784                                 return KERN_NO_SPACE;
1785                         }
1786
1787                         entry = hole_entry;
1788                         start = entry->vme_start;
1789                 } else {
1790                         assert(first_free_is_valid(map));
1791                         if ((entry = map->first_free) == vm_map_to_entry(map)) {
1792                                 start = map->min_offset;
1793                         } else {
1794                                 start = entry->vme_end;
1795                         }
1796                 }
1797         }
1798
1799         /*
1800          *      In any case, the "entry" always precedes
1801          *      the proposed new region throughout the loop:
1802          */
1803
1804         while (TRUE) {
1805                 vm_map_entry_t  next;
1806
1807                 /*
1808                  *      Find the end of the proposed new region.
1809                  *      Be sure we didn't go beyond the end, or
1810                  *      wrap around the address.
1811                  */
1812
1813                 if (vmk_flags.vmkf_guard_before) {
1814                         /* reserve space for the front guard page */
1815                         start += VM_MAP_PAGE_SIZE(map);
1816                 }
1817                 end = ((start + mask) & ~mask);
1818
1819                 if (end < start) {
1820                         vm_map_entry_dispose(map, new_entry);
1821                         vm_map_unlock(map);
1822                         return KERN_NO_SPACE;
1823                 }
1824                 start = end;
1825                 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1826                 end += size;
1827                 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1828
1829                 if ((end > map->max_offset) || (end < start)) {
1830                         vm_map_entry_dispose(map, new_entry);
1831                         vm_map_unlock(map);
1832                         return KERN_NO_SPACE;
1833                 }
1834
1835                 next = entry->vme_next;
1836
1837                 if (map->holelistenabled) {
1838                         if (entry->vme_end >= end) {
1839                                 break;
1840                         }
1841                 } else {
1842                         /*
1843                          *      If there are no more entries, we must win.
1844                          *
1845                          *      OR
1846                          *
1847                          *      If there is another entry, it must be
1848                          *      after the end of the potential new region.
1849                          */
1850
1851                         if (next == vm_map_to_entry(map)) {
1852                                 break;
1853                         }
1854
1855                         if (next->vme_start >= end) {
1856                                 break;
1857                         }
1858                 }
1859
1860                 /*
1861                  *      Didn't fit -- move to the next entry.
1862                  */
1863
1864                 entry = next;
1865
1866                 if (map->holelistenabled) {
1867                         if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1868                                 /*
1869                                  * Wrapped around
1870                                  */
1871                                 vm_map_entry_dispose(map, new_entry);
1872                                 vm_map_unlock(map);
1873                                 return KERN_NO_SPACE;
1874                         }
1875                         start = entry->vme_start;
1876                 } else {
1877                         start = entry->vme_end;
1878                 }
1879         }
1880
1881         if (map->holelistenabled) {
1882                 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1883                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1884                 }
1885         }
1886
1887         /*
1888          *      At this point,
1889          *              "start" and "end" should define the endpoints of the
1890          *                      available new range, and
1891          *              "entry" should refer to the region before the new
1892          *                      range, and
1893          *
1894          *              the map should be locked.
1895          */
1896
1897         if (vmk_flags.vmkf_guard_before) {
1898                 /* go back for the front guard page */
1899                 start -= VM_MAP_PAGE_SIZE(map);
1900         }
1901         *address = start;
1902
1903         assert(start < end);
1904         new_entry->vme_start = start;
1905         new_entry->vme_end = end;
1906         assert(page_aligned(new_entry->vme_start));
1907         assert(page_aligned(new_entry->vme_end));
1908         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1909             VM_MAP_PAGE_MASK(map)));
1910         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1911             VM_MAP_PAGE_MASK(map)));
1912
1913         new_entry->is_shared = FALSE;
1914         new_entry->is_sub_map = FALSE;
1915         new_entry->use_pmap = TRUE;
1916         VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1917         VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1918
1919         new_entry->needs_copy = FALSE;
1920
1921         new_entry->inheritance = VM_INHERIT_DEFAULT;
1922         new_entry->protection = VM_PROT_DEFAULT;
1923         new_entry->max_protection = VM_PROT_ALL;
1924         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1925         new_entry->wired_count = 0;
1926         new_entry->user_wired_count = 0;
1927
1928         new_entry->in_transition = FALSE;
1929         new_entry->needs_wakeup = FALSE;
1930         new_entry->no_cache = FALSE;
1931         new_entry->permanent = FALSE;
1932         new_entry->superpage_size = FALSE;
1933         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1934                 new_entry->map_aligned = TRUE;
1935         } else {
1936                 new_entry->map_aligned = FALSE;
1937         }
1938
1939         new_entry->used_for_jit = FALSE;
1940         new_entry->pmap_cs_associated = FALSE;
1941         new_entry->zero_wired_pages = FALSE;
1942         new_entry->iokit_acct = FALSE;
1943         new_entry->vme_resilient_codesign = FALSE;
1944         new_entry->vme_resilient_media = FALSE;
1945         if (vmk_flags.vmkf_atomic_entry) {
1946                 new_entry->vme_atomic = TRUE;
1947         } else {
1948                 new_entry->vme_atomic = FALSE;
1949         }
1950
1951         VME_ALIAS_SET(new_entry, tag);
1952
1953         /*
1954          *      Insert the new entry into the list
1955          */
1956
1957         vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1958
1959         map->size += size;
1960
1961         /*
1962          *      Update the lookup hint
1963          */
1964         SAVE_HINT_MAP_WRITE(map, new_entry);
1965
1966         *o_entry = new_entry;
1967         return KERN_SUCCESS;
1968 }
1969
1970 int vm_map_pmap_enter_print = FALSE;
1971 int vm_map_pmap_enter_enable = FALSE;
1972
1973 /*
1974  *      Routine:        vm_map_pmap_enter [internal only]
1975  *
1976  *      Description:
1977  *              Force pages from the specified object to be entered into
1978  *              the pmap at the specified address if they are present.
1979  *              As soon as a page not found in the object the scan ends.
1980  *
1981  *      Returns:
1982  *              Nothing.
1983  *
1984  *      In/out conditions:
1985  *              The source map should not be locked on entry.
1986  */
1987 __unused static void
1988 vm_map_pmap_enter(
1989         vm_map_t                map,
1990         vm_map_offset_t         addr,
1991         vm_map_offset_t         end_addr,
1992         vm_object_t             object,
1993         vm_object_offset_t      offset,
1994         vm_prot_t               protection)
1995 {
1996         int                     type_of_fault;
1997         kern_return_t           kr;
1998         struct vm_object_fault_info fault_info = {};
1999
2000         if (map->pmap == 0) {
2001                 return;
2002         }
2003
2004         while (addr < end_addr) {
2005                 vm_page_t       m;
2006
2007
2008                 /*
2009                  * TODO:
2010                  * From vm_map_enter(), we come into this function without the map
2011                  * lock held or the object lock held.
2012                  * We haven't taken a reference on the object either.
2013                  * We should do a proper lookup on the map to make sure
2014                  * that things are sane before we go locking objects that
2015                  * could have been deallocated from under us.
2016                  */
2017
2018                 vm_object_lock(object);
2019
2020                 m = vm_page_lookup(object, offset);
2021
2022                 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2023                     (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2024                         vm_object_unlock(object);
2025                         return;
2026                 }
2027
2028                 if (vm_map_pmap_enter_print) {
2029                         printf("vm_map_pmap_enter:");
2030                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2031                             map, (unsigned long long)addr, object, (unsigned long long)offset);
2032                 }
2033                 type_of_fault = DBG_CACHE_HIT_FAULT;
2034                 kr = vm_fault_enter(m, map->pmap,
2035                     addr, protection, protection,
2036                     VM_PAGE_WIRED(m),
2037                     FALSE,                 /* change_wiring */
2038                     VM_KERN_MEMORY_NONE,                 /* tag - not wiring */
2039                     &fault_info,
2040                     NULL,                  /* need_retry */
2041                     &type_of_fault);
2042
2043                 vm_object_unlock(object);
2044
2045                 offset += PAGE_SIZE_64;
2046                 addr += PAGE_SIZE;
2047         }
2048 }
2049
2050 boolean_t vm_map_pmap_is_empty(
2051         vm_map_t        map,
2052         vm_map_offset_t start,
2053         vm_map_offset_t end);
2054 boolean_t
2055 vm_map_pmap_is_empty(
2056         vm_map_t        map,
2057         vm_map_offset_t start,
2058         vm_map_offset_t end)
2059 {
2060 #ifdef MACHINE_PMAP_IS_EMPTY
2061         return pmap_is_empty(map->pmap, start, end);
2062 #else   /* MACHINE_PMAP_IS_EMPTY */
2063         vm_map_offset_t offset;
2064         ppnum_t         phys_page;
2065
2066         if (map->pmap == NULL) {
2067                 return TRUE;
2068         }
2069
2070         for (offset = start;
2071             offset < end;
2072             offset += PAGE_SIZE) {
2073                 phys_page = pmap_find_phys(map->pmap, offset);
2074                 if (phys_page) {
2075                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2076                             "page %d at 0x%llx\n",
2077                             map, (long long)start, (long long)end,
2078                             phys_page, (long long)offset);
2079                         return FALSE;
2080                 }
2081         }
2082         return TRUE;
2083 #endif  /* MACHINE_PMAP_IS_EMPTY */
2084 }
2085
2086 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2087 kern_return_t
2088 vm_map_random_address_for_size(
2089         vm_map_t        map,
2090         vm_map_offset_t *address,
2091         vm_map_size_t   size)
2092 {
2093         kern_return_t   kr = KERN_SUCCESS;
2094         int             tries = 0;
2095         vm_map_offset_t random_addr = 0;
2096         vm_map_offset_t hole_end;
2097
2098         vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
2099         vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
2100         vm_map_size_t   vm_hole_size = 0;
2101         vm_map_size_t   addr_space_size;
2102
2103         addr_space_size = vm_map_max(map) - vm_map_min(map);
2104
2105         assert(page_aligned(size));
2106
2107         while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2108                 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2109                 random_addr = vm_map_trunc_page(
2110                         vm_map_min(map) + (random_addr % addr_space_size),
2111                         VM_MAP_PAGE_MASK(map));
2112
2113                 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2114                         if (prev_entry == vm_map_to_entry(map)) {
2115                                 next_entry = vm_map_first_entry(map);
2116                         } else {
2117                                 next_entry = prev_entry->vme_next;
2118                         }
2119                         if (next_entry == vm_map_to_entry(map)) {
2120                                 hole_end = vm_map_max(map);
2121                         } else {
2122                                 hole_end = next_entry->vme_start;
2123                         }
2124                         vm_hole_size = hole_end - random_addr;
2125                         if (vm_hole_size >= size) {
2126                                 *address = random_addr;
2127                                 break;
2128                         }
2129                 }
2130                 tries++;
2131         }
2132
2133         if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2134                 kr = KERN_NO_SPACE;
2135         }
2136         return kr;
2137 }
2138
2139 static boolean_t
2140 vm_memory_malloc_no_cow(
2141         int alias)
2142 {
2143         uint64_t alias_mask;
2144
2145         if (alias > 63) {
2146                 return FALSE;
2147         }
2148
2149         alias_mask = 1ULL << alias;
2150         if (alias_mask & vm_memory_malloc_no_cow_mask) {
2151                 return TRUE;
2152         }
2153         return FALSE;
2154 }
2155
2156 /*
2157  *      Routine:        vm_map_enter
2158  *
2159  *      Description:
2160  *              Allocate a range in the specified virtual address map.
2161  *              The resulting range will refer to memory defined by
2162  *              the given memory object and offset into that object.
2163  *
2164  *              Arguments are as defined in the vm_map call.
2165  */
2166 int _map_enter_debug = 0;
2167 static unsigned int vm_map_enter_restore_successes = 0;
2168 static unsigned int vm_map_enter_restore_failures = 0;
2169 kern_return_t
2170 vm_map_enter(
2171         vm_map_t                map,
2172         vm_map_offset_t         *address,       /* IN/OUT */
2173         vm_map_size_t           size,
2174         vm_map_offset_t         mask,
2175         int                     flags,
2176         vm_map_kernel_flags_t   vmk_flags,
2177         vm_tag_t                alias,
2178         vm_object_t             object,
2179         vm_object_offset_t      offset,
2180         boolean_t               needs_copy,
2181         vm_prot_t               cur_protection,
2182         vm_prot_t               max_protection,
2183         vm_inherit_t            inheritance)
2184 {
2185         vm_map_entry_t          entry, new_entry;
2186         vm_map_offset_t         start, tmp_start, tmp_offset;
2187         vm_map_offset_t         end, tmp_end;
2188         vm_map_offset_t         tmp2_start, tmp2_end;
2189         vm_map_offset_t         desired_empty_end;
2190         vm_map_offset_t         step;
2191         kern_return_t           result = KERN_SUCCESS;
2192         vm_map_t                zap_old_map = VM_MAP_NULL;
2193         vm_map_t                zap_new_map = VM_MAP_NULL;
2194         boolean_t               map_locked = FALSE;
2195         boolean_t               pmap_empty = TRUE;
2196         boolean_t               new_mapping_established = FALSE;
2197         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2198         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2199         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2200         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2201         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2202         boolean_t               is_submap = vmk_flags.vmkf_submap;
2203         boolean_t               permanent = vmk_flags.vmkf_permanent;
2204         boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2205         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
2206         boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
2207         boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2208         boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2209         boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2210         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2211         vm_tag_t                user_alias;
2212         vm_map_offset_t         effective_min_offset, effective_max_offset;
2213         kern_return_t           kr;
2214         boolean_t               clear_map_aligned = FALSE;
2215         vm_map_entry_t          hole_entry;
2216         vm_map_size_t           chunk_size = 0;
2217
2218         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2219
2220         if (flags & VM_FLAGS_4GB_CHUNK) {
2221 #if defined(__LP64__)
2222                 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2223 #else /* __LP64__ */
2224                 chunk_size = ANON_CHUNK_SIZE;
2225 #endif /* __LP64__ */
2226         } else {
2227                 chunk_size = ANON_CHUNK_SIZE;
2228         }
2229
2230         if (superpage_size) {
2231                 switch (superpage_size) {
2232                         /*
2233                          * Note that the current implementation only supports
2234                          * a single size for superpages, SUPERPAGE_SIZE, per
2235                          * architecture. As soon as more sizes are supposed
2236                          * to be supported, SUPERPAGE_SIZE has to be replaced
2237                          * with a lookup of the size depending on superpage_size.
2238                          */
2239 #ifdef __x86_64__
2240                 case SUPERPAGE_SIZE_ANY:
2241                         /* handle it like 2 MB and round up to page size */
2242                         size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2243                 case SUPERPAGE_SIZE_2MB:
2244                         break;
2245 #endif
2246                 default:
2247                         return KERN_INVALID_ARGUMENT;
2248                 }
2249                 mask = SUPERPAGE_SIZE - 1;
2250                 if (size & (SUPERPAGE_SIZE - 1)) {
2251                         return KERN_INVALID_ARGUMENT;
2252                 }
2253                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
2254         }
2255
2256
2257         if ((cur_protection & VM_PROT_WRITE) &&
2258             (cur_protection & VM_PROT_EXECUTE) &&
2259 #if !CONFIG_EMBEDDED
2260             map != kernel_map &&
2261             (cs_process_global_enforcement() ||
2262             (vmk_flags.vmkf_cs_enforcement_override
2263             ? vmk_flags.vmkf_cs_enforcement
2264             : cs_process_enforcement(NULL))) &&
2265 #endif /* !CONFIG_EMBEDDED */
2266             !entry_for_jit) {
2267                 DTRACE_VM3(cs_wx,
2268                     uint64_t, 0,
2269                     uint64_t, 0,
2270                     vm_prot_t, cur_protection);
2271                 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2272 #if VM_PROTECT_WX_FAIL
2273                     "failing\n",
2274 #else /* VM_PROTECT_WX_FAIL */
2275                     "turning off execute\n",
2276 #endif /* VM_PROTECT_WX_FAIL */
2277                     proc_selfpid(),
2278                     (current_task()->bsd_info
2279                     ? proc_name_address(current_task()->bsd_info)
2280                     : "?"),
2281                     __FUNCTION__);
2282                 cur_protection &= ~VM_PROT_EXECUTE;
2283 #if VM_PROTECT_WX_FAIL
2284                 return KERN_PROTECTION_FAILURE;
2285 #endif /* VM_PROTECT_WX_FAIL */
2286         }
2287
2288         /*
2289          * If the task has requested executable lockdown,
2290          * deny any new executable mapping.
2291          */
2292         if (map->map_disallow_new_exec == TRUE) {
2293                 if (cur_protection & VM_PROT_EXECUTE) {
2294                         return KERN_PROTECTION_FAILURE;
2295                 }
2296         }
2297
2298         if (resilient_codesign) {
2299                 assert(!is_submap);
2300                 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2301                     (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2302                         return KERN_PROTECTION_FAILURE;
2303                 }
2304         }
2305
2306         if (resilient_media) {
2307                 assert(!is_submap);
2308 //              assert(!needs_copy);
2309                 if (object != VM_OBJECT_NULL &&
2310                     !object->internal) {
2311                         /*
2312                          * This mapping is directly backed by an external
2313                          * memory manager (e.g. a vnode pager for a file):
2314                          * we would not have any safe place to inject
2315                          * a zero-filled page if an actual page is not
2316                          * available, without possibly impacting the actual
2317                          * contents of the mapped object (e.g. the file),
2318                          * so we can't provide any media resiliency here.
2319                          */
2320                         return KERN_INVALID_ARGUMENT;
2321                 }
2322         }
2323
2324         if (is_submap) {
2325                 if (purgable) {
2326                         /* submaps can not be purgeable */
2327                         return KERN_INVALID_ARGUMENT;
2328                 }
2329                 if (object == VM_OBJECT_NULL) {
2330                         /* submaps can not be created lazily */
2331                         return KERN_INVALID_ARGUMENT;
2332                 }
2333         }
2334         if (vmk_flags.vmkf_already) {
2335                 /*
2336                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
2337                  * is already present.  For it to be meaningful, the requested
2338                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2339                  * we shouldn't try and remove what was mapped there first
2340                  * (!VM_FLAGS_OVERWRITE).
2341                  */
2342                 if ((flags & VM_FLAGS_ANYWHERE) ||
2343                     (flags & VM_FLAGS_OVERWRITE)) {
2344                         return KERN_INVALID_ARGUMENT;
2345                 }
2346         }
2347
2348         effective_min_offset = map->min_offset;
2349
2350         if (vmk_flags.vmkf_beyond_max) {
2351                 /*
2352                  * Allow an insertion beyond the map's max offset.
2353                  */
2354 #if     !defined(__arm__) && !defined(__arm64__)
2355                 if (vm_map_is_64bit(map)) {
2356                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2357                 } else
2358 #endif  /* __arm__ */
2359                 effective_max_offset = 0x00000000FFFFF000ULL;
2360         } else {
2361 #if     !defined(CONFIG_EMBEDDED)
2362                 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2363                         effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2364                 } else {
2365                         effective_max_offset = map->max_offset;
2366                 }
2367 #else
2368                 effective_max_offset = map->max_offset;
2369 #endif
2370         }
2371
2372         if (size == 0 ||
2373             (offset & PAGE_MASK_64) != 0) {
2374                 *address = 0;
2375                 return KERN_INVALID_ARGUMENT;
2376         }
2377
2378         if (map->pmap == kernel_pmap) {
2379                 user_alias = VM_KERN_MEMORY_NONE;
2380         } else {
2381                 user_alias = alias;
2382         }
2383
2384         if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2385                 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2386         }
2387
2388 #define RETURN(value)   { result = value; goto BailOut; }
2389
2390         assert(page_aligned(*address));
2391         assert(page_aligned(size));
2392
2393         if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2394                 /*
2395                  * In most cases, the caller rounds the size up to the
2396                  * map's page size.
2397                  * If we get a size that is explicitly not map-aligned here,
2398                  * we'll have to respect the caller's wish and mark the
2399                  * mapping as "not map-aligned" to avoid tripping the
2400                  * map alignment checks later.
2401                  */
2402                 clear_map_aligned = TRUE;
2403         }
2404         if (!anywhere &&
2405             !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2406                 /*
2407                  * We've been asked to map at a fixed address and that
2408                  * address is not aligned to the map's specific alignment.
2409                  * The caller should know what it's doing (i.e. most likely
2410                  * mapping some fragmented copy map, transferring memory from
2411                  * a VM map with a different alignment), so clear map_aligned
2412                  * for this new VM map entry and proceed.
2413                  */
2414                 clear_map_aligned = TRUE;
2415         }
2416
2417         /*
2418          * Only zero-fill objects are allowed to be purgable.
2419          * LP64todo - limit purgable objects to 32-bits for now
2420          */
2421         if (purgable &&
2422             (offset != 0 ||
2423             (object != VM_OBJECT_NULL &&
2424             (object->vo_size != size ||
2425             object->purgable == VM_PURGABLE_DENY))
2426             || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2427                 return KERN_INVALID_ARGUMENT;
2428         }
2429
2430         if (!anywhere && overwrite) {
2431                 /*
2432                  * Create a temporary VM map to hold the old mappings in the
2433                  * affected area while we create the new one.
2434                  * This avoids releasing the VM map lock in
2435                  * vm_map_entry_delete() and allows atomicity
2436                  * when we want to replace some mappings with a new one.
2437                  * It also allows us to restore the old VM mappings if the
2438                  * new mapping fails.
2439                  */
2440                 zap_old_map = vm_map_create(PMAP_NULL,
2441                     *address,
2442                     *address + size,
2443                     map->hdr.entries_pageable);
2444                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2445                 vm_map_disable_hole_optimization(zap_old_map);
2446         }
2447
2448 StartAgain:;
2449
2450         start = *address;
2451
2452         if (anywhere) {
2453                 vm_map_lock(map);
2454                 map_locked = TRUE;
2455
2456                 if (entry_for_jit) {
2457 #if CONFIG_EMBEDDED
2458                         if (map->jit_entry_exists) {
2459                                 result = KERN_INVALID_ARGUMENT;
2460                                 goto BailOut;
2461                         }
2462                         random_address = TRUE;
2463 #endif /* CONFIG_EMBEDDED */
2464                 }
2465
2466                 if (random_address) {
2467                         /*
2468                          * Get a random start address.
2469                          */
2470                         result = vm_map_random_address_for_size(map, address, size);
2471                         if (result != KERN_SUCCESS) {
2472                                 goto BailOut;
2473                         }
2474                         start = *address;
2475                 }
2476 #if !CONFIG_EMBEDDED
2477                 else if ((start == 0 || start == vm_map_min(map)) &&
2478                     !map->disable_vmentry_reuse &&
2479                     map->vmmap_high_start != 0) {
2480                         start = map->vmmap_high_start;
2481                 }
2482 #endif
2483
2484
2485                 /*
2486                  *      Calculate the first possible address.
2487                  */
2488
2489                 if (start < effective_min_offset) {
2490                         start = effective_min_offset;
2491                 }
2492                 if (start > effective_max_offset) {
2493                         RETURN(KERN_NO_SPACE);
2494                 }
2495
2496                 /*
2497                  *      Look for the first possible address;
2498                  *      if there's already something at this
2499                  *      address, we have to start after it.
2500                  */
2501
2502                 if (map->disable_vmentry_reuse == TRUE) {
2503                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
2504                 } else {
2505                         if (map->holelistenabled) {
2506                                 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2507
2508                                 if (hole_entry == NULL) {
2509                                         /*
2510                                          * No more space in the map?
2511                                          */
2512                                         result = KERN_NO_SPACE;
2513                                         goto BailOut;
2514                                 } else {
2515                                         boolean_t found_hole = FALSE;
2516
2517                                         do {
2518                                                 if (hole_entry->vme_start >= start) {
2519                                                         start = hole_entry->vme_start;
2520                                                         found_hole = TRUE;
2521                                                         break;
2522                                                 }
2523
2524                                                 if (hole_entry->vme_end > start) {
2525                                                         found_hole = TRUE;
2526                                                         break;
2527                                                 }
2528                                                 hole_entry = hole_entry->vme_next;
2529                                         } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2530
2531                                         if (found_hole == FALSE) {
2532                                                 result = KERN_NO_SPACE;
2533                                                 goto BailOut;
2534                                         }
2535
2536                                         entry = hole_entry;
2537
2538                                         if (start == 0) {
2539                                                 start += PAGE_SIZE_64;
2540                                         }
2541                                 }
2542                         } else {
2543                                 assert(first_free_is_valid(map));
2544
2545                                 entry = map->first_free;
2546
2547                                 if (entry == vm_map_to_entry(map)) {
2548                                         entry = NULL;
2549                                 } else {
2550                                         if (entry->vme_next == vm_map_to_entry(map)) {
2551                                                 /*
2552                                                  * Hole at the end of the map.
2553                                                  */
2554                                                 entry = NULL;
2555                                         } else {
2556                                                 if (start < (entry->vme_next)->vme_start) {
2557                                                         start = entry->vme_end;
2558                                                         start = vm_map_round_page(start,
2559                                                             VM_MAP_PAGE_MASK(map));
2560                                                 } else {
2561                                                         /*
2562                                                          * Need to do a lookup.
2563                                                          */
2564                                                         entry = NULL;
2565                                                 }
2566                                         }
2567                                 }
2568
2569                                 if (entry == NULL) {
2570                                         vm_map_entry_t  tmp_entry;
2571                                         if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2572                                                 assert(!entry_for_jit);
2573                                                 start = tmp_entry->vme_end;
2574                                                 start = vm_map_round_page(start,
2575                                                     VM_MAP_PAGE_MASK(map));
2576                                         }
2577                                         entry = tmp_entry;
2578                                 }
2579                         }
2580                 }
2581
2582                 /*
2583                  *      In any case, the "entry" always precedes
2584                  *      the proposed new region throughout the
2585                  *      loop:
2586                  */
2587
2588                 while (TRUE) {
2589                         vm_map_entry_t  next;
2590
2591                         /*
2592                          *      Find the end of the proposed new region.
2593                          *      Be sure we didn't go beyond the end, or
2594                          *      wrap around the address.
2595                          */
2596
2597                         end = ((start + mask) & ~mask);
2598                         end = vm_map_round_page(end,
2599                             VM_MAP_PAGE_MASK(map));
2600                         if (end < start) {
2601                                 RETURN(KERN_NO_SPACE);
2602                         }
2603                         start = end;
2604                         assert(VM_MAP_PAGE_ALIGNED(start,
2605                             VM_MAP_PAGE_MASK(map)));
2606                         end += size;
2607
2608                         /* We want an entire page of empty space, but don't increase the allocation size. */
2609                         desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2610
2611                         if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2612                                 if (map->wait_for_space) {
2613                                         assert(!keep_map_locked);
2614                                         if (size <= (effective_max_offset -
2615                                             effective_min_offset)) {
2616                                                 assert_wait((event_t)map,
2617                                                     THREAD_ABORTSAFE);
2618                                                 vm_map_unlock(map);
2619                                                 map_locked = FALSE;
2620                                                 thread_block(THREAD_CONTINUE_NULL);
2621                                                 goto StartAgain;
2622                                         }
2623                                 }
2624                                 RETURN(KERN_NO_SPACE);
2625                         }
2626
2627                         next = entry->vme_next;
2628
2629                         if (map->holelistenabled) {
2630                                 if (entry->vme_end >= desired_empty_end) {
2631                                         break;
2632                                 }
2633                         } else {
2634                                 /*
2635                                  *      If there are no more entries, we must win.
2636                                  *
2637                                  *      OR
2638                                  *
2639                                  *      If there is another entry, it must be
2640                                  *      after the end of the potential new region.
2641                                  */
2642
2643                                 if (next == vm_map_to_entry(map)) {
2644                                         break;
2645                                 }
2646
2647                                 if (next->vme_start >= desired_empty_end) {
2648                                         break;
2649                                 }
2650                         }
2651
2652                         /*
2653                          *      Didn't fit -- move to the next entry.
2654                          */
2655
2656                         entry = next;
2657
2658                         if (map->holelistenabled) {
2659                                 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2660                                         /*
2661                                          * Wrapped around
2662                                          */
2663                                         result = KERN_NO_SPACE;
2664                                         goto BailOut;
2665                                 }
2666                                 start = entry->vme_start;
2667                         } else {
2668                                 start = entry->vme_end;
2669                         }
2670
2671                         start = vm_map_round_page(start,
2672                             VM_MAP_PAGE_MASK(map));
2673                 }
2674
2675                 if (map->holelistenabled) {
2676                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2677                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2678                         }
2679                 }
2680
2681                 *address = start;
2682                 assert(VM_MAP_PAGE_ALIGNED(*address,
2683                     VM_MAP_PAGE_MASK(map)));
2684         } else {
2685                 /*
2686                  *      Verify that:
2687                  *              the address doesn't itself violate
2688                  *              the mask requirement.
2689                  */
2690
2691                 vm_map_lock(map);
2692                 map_locked = TRUE;
2693                 if ((start & mask) != 0) {
2694                         RETURN(KERN_NO_SPACE);
2695                 }
2696
2697                 /*
2698                  *      ...     the address is within bounds
2699                  */
2700
2701                 end = start + size;
2702
2703                 if ((start < effective_min_offset) ||
2704                     (end > effective_max_offset) ||
2705                     (start >= end)) {
2706                         RETURN(KERN_INVALID_ADDRESS);
2707                 }
2708
2709                 if (overwrite && zap_old_map != VM_MAP_NULL) {
2710                         int remove_flags;
2711                         /*
2712                          * Fixed mapping and "overwrite" flag: attempt to
2713                          * remove all existing mappings in the specified
2714                          * address range, saving them in our "zap_old_map".
2715                          */
2716                         remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2717                         remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2718                         if (vmk_flags.vmkf_overwrite_immutable) {
2719                                 /* we can overwrite immutable mappings */
2720                                 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2721                         }
2722                         (void) vm_map_delete(map, start, end,
2723                             remove_flags,
2724                             zap_old_map);
2725                 }
2726
2727                 /*
2728                  *      ...     the starting address isn't allocated
2729                  */
2730
2731                 if (vm_map_lookup_entry(map, start, &entry)) {
2732                         if (!(vmk_flags.vmkf_already)) {
2733                                 RETURN(KERN_NO_SPACE);
2734                         }
2735                         /*
2736                          * Check if what's already there is what we want.
2737                          */
2738                         tmp_start = start;
2739                         tmp_offset = offset;
2740                         if (entry->vme_start < start) {
2741                                 tmp_start -= start - entry->vme_start;
2742                                 tmp_offset -= start - entry->vme_start;
2743                         }
2744                         for (; entry->vme_start < end;
2745                             entry = entry->vme_next) {
2746                                 /*
2747                                  * Check if the mapping's attributes
2748                                  * match the existing map entry.
2749                                  */
2750                                 if (entry == vm_map_to_entry(map) ||
2751                                     entry->vme_start != tmp_start ||
2752                                     entry->is_sub_map != is_submap ||
2753                                     VME_OFFSET(entry) != tmp_offset ||
2754                                     entry->needs_copy != needs_copy ||
2755                                     entry->protection != cur_protection ||
2756                                     entry->max_protection != max_protection ||
2757                                     entry->inheritance != inheritance ||
2758                                     entry->iokit_acct != iokit_acct ||
2759                                     VME_ALIAS(entry) != alias) {
2760                                         /* not the same mapping ! */
2761                                         RETURN(KERN_NO_SPACE);
2762                                 }
2763                                 /*
2764                                  * Check if the same object is being mapped.
2765                                  */
2766                                 if (is_submap) {
2767                                         if (VME_SUBMAP(entry) !=
2768                                             (vm_map_t) object) {
2769                                                 /* not the same submap */
2770                                                 RETURN(KERN_NO_SPACE);
2771                                         }
2772                                 } else {
2773                                         if (VME_OBJECT(entry) != object) {
2774                                                 /* not the same VM object... */
2775                                                 vm_object_t obj2;
2776
2777                                                 obj2 = VME_OBJECT(entry);
2778                                                 if ((obj2 == VM_OBJECT_NULL ||
2779                                                     obj2->internal) &&
2780                                                     (object == VM_OBJECT_NULL ||
2781                                                     object->internal)) {
2782                                                         /*
2783                                                          * ... but both are
2784                                                          * anonymous memory,
2785                                                          * so equivalent.
2786                                                          */
2787                                                 } else {
2788                                                         RETURN(KERN_NO_SPACE);
2789                                                 }
2790                                         }
2791                                 }
2792
2793                                 tmp_offset += entry->vme_end - entry->vme_start;
2794                                 tmp_start += entry->vme_end - entry->vme_start;
2795                                 if (entry->vme_end >= end) {
2796                                         /* reached the end of our mapping */
2797                                         break;
2798                                 }
2799                         }
2800                         /* it all matches:  let's use what's already there ! */
2801                         RETURN(KERN_MEMORY_PRESENT);
2802                 }
2803
2804                 /*
2805                  *      ...     the next region doesn't overlap the
2806                  *              end point.
2807                  */
2808
2809                 if ((entry->vme_next != vm_map_to_entry(map)) &&
2810                     (entry->vme_next->vme_start < end)) {
2811                         RETURN(KERN_NO_SPACE);
2812                 }
2813         }
2814
2815         /*
2816          *      At this point,
2817          *              "start" and "end" should define the endpoints of the
2818          *                      available new range, and
2819          *              "entry" should refer to the region before the new
2820          *                      range, and
2821          *
2822          *              the map should be locked.
2823          */
2824
2825         /*
2826          *      See whether we can avoid creating a new entry (and object) by
2827          *      extending one of our neighbors.  [So far, we only attempt to
2828          *      extend from below.]  Note that we can never extend/join
2829          *      purgable objects because they need to remain distinct
2830          *      entities in order to implement their "volatile object"
2831          *      semantics.
2832          */
2833
2834         if (purgable ||
2835             entry_for_jit ||
2836             vm_memory_malloc_no_cow(user_alias)) {
2837                 if (object == VM_OBJECT_NULL) {
2838                         object = vm_object_allocate(size);
2839                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2840                         object->true_share = FALSE;
2841                         if (purgable) {
2842                                 task_t owner;
2843                                 object->purgable = VM_PURGABLE_NONVOLATILE;
2844                                 if (map->pmap == kernel_pmap) {
2845                                         /*
2846                                          * Purgeable mappings made in a kernel
2847                                          * map are "owned" by the kernel itself
2848                                          * rather than the current user task
2849                                          * because they're likely to be used by
2850                                          * more than this user task (see
2851                                          * execargs_purgeable_allocate(), for
2852                                          * example).
2853                                          */
2854                                         owner = kernel_task;
2855                                 } else {
2856                                         owner = current_task();
2857                                 }
2858                                 assert(object->vo_owner == NULL);
2859                                 assert(object->resident_page_count == 0);
2860                                 assert(object->wired_page_count == 0);
2861                                 vm_object_lock(object);
2862                                 vm_purgeable_nonvolatile_enqueue(object, owner);
2863                                 vm_object_unlock(object);
2864                         }
2865                         offset = (vm_object_offset_t)0;
2866                 }
2867         } else if ((is_submap == FALSE) &&
2868             (object == VM_OBJECT_NULL) &&
2869             (entry != vm_map_to_entry(map)) &&
2870             (entry->vme_end == start) &&
2871             (!entry->is_shared) &&
2872             (!entry->is_sub_map) &&
2873             (!entry->in_transition) &&
2874             (!entry->needs_wakeup) &&
2875             (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2876             (entry->protection == cur_protection) &&
2877             (entry->max_protection == max_protection) &&
2878             (entry->inheritance == inheritance) &&
2879             ((user_alias == VM_MEMORY_REALLOC) ||
2880             (VME_ALIAS(entry) == alias)) &&
2881             (entry->no_cache == no_cache) &&
2882             (entry->permanent == permanent) &&
2883             /* no coalescing for immutable executable mappings */
2884             !((entry->protection & VM_PROT_EXECUTE) &&
2885             entry->permanent) &&
2886             (!entry->superpage_size && !superpage_size) &&
2887             /*
2888              * No coalescing if not map-aligned, to avoid propagating
2889              * that condition any further than needed:
2890              */
2891             (!entry->map_aligned || !clear_map_aligned) &&
2892             (!entry->zero_wired_pages) &&
2893             (!entry->used_for_jit && !entry_for_jit) &&
2894             (!entry->pmap_cs_associated) &&
2895             (entry->iokit_acct == iokit_acct) &&
2896             (!entry->vme_resilient_codesign) &&
2897             (!entry->vme_resilient_media) &&
2898             (!entry->vme_atomic) &&
2899             (entry->vme_no_copy_on_read == no_copy_on_read) &&
2900
2901             ((entry->vme_end - entry->vme_start) + size <=
2902             (user_alias == VM_MEMORY_REALLOC ?
2903             ANON_CHUNK_SIZE :
2904             NO_COALESCE_LIMIT)) &&
2905
2906             (entry->wired_count == 0)) {        /* implies user_wired_count == 0 */
2907                 if (vm_object_coalesce(VME_OBJECT(entry),
2908                     VM_OBJECT_NULL,
2909                     VME_OFFSET(entry),
2910                     (vm_object_offset_t) 0,
2911                     (vm_map_size_t)(entry->vme_end - entry->vme_start),
2912                     (vm_map_size_t)(end - entry->vme_end))) {
2913                         /*
2914                          *      Coalesced the two objects - can extend
2915                          *      the previous map entry to include the
2916                          *      new range.
2917                          */
2918                         map->size += (end - entry->vme_end);
2919                         assert(entry->vme_start < end);
2920                         assert(VM_MAP_PAGE_ALIGNED(end,
2921                             VM_MAP_PAGE_MASK(map)));
2922                         if (__improbable(vm_debug_events)) {
2923                                 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2924                         }
2925                         entry->vme_end = end;
2926                         if (map->holelistenabled) {
2927                                 vm_map_store_update_first_free(map, entry, TRUE);
2928                         } else {
2929                                 vm_map_store_update_first_free(map, map->first_free, TRUE);
2930                         }
2931                         new_mapping_established = TRUE;
2932                         RETURN(KERN_SUCCESS);
2933                 }
2934         }
2935
2936         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2937         new_entry = NULL;
2938
2939         for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2940                 tmp2_end = tmp2_start + step;
2941                 /*
2942                  *      Create a new entry
2943                  *
2944                  * XXX FBDP
2945                  * The reserved "page zero" in each process's address space can
2946                  * be arbitrarily large.  Splitting it into separate objects and
2947                  * therefore different VM map entries serves no purpose and just
2948                  * slows down operations on the VM map, so let's not split the
2949                  * allocation into chunks if the max protection is NONE.  That
2950                  * memory should never be accessible, so it will never get to the
2951                  * default pager.
2952                  */
2953                 tmp_start = tmp2_start;
2954                 if (object == VM_OBJECT_NULL &&
2955                     size > chunk_size &&
2956                     max_protection != VM_PROT_NONE &&
2957                     superpage_size == 0) {
2958                         tmp_end = tmp_start + chunk_size;
2959                 } else {
2960                         tmp_end = tmp2_end;
2961                 }
2962                 do {
2963                         new_entry = vm_map_entry_insert(
2964                                 map, entry, tmp_start, tmp_end,
2965                                 object, offset, needs_copy,
2966                                 FALSE, FALSE,
2967                                 cur_protection, max_protection,
2968                                 VM_BEHAVIOR_DEFAULT,
2969                                 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2970                                 0,
2971                                 no_cache,
2972                                 permanent,
2973                                 no_copy_on_read,
2974                                 superpage_size,
2975                                 clear_map_aligned,
2976                                 is_submap,
2977                                 entry_for_jit,
2978                                 alias);
2979
2980                         assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2981
2982                         if (resilient_codesign &&
2983                             !((cur_protection | max_protection) &
2984                             (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2985                                 new_entry->vme_resilient_codesign = TRUE;
2986                         }
2987
2988                         if (resilient_media &&
2989                             (object == VM_OBJECT_NULL ||
2990                             object->internal)) {
2991                                 new_entry->vme_resilient_media = TRUE;
2992                         }
2993
2994                         assert(!new_entry->iokit_acct);
2995                         if (!is_submap &&
2996                             object != VM_OBJECT_NULL &&
2997                             (object->purgable != VM_PURGABLE_DENY ||
2998                             object->vo_ledger_tag)) {
2999                                 assert(new_entry->use_pmap);
3000                                 assert(!new_entry->iokit_acct);
3001                                 /*
3002                                  * Turn off pmap accounting since
3003                                  * purgeable (or tagged) objects have their
3004                                  * own ledgers.
3005                                  */
3006                                 new_entry->use_pmap = FALSE;
3007                         } else if (!is_submap &&
3008                             iokit_acct &&
3009                             object != VM_OBJECT_NULL &&
3010                             object->internal) {
3011                                 /* alternate accounting */
3012                                 assert(!new_entry->iokit_acct);
3013                                 assert(new_entry->use_pmap);
3014                                 new_entry->iokit_acct = TRUE;
3015                                 new_entry->use_pmap = FALSE;
3016                                 DTRACE_VM4(
3017                                         vm_map_iokit_mapped_region,
3018                                         vm_map_t, map,
3019                                         vm_map_offset_t, new_entry->vme_start,
3020                                         vm_map_offset_t, new_entry->vme_end,
3021                                         int, VME_ALIAS(new_entry));
3022                                 vm_map_iokit_mapped_region(
3023                                         map,
3024                                         (new_entry->vme_end -
3025                                         new_entry->vme_start));
3026                         } else if (!is_submap) {
3027                                 assert(!new_entry->iokit_acct);
3028                                 assert(new_entry->use_pmap);
3029                         }
3030
3031                         if (is_submap) {
3032                                 vm_map_t        submap;
3033                                 boolean_t       submap_is_64bit;
3034                                 boolean_t       use_pmap;
3035
3036                                 assert(new_entry->is_sub_map);
3037                                 assert(!new_entry->use_pmap);
3038                                 assert(!new_entry->iokit_acct);
3039                                 submap = (vm_map_t) object;
3040                                 submap_is_64bit = vm_map_is_64bit(submap);
3041                                 use_pmap = vmk_flags.vmkf_nested_pmap;
3042 #ifndef NO_NESTED_PMAP
3043                                 if (use_pmap && submap->pmap == NULL) {
3044                                         ledger_t ledger = map->pmap->ledger;
3045                                         /* we need a sub pmap to nest... */
3046                                         submap->pmap = pmap_create_options(ledger, 0,
3047                                             submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3048                                         if (submap->pmap == NULL) {
3049                                                 /* let's proceed without nesting... */
3050                                         }
3051 #if     defined(__arm__) || defined(__arm64__)
3052                                         else {
3053                                                 pmap_set_nested(submap->pmap);
3054                                         }
3055 #endif
3056                                 }
3057                                 if (use_pmap && submap->pmap != NULL) {
3058                                         kr = pmap_nest(map->pmap,
3059                                             submap->pmap,
3060                                             tmp_start,
3061                                             tmp_start,
3062                                             tmp_end - tmp_start);
3063                                         if (kr != KERN_SUCCESS) {
3064                                                 printf("vm_map_enter: "
3065                                                     "pmap_nest(0x%llx,0x%llx) "
3066                                                     "error 0x%x\n",
3067                                                     (long long)tmp_start,
3068                                                     (long long)tmp_end,
3069                                                     kr);
3070                                         } else {
3071                                                 /* we're now nested ! */
3072                                                 new_entry->use_pmap = TRUE;
3073                                                 pmap_empty = FALSE;
3074                                         }
3075                                 }
3076 #endif /* NO_NESTED_PMAP */
3077                         }
3078                         entry = new_entry;
3079
3080                         if (superpage_size) {
3081                                 vm_page_t pages, m;
3082                                 vm_object_t sp_object;
3083                                 vm_object_offset_t sp_offset;
3084
3085                                 VME_OFFSET_SET(entry, 0);
3086
3087                                 /* allocate one superpage */
3088                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3089                                 if (kr != KERN_SUCCESS) {
3090                                         /* deallocate whole range... */
3091                                         new_mapping_established = TRUE;
3092                                         /* ... but only up to "tmp_end" */
3093                                         size -= end - tmp_end;
3094                                         RETURN(kr);
3095                                 }
3096
3097                                 /* create one vm_object per superpage */
3098                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3099                                 sp_object->phys_contiguous = TRUE;
3100                                 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3101                                 VME_OBJECT_SET(entry, sp_object);
3102                                 assert(entry->use_pmap);
3103
3104                                 /* enter the base pages into the object */
3105                                 vm_object_lock(sp_object);
3106                                 for (sp_offset = 0;
3107                                     sp_offset < SUPERPAGE_SIZE;
3108                                     sp_offset += PAGE_SIZE) {
3109                                         m = pages;
3110                                         pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3111                                         pages = NEXT_PAGE(m);
3112                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3113                                         vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3114                                 }
3115                                 vm_object_unlock(sp_object);
3116                         }
3117                 } while (tmp_end != tmp2_end &&
3118                     (tmp_start = tmp_end) &&
3119                     (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3120                     tmp_end + chunk_size : tmp2_end));
3121         }
3122
3123         new_mapping_established = TRUE;
3124
3125 BailOut:
3126         assert(map_locked == TRUE);
3127
3128         if (result == KERN_SUCCESS) {
3129                 vm_prot_t pager_prot;
3130                 memory_object_t pager;
3131
3132 #if DEBUG
3133                 if (pmap_empty &&
3134                     !(vmk_flags.vmkf_no_pmap_check)) {
3135                         assert(vm_map_pmap_is_empty(map,
3136                             *address,
3137                             *address + size));
3138                 }
3139 #endif /* DEBUG */
3140
3141                 /*
3142                  * For "named" VM objects, let the pager know that the
3143                  * memory object is being mapped.  Some pagers need to keep
3144                  * track of this, to know when they can reclaim the memory
3145                  * object, for example.
3146                  * VM calls memory_object_map() for each mapping (specifying
3147                  * the protection of each mapping) and calls
3148                  * memory_object_last_unmap() when all the mappings are gone.
3149                  */
3150                 pager_prot = max_protection;
3151                 if (needs_copy) {
3152                         /*
3153                          * Copy-On-Write mapping: won't modify
3154                          * the memory object.
3155                          */
3156                         pager_prot &= ~VM_PROT_WRITE;
3157                 }
3158                 if (!is_submap &&
3159                     object != VM_OBJECT_NULL &&
3160                     object->named &&
3161                     object->pager != MEMORY_OBJECT_NULL) {
3162                         vm_object_lock(object);
3163                         pager = object->pager;
3164                         if (object->named &&
3165                             pager != MEMORY_OBJECT_NULL) {
3166                                 assert(object->pager_ready);
3167                                 vm_object_mapping_wait(object, THREAD_UNINT);
3168                                 vm_object_mapping_begin(object);
3169                                 vm_object_unlock(object);
3170
3171                                 kr = memory_object_map(pager, pager_prot);
3172                                 assert(kr == KERN_SUCCESS);
3173
3174                                 vm_object_lock(object);
3175                                 vm_object_mapping_end(object);
3176                         }
3177                         vm_object_unlock(object);
3178                 }
3179         }
3180
3181         assert(map_locked == TRUE);
3182
3183         if (!keep_map_locked) {
3184                 vm_map_unlock(map);
3185                 map_locked = FALSE;
3186         }
3187
3188         /*
3189          * We can't hold the map lock if we enter this block.
3190          */
3191
3192         if (result == KERN_SUCCESS) {
3193                 /*      Wire down the new entry if the user
3194                  *      requested all new map entries be wired.
3195                  */
3196                 if ((map->wiring_required) || (superpage_size)) {
3197                         assert(!keep_map_locked);
3198                         pmap_empty = FALSE; /* pmap won't be empty */
3199                         kr = vm_map_wire_kernel(map, start, end,
3200                             new_entry->protection, VM_KERN_MEMORY_MLOCK,
3201                             TRUE);
3202                         result = kr;
3203                 }
3204
3205         }
3206
3207         if (result != KERN_SUCCESS) {
3208                 if (new_mapping_established) {
3209                         /*
3210                          * We have to get rid of the new mappings since we
3211                          * won't make them available to the user.
3212                          * Try and do that atomically, to minimize the risk
3213                          * that someone else creates new mappings in that range.
3214                          */
3215                         zap_new_map = vm_map_create(PMAP_NULL,
3216                             *address,
3217                             *address + size,
3218                             map->hdr.entries_pageable);
3219                         vm_map_set_page_shift(zap_new_map,
3220                             VM_MAP_PAGE_SHIFT(map));
3221                         vm_map_disable_hole_optimization(zap_new_map);
3222
3223                         if (!map_locked) {
3224                                 vm_map_lock(map);
3225                                 map_locked = TRUE;
3226                         }
3227                         (void) vm_map_delete(map, *address, *address + size,
3228                             (VM_MAP_REMOVE_SAVE_ENTRIES |
3229                             VM_MAP_REMOVE_NO_MAP_ALIGN),
3230                             zap_new_map);
3231                 }
3232                 if (zap_old_map != VM_MAP_NULL &&
3233                     zap_old_map->hdr.nentries != 0) {
3234                         vm_map_entry_t  entry1, entry2;
3235
3236                         /*
3237                          * The new mapping failed.  Attempt to restore
3238                          * the old mappings, saved in the "zap_old_map".
3239                          */
3240                         if (!map_locked) {
3241                                 vm_map_lock(map);
3242                                 map_locked = TRUE;
3243                         }
3244
3245                         /* first check if the coast is still clear */
3246                         start = vm_map_first_entry(zap_old_map)->vme_start;
3247                         end = vm_map_last_entry(zap_old_map)->vme_end;
3248                         if (vm_map_lookup_entry(map, start, &entry1) ||
3249                             vm_map_lookup_entry(map, end, &entry2) ||
3250                             entry1 != entry2) {
3251                                 /*
3252                                  * Part of that range has already been
3253                                  * re-mapped:  we can't restore the old
3254                                  * mappings...
3255                                  */
3256                                 vm_map_enter_restore_failures++;
3257                         } else {
3258                                 /*
3259                                  * Transfer the saved map entries from
3260                                  * "zap_old_map" to the original "map",
3261                                  * inserting them all after "entry1".
3262                                  */
3263                                 for (entry2 = vm_map_first_entry(zap_old_map);
3264                                     entry2 != vm_map_to_entry(zap_old_map);
3265                                     entry2 = vm_map_first_entry(zap_old_map)) {
3266                                         vm_map_size_t entry_size;
3267
3268                                         entry_size = (entry2->vme_end -
3269                                             entry2->vme_start);
3270                                         vm_map_store_entry_unlink(zap_old_map,
3271                                             entry2);
3272                                         zap_old_map->size -= entry_size;
3273                                         vm_map_store_entry_link(map, entry1, entry2,
3274                                             VM_MAP_KERNEL_FLAGS_NONE);
3275                                         map->size += entry_size;
3276                                         entry1 = entry2;
3277                                 }
3278                                 if (map->wiring_required) {
3279                                         /*
3280                                          * XXX TODO: we should rewire the
3281                                          * old pages here...
3282                                          */
3283                                 }
3284                                 vm_map_enter_restore_successes++;
3285                         }
3286                 }
3287         }
3288
3289         /*
3290          * The caller is responsible for releasing the lock if it requested to
3291          * keep the map locked.
3292          */
3293         if (map_locked && !keep_map_locked) {
3294                 vm_map_unlock(map);
3295         }
3296
3297         /*
3298          * Get rid of the "zap_maps" and all the map entries that
3299          * they may still contain.
3300          */
3301         if (zap_old_map != VM_MAP_NULL) {
3302                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3303                 zap_old_map = VM_MAP_NULL;
3304         }
3305         if (zap_new_map != VM_MAP_NULL) {
3306                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3307                 zap_new_map = VM_MAP_NULL;
3308         }
3309
3310         return result;
3311
3312 #undef  RETURN
3313 }
3314
3315 #if __arm64__
3316 extern const struct memory_object_pager_ops fourk_pager_ops;
3317 kern_return_t
3318 vm_map_enter_fourk(
3319         vm_map_t                map,
3320         vm_map_offset_t         *address,       /* IN/OUT */
3321         vm_map_size_t           size,
3322         vm_map_offset_t         mask,
3323         int                     flags,
3324         vm_map_kernel_flags_t   vmk_flags,
3325         vm_tag_t                alias,
3326         vm_object_t             object,
3327         vm_object_offset_t      offset,
3328         boolean_t               needs_copy,
3329         vm_prot_t               cur_protection,
3330         vm_prot_t               max_protection,
3331         vm_inherit_t            inheritance)
3332 {
3333         vm_map_entry_t          entry, new_entry;
3334         vm_map_offset_t         start, fourk_start;
3335         vm_map_offset_t         end, fourk_end;
3336         vm_map_size_t           fourk_size;
3337         kern_return_t           result = KERN_SUCCESS;
3338         vm_map_t                zap_old_map = VM_MAP_NULL;
3339         vm_map_t                zap_new_map = VM_MAP_NULL;
3340         boolean_t               map_locked = FALSE;
3341         boolean_t               pmap_empty = TRUE;
3342         boolean_t               new_mapping_established = FALSE;
3343         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3344         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3345         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3346         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3347         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3348         boolean_t               is_submap = vmk_flags.vmkf_submap;
3349         boolean_t               permanent = vmk_flags.vmkf_permanent;
3350         boolean_t               no_copy_on_read = vmk_flags.vmkf_permanent;
3351         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
3352 //      boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
3353         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3354         vm_map_offset_t         effective_min_offset, effective_max_offset;
3355         kern_return_t           kr;
3356         boolean_t               clear_map_aligned = FALSE;
3357         memory_object_t         fourk_mem_obj;
3358         vm_object_t             fourk_object;
3359         vm_map_offset_t         fourk_pager_offset;
3360         int                     fourk_pager_index_start, fourk_pager_index_num;
3361         int                     cur_idx;
3362         boolean_t               fourk_copy;
3363         vm_object_t             copy_object;
3364         vm_object_offset_t      copy_offset;
3365
3366         fourk_mem_obj = MEMORY_OBJECT_NULL;
3367         fourk_object = VM_OBJECT_NULL;
3368
3369         if (superpage_size) {
3370                 return KERN_NOT_SUPPORTED;
3371         }
3372
3373         if ((cur_protection & VM_PROT_WRITE) &&
3374             (cur_protection & VM_PROT_EXECUTE) &&
3375 #if !CONFIG_EMBEDDED
3376             map != kernel_map &&
3377             cs_process_enforcement(NULL) &&
3378 #endif /* !CONFIG_EMBEDDED */
3379             !entry_for_jit) {
3380                 DTRACE_VM3(cs_wx,
3381                     uint64_t, 0,
3382                     uint64_t, 0,
3383                     vm_prot_t, cur_protection);
3384                 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3385                     "turning off execute\n",
3386                     proc_selfpid(),
3387                     (current_task()->bsd_info
3388                     ? proc_name_address(current_task()->bsd_info)
3389                     : "?"),
3390                     __FUNCTION__);
3391                 cur_protection &= ~VM_PROT_EXECUTE;
3392         }
3393
3394         /*
3395          * If the task has requested executable lockdown,
3396          * deny any new executable mapping.
3397          */
3398         if (map->map_disallow_new_exec == TRUE) {
3399                 if (cur_protection & VM_PROT_EXECUTE) {
3400                         return KERN_PROTECTION_FAILURE;
3401                 }
3402         }
3403
3404         if (is_submap) {
3405                 return KERN_NOT_SUPPORTED;
3406         }
3407         if (vmk_flags.vmkf_already) {
3408                 return KERN_NOT_SUPPORTED;
3409         }
3410         if (purgable || entry_for_jit) {
3411                 return KERN_NOT_SUPPORTED;
3412         }
3413
3414         effective_min_offset = map->min_offset;
3415
3416         if (vmk_flags.vmkf_beyond_max) {
3417                 return KERN_NOT_SUPPORTED;
3418         } else {
3419                 effective_max_offset = map->max_offset;
3420         }
3421
3422         if (size == 0 ||
3423             (offset & FOURK_PAGE_MASK) != 0) {
3424                 *address = 0;
3425                 return KERN_INVALID_ARGUMENT;
3426         }
3427
3428 #define RETURN(value)   { result = value; goto BailOut; }
3429
3430         assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3431         assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3432
3433         if (!anywhere && overwrite) {
3434                 return KERN_NOT_SUPPORTED;
3435         }
3436         if (!anywhere && overwrite) {
3437                 /*
3438                  * Create a temporary VM map to hold the old mappings in the
3439                  * affected area while we create the new one.
3440                  * This avoids releasing the VM map lock in
3441                  * vm_map_entry_delete() and allows atomicity
3442                  * when we want to replace some mappings with a new one.
3443                  * It also allows us to restore the old VM mappings if the
3444                  * new mapping fails.
3445                  */
3446                 zap_old_map = vm_map_create(PMAP_NULL,
3447                     *address,
3448                     *address + size,
3449                     map->hdr.entries_pageable);
3450                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3451                 vm_map_disable_hole_optimization(zap_old_map);
3452         }
3453
3454         fourk_start = *address;
3455         fourk_size = size;
3456         fourk_end = fourk_start + fourk_size;
3457
3458         start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3459         end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3460         size = end - start;
3461
3462         if (anywhere) {
3463                 return KERN_NOT_SUPPORTED;
3464         } else {
3465                 /*
3466                  *      Verify that:
3467                  *              the address doesn't itself violate
3468                  *              the mask requirement.
3469                  */
3470
3471                 vm_map_lock(map);
3472                 map_locked = TRUE;
3473                 if ((start & mask) != 0) {
3474                         RETURN(KERN_NO_SPACE);
3475                 }
3476
3477                 /*
3478                  *      ...     the address is within bounds
3479                  */
3480
3481                 end = start + size;
3482
3483                 if ((start < effective_min_offset) ||
3484                     (end > effective_max_offset) ||
3485                     (start >= end)) {
3486                         RETURN(KERN_INVALID_ADDRESS);
3487                 }
3488
3489                 if (overwrite && zap_old_map != VM_MAP_NULL) {
3490                         /*
3491                          * Fixed mapping and "overwrite" flag: attempt to
3492                          * remove all existing mappings in the specified
3493                          * address range, saving them in our "zap_old_map".
3494                          */
3495                         (void) vm_map_delete(map, start, end,
3496                             (VM_MAP_REMOVE_SAVE_ENTRIES |
3497                             VM_MAP_REMOVE_NO_MAP_ALIGN),
3498                             zap_old_map);
3499                 }
3500
3501                 /*
3502                  *      ...     the starting address isn't allocated
3503                  */
3504                 if (vm_map_lookup_entry(map, start, &entry)) {
3505                         vm_object_t cur_object, shadow_object;
3506
3507                         /*
3508                          * We might already have some 4K mappings
3509                          * in a 16K page here.
3510                          */
3511
3512                         if (entry->vme_end - entry->vme_start
3513                             != SIXTEENK_PAGE_SIZE) {
3514                                 RETURN(KERN_NO_SPACE);
3515                         }
3516                         if (entry->is_sub_map) {
3517                                 RETURN(KERN_NO_SPACE);
3518                         }
3519                         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3520                                 RETURN(KERN_NO_SPACE);
3521                         }
3522
3523                         /* go all the way down the shadow chain */
3524                         cur_object = VME_OBJECT(entry);
3525                         vm_object_lock(cur_object);
3526                         while (cur_object->shadow != VM_OBJECT_NULL) {
3527                                 shadow_object = cur_object->shadow;
3528                                 vm_object_lock(shadow_object);
3529                                 vm_object_unlock(cur_object);
3530                                 cur_object = shadow_object;
3531                                 shadow_object = VM_OBJECT_NULL;
3532                         }
3533                         if (cur_object->internal ||
3534                             cur_object->pager == NULL) {
3535                                 vm_object_unlock(cur_object);
3536                                 RETURN(KERN_NO_SPACE);
3537                         }
3538                         if (cur_object->pager->mo_pager_ops
3539                             != &fourk_pager_ops) {
3540                                 vm_object_unlock(cur_object);
3541                                 RETURN(KERN_NO_SPACE);
3542                         }
3543                         fourk_object = cur_object;
3544                         fourk_mem_obj = fourk_object->pager;
3545
3546                         /* keep the "4K" object alive */
3547                         vm_object_reference_locked(fourk_object);
3548                         vm_object_unlock(fourk_object);
3549
3550                         /* merge permissions */
3551                         entry->protection |= cur_protection;
3552                         entry->max_protection |= max_protection;
3553                         if ((entry->protection & (VM_PROT_WRITE |
3554                             VM_PROT_EXECUTE)) ==
3555                             (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3556                             fourk_binary_compatibility_unsafe &&
3557                             fourk_binary_compatibility_allow_wx) {
3558                                 /* write+execute: need to be "jit" */
3559                                 entry->used_for_jit = TRUE;
3560                         }
3561
3562                         goto map_in_fourk_pager;
3563                 }
3564
3565                 /*
3566                  *      ...     the next region doesn't overlap the
3567                  *              end point.
3568                  */
3569
3570                 if ((entry->vme_next != vm_map_to_entry(map)) &&
3571                     (entry->vme_next->vme_start < end)) {
3572                         RETURN(KERN_NO_SPACE);
3573                 }
3574         }
3575
3576         /*
3577          *      At this point,
3578          *              "start" and "end" should define the endpoints of the
3579          *                      available new range, and
3580          *              "entry" should refer to the region before the new
3581          *                      range, and
3582          *
3583          *              the map should be locked.
3584          */
3585
3586         /* create a new "4K" pager */
3587         fourk_mem_obj = fourk_pager_create();
3588         fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3589         assert(fourk_object);
3590
3591         /* keep the "4K" object alive */
3592         vm_object_reference(fourk_object);
3593
3594         /* create a "copy" object, to map the "4K" object copy-on-write */
3595         fourk_copy = TRUE;
3596         result = vm_object_copy_strategically(fourk_object,
3597             0,
3598             end - start,
3599             &copy_object,
3600             &copy_offset,
3601             &fourk_copy);
3602         assert(result == KERN_SUCCESS);
3603         assert(copy_object != VM_OBJECT_NULL);
3604         assert(copy_offset == 0);
3605
3606         /* take a reference on the copy object, for this mapping */
3607         vm_object_reference(copy_object);
3608
3609         /* map the "4K" pager's copy object */
3610         new_entry =
3611             vm_map_entry_insert(map, entry,
3612             vm_map_trunc_page(start,
3613             VM_MAP_PAGE_MASK(map)),
3614             vm_map_round_page(end,
3615             VM_MAP_PAGE_MASK(map)),
3616             copy_object,
3617             0,                         /* offset */
3618             FALSE,                         /* needs_copy */
3619             FALSE,
3620             FALSE,
3621             cur_protection, max_protection,
3622             VM_BEHAVIOR_DEFAULT,
3623             ((entry_for_jit)
3624             ? VM_INHERIT_NONE
3625             : inheritance),
3626             0,
3627             no_cache,
3628             permanent,
3629             no_copy_on_read,
3630             superpage_size,
3631             clear_map_aligned,
3632             is_submap,
3633             FALSE,                         /* jit */
3634             alias);
3635         entry = new_entry;
3636
3637 #if VM_MAP_DEBUG_FOURK
3638         if (vm_map_debug_fourk) {
3639                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3640                     map,
3641                     (uint64_t) entry->vme_start,
3642                     (uint64_t) entry->vme_end,
3643                     fourk_mem_obj);
3644         }
3645 #endif /* VM_MAP_DEBUG_FOURK */
3646
3647         new_mapping_established = TRUE;
3648
3649 map_in_fourk_pager:
3650         /* "map" the original "object" where it belongs in the "4K" pager */
3651         fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3652         fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3653         if (fourk_size > SIXTEENK_PAGE_SIZE) {
3654                 fourk_pager_index_num = 4;
3655         } else {
3656                 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3657         }
3658         if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3659                 fourk_pager_index_num = 4 - fourk_pager_index_start;
3660         }
3661         for (cur_idx = 0;
3662             cur_idx < fourk_pager_index_num;
3663             cur_idx++) {
3664                 vm_object_t             old_object;
3665                 vm_object_offset_t      old_offset;
3666
3667                 kr = fourk_pager_populate(fourk_mem_obj,
3668                     TRUE,                       /* overwrite */
3669                     fourk_pager_index_start + cur_idx,
3670                     object,
3671                     (object
3672                     ? (offset +
3673                     (cur_idx * FOURK_PAGE_SIZE))
3674                     : 0),
3675                     &old_object,
3676                     &old_offset);
3677 #if VM_MAP_DEBUG_FOURK
3678                 if (vm_map_debug_fourk) {
3679                         if (old_object == (vm_object_t) -1 &&
3680                             old_offset == (vm_object_offset_t) -1) {
3681                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3682                                     "pager [%p:0x%llx] "
3683                                     "populate[%d] "
3684                                     "[object:%p,offset:0x%llx]\n",
3685                                     map,
3686                                     (uint64_t) entry->vme_start,
3687                                     (uint64_t) entry->vme_end,
3688                                     fourk_mem_obj,
3689                                     VME_OFFSET(entry),
3690                                     fourk_pager_index_start + cur_idx,
3691                                     object,
3692                                     (object
3693                                     ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3694                                     : 0));
3695                         } else {
3696                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3697                                     "pager [%p:0x%llx] "
3698                                     "populate[%d] [object:%p,offset:0x%llx] "
3699                                     "old [%p:0x%llx]\n",
3700                                     map,
3701                                     (uint64_t) entry->vme_start,
3702                                     (uint64_t) entry->vme_end,
3703                                     fourk_mem_obj,
3704                                     VME_OFFSET(entry),
3705                                     fourk_pager_index_start + cur_idx,
3706                                     object,
3707                                     (object
3708                                     ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3709                                     : 0),
3710                                     old_object,
3711                                     old_offset);
3712                         }
3713                 }
3714 #endif /* VM_MAP_DEBUG_FOURK */
3715
3716                 assert(kr == KERN_SUCCESS);
3717                 if (object != old_object &&
3718                     object != VM_OBJECT_NULL &&
3719                     object != (vm_object_t) -1) {
3720                         vm_object_reference(object);
3721                 }
3722                 if (object != old_object &&
3723                     old_object != VM_OBJECT_NULL &&
3724                     old_object != (vm_object_t) -1) {
3725                         vm_object_deallocate(old_object);
3726                 }
3727         }
3728
3729 BailOut:
3730         assert(map_locked == TRUE);
3731
3732         if (fourk_object != VM_OBJECT_NULL) {
3733                 vm_object_deallocate(fourk_object);
3734                 fourk_object = VM_OBJECT_NULL;
3735                 fourk_mem_obj = MEMORY_OBJECT_NULL;
3736         }
3737
3738         if (result == KERN_SUCCESS) {
3739                 vm_prot_t pager_prot;
3740                 memory_object_t pager;
3741
3742 #if DEBUG
3743                 if (pmap_empty &&
3744                     !(vmk_flags.vmkf_no_pmap_check)) {
3745                         assert(vm_map_pmap_is_empty(map,
3746                             *address,
3747                             *address + size));
3748                 }
3749 #endif /* DEBUG */
3750
3751                 /*
3752                  * For "named" VM objects, let the pager know that the
3753                  * memory object is being mapped.  Some pagers need to keep
3754                  * track of this, to know when they can reclaim the memory
3755                  * object, for example.
3756                  * VM calls memory_object_map() for each mapping (specifying
3757                  * the protection of each mapping) and calls
3758                  * memory_object_last_unmap() when all the mappings are gone.
3759                  */
3760                 pager_prot = max_protection;
3761                 if (needs_copy) {
3762                         /*
3763                          * Copy-On-Write mapping: won't modify
3764                          * the memory object.
3765                          */
3766                         pager_prot &= ~VM_PROT_WRITE;
3767                 }
3768                 if (!is_submap &&
3769                     object != VM_OBJECT_NULL &&
3770                     object->named &&
3771                     object->pager != MEMORY_OBJECT_NULL) {
3772                         vm_object_lock(object);
3773                         pager = object->pager;
3774                         if (object->named &&
3775                             pager != MEMORY_OBJECT_NULL) {
3776                                 assert(object->pager_ready);
3777                                 vm_object_mapping_wait(object, THREAD_UNINT);
3778                                 vm_object_mapping_begin(object);
3779                                 vm_object_unlock(object);
3780
3781                                 kr = memory_object_map(pager, pager_prot);
3782                                 assert(kr == KERN_SUCCESS);
3783
3784                                 vm_object_lock(object);
3785                                 vm_object_mapping_end(object);
3786                         }
3787                         vm_object_unlock(object);
3788                 }
3789                 if (!is_submap &&
3790                     fourk_object != VM_OBJECT_NULL &&
3791                     fourk_object->named &&
3792                     fourk_object->pager != MEMORY_OBJECT_NULL) {
3793                         vm_object_lock(fourk_object);
3794                         pager = fourk_object->pager;
3795                         if (fourk_object->named &&
3796                             pager != MEMORY_OBJECT_NULL) {
3797                                 assert(fourk_object->pager_ready);
3798                                 vm_object_mapping_wait(fourk_object,
3799                                     THREAD_UNINT);
3800                                 vm_object_mapping_begin(fourk_object);
3801                                 vm_object_unlock(fourk_object);
3802
3803                                 kr = memory_object_map(pager, VM_PROT_READ);
3804                                 assert(kr == KERN_SUCCESS);
3805
3806                                 vm_object_lock(fourk_object);
3807                                 vm_object_mapping_end(fourk_object);
3808                         }
3809                         vm_object_unlock(fourk_object);
3810                 }
3811         }
3812
3813         assert(map_locked == TRUE);
3814
3815         if (!keep_map_locked) {
3816                 vm_map_unlock(map);
3817                 map_locked = FALSE;
3818         }
3819
3820         /*
3821          * We can't hold the map lock if we enter this block.
3822          */
3823
3824         if (result == KERN_SUCCESS) {
3825                 /*      Wire down the new entry if the user
3826                  *      requested all new map entries be wired.
3827                  */
3828                 if ((map->wiring_required) || (superpage_size)) {
3829                         assert(!keep_map_locked);
3830                         pmap_empty = FALSE; /* pmap won't be empty */
3831                         kr = vm_map_wire_kernel(map, start, end,
3832                             new_entry->protection, VM_KERN_MEMORY_MLOCK,
3833                             TRUE);
3834                         result = kr;
3835                 }
3836
3837         }
3838
3839         if (result != KERN_SUCCESS) {
3840                 if (new_mapping_established) {
3841                         /*
3842                          * We have to get rid of the new mappings since we
3843                          * won't make them available to the user.
3844                          * Try to do that atomically, to minimize the risk
3845                          * that someone else creates new mappings in that range.
3846                          */
3847                         zap_new_map = vm_map_create(PMAP_NULL,
3848                             *address,
3849                             *address + size,
3850                             map->hdr.entries_pageable);
3851                         vm_map_set_page_shift(zap_new_map,
3852                             VM_MAP_PAGE_SHIFT(map));
3853                         vm_map_disable_hole_optimization(zap_new_map);
3854
3855                         if (!map_locked) {
3856                                 vm_map_lock(map);
3857                                 map_locked = TRUE;
3858                         }
3859                         (void) vm_map_delete(map, *address, *address + size,
3860                             (VM_MAP_REMOVE_SAVE_ENTRIES |
3861                             VM_MAP_REMOVE_NO_MAP_ALIGN),
3862                             zap_new_map);
3863                 }
3864                 if (zap_old_map != VM_MAP_NULL &&
3865                     zap_old_map->hdr.nentries != 0) {
3866                         vm_map_entry_t  entry1, entry2;
3867
3868                         /*
3869                          * The new mapping failed.  Attempt to restore
3870                          * the old mappings, saved in the "zap_old_map".
3871                          */
3872                         if (!map_locked) {
3873                                 vm_map_lock(map);
3874                                 map_locked = TRUE;
3875                         }
3876
3877                         /* first check if the coast is still clear */
3878                         start = vm_map_first_entry(zap_old_map)->vme_start;
3879                         end = vm_map_last_entry(zap_old_map)->vme_end;
3880                         if (vm_map_lookup_entry(map, start, &entry1) ||
3881                             vm_map_lookup_entry(map, end, &entry2) ||
3882                             entry1 != entry2) {
3883                                 /*
3884                                  * Part of that range has already been
3885                                  * re-mapped:  we can't restore the old
3886                                  * mappings...
3887                                  */
3888                                 vm_map_enter_restore_failures++;
3889                         } else {
3890                                 /*
3891                                  * Transfer the saved map entries from
3892                                  * "zap_old_map" to the original "map",
3893                                  * inserting them all after "entry1".
3894                                  */
3895                                 for (entry2 = vm_map_first_entry(zap_old_map);
3896                                     entry2 != vm_map_to_entry(zap_old_map);
3897                                     entry2 = vm_map_first_entry(zap_old_map)) {
3898                                         vm_map_size_t entry_size;
3899
3900                                         entry_size = (entry2->vme_end -
3901                                             entry2->vme_start);
3902                                         vm_map_store_entry_unlink(zap_old_map,
3903                                             entry2);
3904                                         zap_old_map->size -= entry_size;
3905                                         vm_map_store_entry_link(map, entry1, entry2,
3906                                             VM_MAP_KERNEL_FLAGS_NONE);
3907                                         map->size += entry_size;
3908                                         entry1 = entry2;
3909                                 }
3910                                 if (map->wiring_required) {
3911                                         /*
3912                                          * XXX TODO: we should rewire the
3913                                          * old pages here...
3914                                          */
3915                                 }
3916                                 vm_map_enter_restore_successes++;
3917                         }
3918                 }
3919         }
3920
3921         /*
3922          * The caller is responsible for releasing the lock if it requested to
3923          * keep the map locked.
3924          */
3925         if (map_locked && !keep_map_locked) {
3926                 vm_map_unlock(map);
3927         }
3928
3929         /*
3930          * Get rid of the "zap_maps" and all the map entries that
3931          * they may still contain.
3932          */
3933         if (zap_old_map != VM_MAP_NULL) {
3934                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3935                 zap_old_map = VM_MAP_NULL;
3936         }
3937         if (zap_new_map != VM_MAP_NULL) {
3938                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3939                 zap_new_map = VM_MAP_NULL;
3940         }
3941
3942         return result;
3943
3944 #undef  RETURN
3945 }
3946 #endif /* __arm64__ */
3947
3948 /*
3949  * Counters for the prefault optimization.  Both are global (non-static) signed 64-bit values that start at 0.
3950  */
3951 int64_t vm_prefault_nb_pages = 0;       /* NOTE(review): presumably the number of pages prefaulted -- confirm at the update site */
3952 int64_t vm_prefault_nb_bailout = 0;     /* NOTE(review): presumably the number of prefault attempts abandoned early -- confirm at the update site */
3953
3954 static kern_return_t
3955 vm_map_enter_mem_object_helper(
3956         vm_map_t                target_map,
3957         vm_map_offset_t         *address,
3958         vm_map_size_t           initial_size,
3959         vm_map_offset_t         mask,
3960         int                     flags,
3961         vm_map_kernel_flags_t   vmk_flags,
3962         vm_tag_t                tag,
3963         ipc_port_t              port,
3964         vm_object_offset_t      offset,
3965         boolean_t               copy,
3966         vm_prot_t               cur_protection,
3967         vm_prot_t               max_protection,
3968         vm_inherit_t            inheritance,
3969         upl_page_list_ptr_t     page_list,
3970         unsigned int            page_list_count)
3971 {
3972         vm_map_address_t        map_addr;
3973         vm_map_size_t           map_size;
3974         vm_object_t             object;
3975         vm_object_size_t        size;
3976         kern_return_t           result;
3977         boolean_t               mask_cur_protection, mask_max_protection;
3978         boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
3979         vm_map_offset_t         offset_in_mapping = 0;
3980 #if __arm64__
3981         boolean_t               fourk = vmk_flags.vmkf_fourk;
3982 #endif /* __arm64__ */
3983
3984         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3985
3986         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3987         mask_max_protection = max_protection & VM_PROT_IS_MASK;
3988         cur_protection &= ~VM_PROT_IS_MASK;
3989         max_protection &= ~VM_PROT_IS_MASK;
3990
3991         /*
3992          * Check arguments for validity
3993          */
3994         if ((target_map == VM_MAP_NULL) ||
3995             (cur_protection & ~VM_PROT_ALL) ||
3996             (max_protection & ~VM_PROT_ALL) ||
3997             (inheritance > VM_INHERIT_LAST_VALID) ||
3998             (try_prefault && (copy || !page_list)) ||
3999             initial_size == 0) {
4000                 return KERN_INVALID_ARGUMENT;
4001         }
4002
4003 #if __arm64__
4004         if (fourk) {
4005                 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4006                 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4007         } else
4008 #endif /* __arm64__ */
4009         {
4010                 map_addr = vm_map_trunc_page(*address,
4011                     VM_MAP_PAGE_MASK(target_map));
4012                 map_size = vm_map_round_page(initial_size,
4013                     VM_MAP_PAGE_MASK(target_map));
4014         }
4015         size = vm_object_round_page(initial_size);
4016
4017         /*
4018          * Find the vm object (if any) corresponding to this port.
4019          */
4020         if (!IP_VALID(port)) {
4021                 object = VM_OBJECT_NULL;
4022                 offset = 0;
4023                 copy = FALSE;
4024         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4025                 vm_named_entry_t        named_entry;
4026
4027                 named_entry = (vm_named_entry_t) ip_get_kobject(port);
4028
4029                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4030                     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4031                         offset += named_entry->data_offset;
4032                 }
4033
4034                 /* a few checks to make sure user is obeying rules */
4035                 if (size == 0) {
4036                         if (offset >= named_entry->size) {
4037                                 return KERN_INVALID_RIGHT;
4038                         }
4039                         size = named_entry->size - offset;
4040                 }
4041                 if (mask_max_protection) {
4042                         max_protection &= named_entry->protection;
4043                 }
4044                 if (mask_cur_protection) {
4045                         cur_protection &= named_entry->protection;
4046                 }
4047                 if ((named_entry->protection & max_protection) !=
4048                     max_protection) {
4049                         return KERN_INVALID_RIGHT;
4050                 }
4051                 if ((named_entry->protection & cur_protection) !=
4052                     cur_protection) {
4053                         return KERN_INVALID_RIGHT;
4054                 }
4055                 if (offset + size < offset) {
4056                         /* overflow */
4057                         return KERN_INVALID_ARGUMENT;
4058                 }
4059                 if (named_entry->size < (offset + initial_size)) {
4060                         return KERN_INVALID_ARGUMENT;
4061                 }
4062
4063                 if (named_entry->is_copy) {
4064                         /* for a vm_map_copy, we can only map it whole */
4065                         if ((size != named_entry->size) &&
4066                             (vm_map_round_page(size,
4067                             VM_MAP_PAGE_MASK(target_map)) ==
4068                             named_entry->size)) {
4069                                 /* XXX FBDP use the rounded size... */
4070                                 size = vm_map_round_page(
4071                                         size,
4072                                         VM_MAP_PAGE_MASK(target_map));
4073                         }
4074
4075                         if (!(flags & VM_FLAGS_ANYWHERE) &&
4076                             (offset != 0 ||
4077                             size != named_entry->size)) {
4078                                 /*
4079                                  * XXX for a mapping at a "fixed" address,
4080                                  * we can't trim after mapping the whole
4081                                  * memory entry, so reject a request for a
4082                                  * partial mapping.
4083                                  */
4084                                 return KERN_INVALID_ARGUMENT;
4085                         }
4086                 }
4087
4088                 /* the caller's parameter offset is defined to be the */
4089                 /* offset from beginning of named entry offset in object */
4090                 offset = offset + named_entry->offset;
4091
4092                 if (!VM_MAP_PAGE_ALIGNED(size,
4093                     VM_MAP_PAGE_MASK(target_map))) {
4094                         /*
4095                          * Let's not map more than requested;
4096                          * vm_map_enter() will handle this "not map-aligned"
4097                          * case.
4098                          */
4099                         map_size = size;
4100                 }
4101
4102                 named_entry_lock(named_entry);
4103                 if (named_entry->is_sub_map) {
4104                         vm_map_t                submap;
4105
4106                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4107                             VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4108                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4109                         }
4110
4111                         submap = named_entry->backing.map;
4112                         vm_map_lock(submap);
4113                         vm_map_reference(submap);
4114                         vm_map_unlock(submap);
4115                         named_entry_unlock(named_entry);
4116
4117                         vmk_flags.vmkf_submap = TRUE;
4118
4119                         result = vm_map_enter(target_map,
4120                             &map_addr,
4121                             map_size,
4122                             mask,
4123                             flags,
4124                             vmk_flags,
4125                             tag,
4126                             (vm_object_t)(uintptr_t) submap,
4127                             offset,
4128                             copy,
4129                             cur_protection,
4130                             max_protection,
4131                             inheritance);
4132                         if (result != KERN_SUCCESS) {
4133                                 vm_map_deallocate(submap);
4134                         } else {
4135                                 /*
4136                                  * No need to lock "submap" just to check its
4137                                  * "mapped" flag: that flag is never reset
4138                                  * once it's been set and if we race, we'll
4139                                  * just end up setting it twice, which is OK.
4140                                  */
4141                                 if (submap->mapped_in_other_pmaps == FALSE &&
4142                                     vm_map_pmap(submap) != PMAP_NULL &&
4143                                     vm_map_pmap(submap) !=
4144                                     vm_map_pmap(target_map)) {
4145                                         /*
4146                                          * This submap is being mapped in a map
4147                                          * that uses a different pmap.
4148                                          * Set its "mapped_in_other_pmaps" flag
4149                                          * to indicate that we now need to
4150                                          * remove mappings from all pmaps rather
4151                                          * than just the submap's pmap.
4152                                          */
4153                                         vm_map_lock(submap);
4154                                         submap->mapped_in_other_pmaps = TRUE;
4155                                         vm_map_unlock(submap);
4156                                 }
4157                                 *address = map_addr;
4158                         }
4159                         return result;
4160                 } else if (named_entry->is_copy) {
4161                         kern_return_t   kr;
4162                         vm_map_copy_t   copy_map;
4163                         vm_map_entry_t  copy_entry;
4164                         vm_map_offset_t copy_addr;
4165
4166                         if (flags & ~(VM_FLAGS_FIXED |
4167                             VM_FLAGS_ANYWHERE |
4168                             VM_FLAGS_OVERWRITE |
4169                             VM_FLAGS_RETURN_4K_DATA_ADDR |
4170                             VM_FLAGS_RETURN_DATA_ADDR |
4171                             VM_FLAGS_ALIAS_MASK)) {
4172                                 named_entry_unlock(named_entry);
4173                                 return KERN_INVALID_ARGUMENT;
4174                         }
4175
4176                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4177                             VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4178                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
4179                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4180                                         offset_in_mapping &= ~((signed)(0xFFF));
4181                                 }
4182                                 offset = vm_object_trunc_page(offset);
4183                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4184                         }
4185
4186                         copy_map = named_entry->backing.copy;
4187                         assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4188                         if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4189                                 /* unsupported type; should not happen */
4190                                 printf("vm_map_enter_mem_object: "
4191                                     "memory_entry->backing.copy "
4192                                     "unsupported type 0x%x\n",
4193                                     copy_map->type);
4194                                 named_entry_unlock(named_entry);
4195                                 return KERN_INVALID_ARGUMENT;
4196                         }
4197
4198                         /* reserve a contiguous range */
4199                         kr = vm_map_enter(target_map,
4200                             &map_addr,
4201                             /* map whole mem entry, trim later: */
4202                             named_entry->size,
4203                             mask,
4204                             flags & (VM_FLAGS_ANYWHERE |
4205                             VM_FLAGS_OVERWRITE |
4206                             VM_FLAGS_RETURN_4K_DATA_ADDR |
4207                             VM_FLAGS_RETURN_DATA_ADDR),
4208                             vmk_flags,
4209                             tag,
4210                             VM_OBJECT_NULL,
4211                             0,
4212                             FALSE,               /* copy */
4213                             cur_protection,
4214                             max_protection,
4215                             inheritance);
4216                         if (kr != KERN_SUCCESS) {
4217                                 named_entry_unlock(named_entry);
4218                                 return kr;
4219                         }
4220
4221                         copy_addr = map_addr;
4222
4223                         for (copy_entry = vm_map_copy_first_entry(copy_map);
4224                             copy_entry != vm_map_copy_to_entry(copy_map);
4225                             copy_entry = copy_entry->vme_next) {
4226                                 int                     remap_flags;
4227                                 vm_map_kernel_flags_t   vmk_remap_flags;
4228                                 vm_map_t                copy_submap;
4229                                 vm_object_t             copy_object;
4230                                 vm_map_size_t           copy_size;
4231                                 vm_object_offset_t      copy_offset;
4232                                 int                     copy_vm_alias;
4233
4234                                 remap_flags = 0;
4235                                 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4236
4237                                 copy_object = VME_OBJECT(copy_entry);
4238                                 copy_offset = VME_OFFSET(copy_entry);
4239                                 copy_size = (copy_entry->vme_end -
4240                                     copy_entry->vme_start);
4241                                 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4242                                 if (copy_vm_alias == 0) {
4243                                         /*
4244                                          * Caller does not want a specific
4245                                          * alias for this new mapping:  use
4246                                          * the alias of the original mapping.
4247                                          */
4248                                         copy_vm_alias = VME_ALIAS(copy_entry);
4249                                 }
4250
4251                                 /* sanity check */
4252                                 if ((copy_addr + copy_size) >
4253                                     (map_addr +
4254                                     named_entry->size /* XXX full size */)) {
4255                                         /* over-mapping too much !? */
4256                                         kr = KERN_INVALID_ARGUMENT;
4257                                         /* abort */
4258                                         break;
4259                                 }
4260
4261                                 /* take a reference on the object */
4262                                 if (copy_entry->is_sub_map) {
4263                                         vmk_remap_flags.vmkf_submap = TRUE;
4264                                         copy_submap = VME_SUBMAP(copy_entry);
4265                                         vm_map_lock(copy_submap);
4266                                         vm_map_reference(copy_submap);
4267                                         vm_map_unlock(copy_submap);
4268                                         copy_object = (vm_object_t)(uintptr_t) copy_submap;
4269                                 } else if (!copy &&
4270                                     copy_object != VM_OBJECT_NULL &&
4271                                     (copy_entry->needs_copy ||
4272                                     copy_object->shadowed ||
4273                                     (!copy_object->true_share &&
4274                                     !copy_entry->is_shared &&
4275                                     copy_object->vo_size > copy_size))) {
4276                                         /*
4277                                          * We need to resolve our side of this
4278                                          * "symmetric" copy-on-write now; we
4279                                          * need a new object to map and share,
4280                                          * instead of the current one which
4281                                          * might still be shared with the
4282                                          * original mapping.
4283                                          *
4284                                          * Note: A "vm_map_copy_t" does not
4285                                          * have a lock but we're protected by
4286                                          * the named entry's lock here.
4287                                          */
4288                                         // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4289                                         VME_OBJECT_SHADOW(copy_entry, copy_size);
4290                                         if (!copy_entry->needs_copy &&
4291                                             copy_entry->protection & VM_PROT_WRITE) {
4292                                                 vm_prot_t prot;
4293
4294                                                 prot = copy_entry->protection & ~VM_PROT_WRITE;
4295                                                 vm_object_pmap_protect(copy_object,
4296                                                     copy_offset,
4297                                                     copy_size,
4298                                                     PMAP_NULL,
4299                                                     0,
4300                                                     prot);
4301                                         }
4302
4303                                         copy_entry->needs_copy = FALSE;
4304                                         copy_entry->is_shared = TRUE;
4305                                         copy_object = VME_OBJECT(copy_entry);
4306                                         copy_offset = VME_OFFSET(copy_entry);
4307                                         vm_object_lock(copy_object);
4308                                         vm_object_reference_locked(copy_object);
4309                                         if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4310                                                 /* we're about to make a shared mapping of this object */
4311                                                 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4312                                                 copy_object->true_share = TRUE;
4313                                         }
4314                                         vm_object_unlock(copy_object);
4315                                 } else {
4316                                         /*
4317                                          * We already have the right object
4318                                          * to map.
4319                                          */
4320                                         copy_object = VME_OBJECT(copy_entry);
4321                                         vm_object_reference(copy_object);
4322                                 }
4323
4324                                 /* over-map the object into destination */
4325                                 remap_flags |= flags;
4326                                 remap_flags |= VM_FLAGS_FIXED;
4327                                 remap_flags |= VM_FLAGS_OVERWRITE;
4328                                 remap_flags &= ~VM_FLAGS_ANYWHERE;
4329                                 if (!copy && !copy_entry->is_sub_map) {
4330                                         /*
4331                                          * copy-on-write should have been
4332                                          * resolved at this point, or we would
4333                                          * end up sharing instead of copying.
4334                                          */
4335                                         assert(!copy_entry->needs_copy);
4336                                 }
4337 #if !CONFIG_EMBEDDED
4338                                 if (copy_entry->used_for_jit) {
4339                                         vmk_remap_flags.vmkf_map_jit = TRUE;
4340                                 }
4341 #endif /* !CONFIG_EMBEDDED */
4342                                 kr = vm_map_enter(target_map,
4343                                     &copy_addr,
4344                                     copy_size,
4345                                     (vm_map_offset_t) 0,
4346                                     remap_flags,
4347                                     vmk_remap_flags,
4348                                     copy_vm_alias,
4349                                     copy_object,
4350                                     copy_offset,
4351                                     ((copy_object == NULL) ? FALSE : copy),
4352                                     cur_protection,
4353                                     max_protection,
4354                                     inheritance);
4355                                 if (kr != KERN_SUCCESS) {
4356                                         if (copy_entry->is_sub_map) {
4357                                                 vm_map_deallocate(copy_submap);
4358                                         } else {
4359                                                 vm_object_deallocate(copy_object);
4360                                         }
4361                                         /* abort */
4362                                         break;
4363                                 }
4364
4365                                 /* next mapping */
4366                                 copy_addr += copy_size;
4367                         }
4368
4369                         if (kr == KERN_SUCCESS) {
4370                                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4371                                     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4372                                         *address = map_addr + offset_in_mapping;
4373                                 } else {
4374                                         *address = map_addr;
4375                                 }
4376
4377                                 if (offset) {
4378                                         /*
4379                                          * Trim in front, from 0 to "offset".
4380                                          */
4381                                         vm_map_remove(target_map,
4382                                             map_addr,
4383                                             map_addr + offset,
4384                                             VM_MAP_REMOVE_NO_FLAGS);
4385                                         *address += offset;
4386                                 }
4387                                 if (offset + map_size < named_entry->size) {
4388                                         /*
4389                                          * Trim in back, from
4390                                          * "offset + map_size" to
4391                                          * "named_entry->size".
4392                                          */
4393                                         vm_map_remove(target_map,
4394                                             (map_addr +
4395                                             offset + map_size),
4396                                             (map_addr +
4397                                             named_entry->size),
4398                                             VM_MAP_REMOVE_NO_FLAGS);
4399                                 }
4400                         }
4401                         named_entry_unlock(named_entry);
4402
4403                         if (kr != KERN_SUCCESS) {
4404                                 if (!(flags & VM_FLAGS_OVERWRITE)) {
4405                                         /* deallocate the contiguous range */
4406                                         (void) vm_deallocate(target_map,
4407                                             map_addr,
4408                                             map_size);
4409                                 }
4410                         }
4411
4412                         return kr;
4413                 } else {
4414                         unsigned int    access;
4415                         vm_prot_t       protections;
4416                         unsigned int    wimg_mode;
4417
4418                         /* we are mapping a VM object */
4419
4420                         protections = named_entry->protection & VM_PROT_ALL;
4421                         access = GET_MAP_MEM(named_entry->protection);
4422
4423                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4424                             VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4425                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
4426                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4427                                         offset_in_mapping &= ~((signed)(0xFFF));
4428                                 }
4429                                 offset = vm_object_trunc_page(offset);
4430                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4431                         }
4432
4433                         object = named_entry->backing.object;
4434                         assert(object != VM_OBJECT_NULL);
4435                         vm_object_lock(object);
4436                         named_entry_unlock(named_entry);
4437
4438                         vm_object_reference_locked(object);
4439
4440                         wimg_mode = object->wimg_bits;
4441                         vm_prot_to_wimg(access, &wimg_mode);
4442                         if (object->wimg_bits != wimg_mode) {
4443                                 vm_object_change_wimg_mode(object, wimg_mode);
4444                         }
4445
4446                         vm_object_unlock(object);
4447                 }
4448         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4449                 /*
4450                  * JMM - This is temporary until we unify named entries
4451                  * and raw memory objects.
4452                  *
4453                  * Detected fake ip_kotype for a memory object.  In
4454                  * this case, the port isn't really a port at all, but
4455                  * instead is just a raw memory object.
4456                  */
4457                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4458                     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4459                         panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4460                 }
4461
4462                 object = memory_object_to_vm_object((memory_object_t)port);
4463                 if (object == VM_OBJECT_NULL) {
4464                         return KERN_INVALID_OBJECT;
4465                 }
4466                 vm_object_reference(object);
4467
4468                 /* wait for object (if any) to be ready */
4469                 if (object != VM_OBJECT_NULL) {
4470                         if (object == kernel_object) {
4471                                 printf("Warning: Attempt to map kernel object"
4472                                     " by a non-private kernel entity\n");
4473                                 return KERN_INVALID_OBJECT;
4474                         }
4475                         if (!object->pager_ready) {
4476                                 vm_object_lock(object);
4477
4478                                 while (!object->pager_ready) {
4479                                         vm_object_wait(object,
4480                                             VM_OBJECT_EVENT_PAGER_READY,
4481                                             THREAD_UNINT);
4482                                         vm_object_lock(object);
4483                                 }
4484                                 vm_object_unlock(object);
4485                         }
4486                 }
4487         } else {
4488                 return KERN_INVALID_OBJECT;
4489         }
4490
4491         if (object != VM_OBJECT_NULL &&
4492             object->named &&
4493             object->pager != MEMORY_OBJECT_NULL &&
4494             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4495                 memory_object_t pager;
4496                 vm_prot_t       pager_prot;
4497                 kern_return_t   kr;
4498
4499                 /*
4500                  * For "named" VM objects, let the pager know that the
4501                  * memory object is being mapped.  Some pagers need to keep
4502                  * track of this, to know when they can reclaim the memory
4503                  * object, for example.
4504                  * VM calls memory_object_map() for each mapping (specifying
4505                  * the protection of each mapping) and calls
4506                  * memory_object_last_unmap() when all the mappings are gone.
4507                  */
4508                 pager_prot = max_protection;
4509                 if (copy) {
4510                         /*
4511                          * Copy-On-Write mapping: won't modify the
4512                          * memory object.
4513                          */
4514                         pager_prot &= ~VM_PROT_WRITE;
4515                 }
4516                 vm_object_lock(object);
4517                 pager = object->pager;
4518                 if (object->named &&
4519                     pager != MEMORY_OBJECT_NULL &&
4520                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4521                         assert(object->pager_ready);
4522                         vm_object_mapping_wait(object, THREAD_UNINT);
4523                         vm_object_mapping_begin(object);
4524                         vm_object_unlock(object);
4525
4526                         kr = memory_object_map(pager, pager_prot);
4527                         assert(kr == KERN_SUCCESS);
4528
4529                         vm_object_lock(object);
4530                         vm_object_mapping_end(object);
4531                 }
4532                 vm_object_unlock(object);
4533         }
4534
4535         /*
4536          *      Perform the copy if requested
4537          */
4538
4539         if (copy) {
4540                 vm_object_t             new_object;
4541                 vm_object_offset_t      new_offset;
4542
4543                 result = vm_object_copy_strategically(object, offset,
4544                     map_size,
4545                     &new_object, &new_offset,
4546                     &copy);
4547
4548
4549                 if (result == KERN_MEMORY_RESTART_COPY) {
4550                         boolean_t success;
4551                         boolean_t src_needs_copy;
4552
4553                         /*
4554                          * XXX
4555                          * We currently ignore src_needs_copy.
4556                          * This really is the issue of how to make
4557                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4558                          * non-kernel users to use. Solution forthcoming.
4559                          * In the meantime, since we don't allow non-kernel
4560                          * memory managers to specify symmetric copy,
4561                          * we won't run into problems here.
4562                          */
4563                         new_object = object;
4564                         new_offset = offset;
4565                         success = vm_object_copy_quickly(&new_object,
4566                             new_offset,
4567                             map_size,
4568                             &src_needs_copy,
4569                             &copy);
4570                         assert(success);
4571                         result = KERN_SUCCESS;
4572                 }
4573                 /*
4574                  *      Throw away the reference to the
4575                  *      original object, as it won't be mapped.
4576                  */
4577
4578                 vm_object_deallocate(object);
4579
4580                 if (result != KERN_SUCCESS) {
4581                         return result;
4582                 }
4583
4584                 object = new_object;
4585                 offset = new_offset;
4586         }
4587
4588         /*
4589          * If non-kernel users want to try to prefault pages, the mapping and prefault
4590          * needs to be atomic.
4591          */
4592         kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4593         vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4594
4595 #if __arm64__
4596         if (fourk) {
4597                 /* map this object in a "4K" pager */
4598                 result = vm_map_enter_fourk(target_map,
4599                     &map_addr,
4600                     map_size,
4601                     (vm_map_offset_t) mask,
4602                     flags,
4603                     vmk_flags,
4604                     tag,
4605                     object,
4606                     offset,
4607                     copy,
4608                     cur_protection,
4609                     max_protection,
4610                     inheritance);
4611         } else
4612 #endif /* __arm64__ */
4613         {
4614                 result = vm_map_enter(target_map,
4615                     &map_addr, map_size,
4616                     (vm_map_offset_t)mask,
4617                     flags,
4618                     vmk_flags,
4619                     tag,
4620                     object, offset,
4621                     copy,
4622                     cur_protection, max_protection,
4623                     inheritance);
4624         }
4625         if (result != KERN_SUCCESS) {
4626                 vm_object_deallocate(object);
4627         }
4628
4629         /*
4630          * Try to prefault, and do not forget to release the vm map lock.
4631          */
4632         if (result == KERN_SUCCESS && try_prefault) {
4633                 mach_vm_address_t va = map_addr;
4634                 kern_return_t kr = KERN_SUCCESS;
4635                 unsigned int i = 0;
4636                 int pmap_options;
4637
4638                 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4639                 if (object->internal) {
4640                         pmap_options |= PMAP_OPTIONS_INTERNAL;
4641                 }
4642
4643                 for (i = 0; i < page_list_count; ++i) {
4644                         if (!UPL_VALID_PAGE(page_list, i)) {
4645                                 if (kernel_prefault) {
4646                                         assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4647                                         result = KERN_MEMORY_ERROR;
4648                                         break;
4649                                 }
4650                         } else {
4651                                 /*
4652                                  * If this function call failed, we should stop
4653                                  * trying to optimize, other calls are likely
4654                                  * going to fail too.
4655                                  *
4656                                  * We are not gonna report an error for such
4657                                  * failure though. That's an optimization, not
4658                                  * something critical.
4659                                  */
4660                                 kr = pmap_enter_options(target_map->pmap,
4661                                     va, UPL_PHYS_PAGE(page_list, i),
4662                                     cur_protection, VM_PROT_NONE,
4663                                     0, TRUE, pmap_options, NULL);
4664                                 if (kr != KERN_SUCCESS) {
4665                                         OSIncrementAtomic64(&vm_prefault_nb_bailout);
4666                                         if (kernel_prefault) {
4667                                                 result = kr;
4668                                         }
4669                                         break;
4670                                 }
4671                                 OSIncrementAtomic64(&vm_prefault_nb_pages);
4672                         }
4673
4674                         /* Next virtual address */
4675                         va += PAGE_SIZE;
4676                 }
4677                 if (vmk_flags.vmkf_keep_map_locked) {
4678                         vm_map_unlock(target_map);
4679                 }
4680         }
4681
4682         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4683             VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4684                 *address = map_addr + offset_in_mapping;
4685         } else {
4686                 *address = map_addr;
4687         }
4688         return result;
4689 }
4690
4691 kern_return_t
4692 vm_map_enter_mem_object(
4693         vm_map_t                target_map,
4694         vm_map_offset_t         *address,
4695         vm_map_size_t           initial_size,
4696         vm_map_offset_t         mask,
4697         int                     flags,
4698         vm_map_kernel_flags_t   vmk_flags,
4699         vm_tag_t                tag,
4700         ipc_port_t              port,
4701         vm_object_offset_t      offset,
4702         boolean_t               copy,
4703         vm_prot_t               cur_protection,
4704         vm_prot_t               max_protection,
4705         vm_inherit_t            inheritance)
4706 {
4707         kern_return_t ret;
4708
4709         ret = vm_map_enter_mem_object_helper(target_map,
4710             address,
4711             initial_size,
4712             mask,
4713             flags,
4714             vmk_flags,
4715             tag,
4716             port,
4717             offset,
4718             copy,
4719             cur_protection,
4720             max_protection,
4721             inheritance,
4722             NULL,
4723             0);
4724
4725 #if KASAN
4726         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4727                 kasan_notify_address(*address, initial_size);
4728         }
4729 #endif
4730
4731         return ret;
4732 }
4733
4734 kern_return_t
4735 vm_map_enter_mem_object_prefault(
4736         vm_map_t                target_map,
4737         vm_map_offset_t         *address,
4738         vm_map_size_t           initial_size,
4739         vm_map_offset_t         mask,
4740         int                     flags,
4741         vm_map_kernel_flags_t   vmk_flags,
4742         vm_tag_t                tag,
4743         ipc_port_t              port,
4744         vm_object_offset_t      offset,
4745         vm_prot_t               cur_protection,
4746         vm_prot_t               max_protection,
4747         upl_page_list_ptr_t     page_list,
4748         unsigned int            page_list_count)
4749 {
4750         kern_return_t ret;
4751
4752         ret = vm_map_enter_mem_object_helper(target_map,
4753             address,
4754             initial_size,
4755             mask,
4756             flags,
4757             vmk_flags,
4758             tag,
4759             port,
4760             offset,
4761             FALSE,
4762             cur_protection,
4763             max_protection,
4764             VM_INHERIT_DEFAULT,
4765             page_list,
4766             page_list_count);
4767
4768 #if KASAN
4769         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4770                 kasan_notify_address(*address, initial_size);
4771         }
4772 #endif
4773
4774         return ret;
4775 }
4776
4777
4778 kern_return_t
4779 vm_map_enter_mem_object_control(
4780         vm_map_t                target_map,
4781         vm_map_offset_t         *address,
4782         vm_map_size_t           initial_size,
4783         vm_map_offset_t         mask,
4784         int                     flags,
4785         vm_map_kernel_flags_t   vmk_flags,
4786         vm_tag_t                tag,
4787         memory_object_control_t control,
4788         vm_object_offset_t      offset,
4789         boolean_t               copy,
4790         vm_prot_t               cur_protection,
4791         vm_prot_t               max_protection,
4792         vm_inherit_t            inheritance)
4793 {
4794         vm_map_address_t        map_addr;
4795         vm_map_size_t           map_size;
4796         vm_object_t             object;
4797         vm_object_size_t        size;
4798         kern_return_t           result;
4799         memory_object_t         pager;
4800         vm_prot_t               pager_prot;
4801         kern_return_t           kr;
4802 #if __arm64__
4803         boolean_t               fourk = vmk_flags.vmkf_fourk;
4804 #endif /* __arm64__ */
4805
4806         /*
4807          * Check arguments for validity
4808          */
4809         if ((target_map == VM_MAP_NULL) ||
4810             (cur_protection & ~VM_PROT_ALL) ||
4811             (max_protection & ~VM_PROT_ALL) ||
4812             (inheritance > VM_INHERIT_LAST_VALID) ||
4813             initial_size == 0) {
4814                 return KERN_INVALID_ARGUMENT;
4815         }
4816
4817 #if __arm64__
4818         if (fourk) {
4819                 map_addr = vm_map_trunc_page(*address,
4820                     FOURK_PAGE_MASK);
4821                 map_size = vm_map_round_page(initial_size,
4822                     FOURK_PAGE_MASK);
4823         } else
4824 #endif /* __arm64__ */
4825         {
4826                 map_addr = vm_map_trunc_page(*address,
4827                     VM_MAP_PAGE_MASK(target_map));
4828                 map_size = vm_map_round_page(initial_size,
4829                     VM_MAP_PAGE_MASK(target_map));
4830         }
4831         size = vm_object_round_page(initial_size);
4832
4833         object = memory_object_control_to_vm_object(control);
4834
4835         if (object == VM_OBJECT_NULL) {
4836                 return KERN_INVALID_OBJECT;
4837         }
4838
4839         if (object == kernel_object) {
4840                 printf("Warning: Attempt to map kernel object"
4841                     " by a non-private kernel entity\n");
4842                 return KERN_INVALID_OBJECT;
4843         }
4844
4845         vm_object_lock(object);
4846         object->ref_count++;
4847         vm_object_res_reference(object);
4848
4849         /*
4850          * For "named" VM objects, let the pager know that the
4851          * memory object is being mapped.  Some pagers need to keep
4852          * track of this, to know when they can reclaim the memory
4853          * object, for example.
4854          * VM calls memory_object_map() for each mapping (specifying
4855          * the protection of each mapping) and calls
4856          * memory_object_last_unmap() when all the mappings are gone.
4857          */
4858         pager_prot = max_protection;
4859         if (copy) {
4860                 pager_prot &= ~VM_PROT_WRITE;
4861         }
4862         pager = object->pager;
4863         if (object->named &&
4864             pager != MEMORY_OBJECT_NULL &&
4865             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4866                 assert(object->pager_ready);
4867                 vm_object_mapping_wait(object, THREAD_UNINT);
4868                 vm_object_mapping_begin(object);
4869                 vm_object_unlock(object);
4870
4871                 kr = memory_object_map(pager, pager_prot);
4872                 assert(kr == KERN_SUCCESS);
4873
4874                 vm_object_lock(object);
4875                 vm_object_mapping_end(object);
4876         }
4877         vm_object_unlock(object);
4878
4879         /*
4880          *      Perform the copy if requested
4881          */
4882
4883         if (copy) {
4884                 vm_object_t             new_object;
4885                 vm_object_offset_t      new_offset;
4886
4887                 result = vm_object_copy_strategically(object, offset, size,
4888                     &new_object, &new_offset,
4889                     &copy);
4890
4891
4892                 if (result == KERN_MEMORY_RESTART_COPY) {
4893                         boolean_t success;
4894                         boolean_t src_needs_copy;
4895
4896                         /*
4897                          * XXX
4898                          * We currently ignore src_needs_copy.
4899                          * This really is the issue of how to make
4900                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4901                          * non-kernel users to use. Solution forthcoming.
4902                          * In the meantime, since we don't allow non-kernel
4903                          * memory managers to specify symmetric copy,
4904                          * we won't run into problems here.
4905                          */
4906                         new_object = object;
4907                         new_offset = offset;
4908                         success = vm_object_copy_quickly(&new_object,
4909                             new_offset, size,
4910                             &src_needs_copy,
4911                             &copy);
4912                         assert(success);
4913                         result = KERN_SUCCESS;
4914                 }
4915                 /*
4916                  *      Throw away the reference to the
4917                  *      original object, as it won't be mapped.
4918                  */
4919
4920                 vm_object_deallocate(object);
4921
4922                 if (result != KERN_SUCCESS) {
4923                         return result;
4924                 }
4925
4926                 object = new_object;
4927                 offset = new_offset;
4928         }
4929
4930 #if __arm64__
4931         if (fourk) {
4932                 result = vm_map_enter_fourk(target_map,
4933                     &map_addr,
4934                     map_size,
4935                     (vm_map_offset_t)mask,
4936                     flags,
4937                     vmk_flags,
4938                     tag,
4939                     object, offset,
4940                     copy,
4941                     cur_protection, max_protection,
4942                     inheritance);
4943         } else
4944 #endif /* __arm64__ */
4945         {
4946                 result = vm_map_enter(target_map,
4947                     &map_addr, map_size,
4948                     (vm_map_offset_t)mask,
4949                     flags,
4950                     vmk_flags,
4951                     tag,
4952                     object, offset,
4953                     copy,
4954                     cur_protection, max_protection,
4955                     inheritance);
4956         }
4957         if (result != KERN_SUCCESS) {
4958                 vm_object_deallocate(object);
4959         }
4960         *address = map_addr;
4961
4962         return result;
4963 }
4964
4965
4966 #if     VM_CPM
4967
4968 #ifdef MACH_ASSERT
4969 extern pmap_paddr_t     avail_start, avail_end;
4970 #endif
4971
4972 /*
4973  *      Allocate memory in the specified map, with the caveat that
4974  *      the memory is physically contiguous.  This call may fail
4975  *      if the system can't find sufficient contiguous memory.
4976  *      This call may cause or lead to heart-stopping amounts of
4977  *      paging activity.
4978  *
4979  *      Memory obtained from this call should be freed in the
4980  *      normal way, viz., via vm_deallocate.
4981  */
4982 kern_return_t
4983 vm_map_enter_cpm(
4984         vm_map_t                map,
4985         vm_map_offset_t *addr,
4986         vm_map_size_t           size,
4987         int                     flags)
4988 {
4989         vm_object_t             cpm_obj;
4990         pmap_t                  pmap;
4991         vm_page_t               m, pages;
4992         kern_return_t           kr;
4993         vm_map_offset_t         va, start, end, offset;
4994 #if     MACH_ASSERT
4995         vm_map_offset_t         prev_addr = 0;
4996 #endif  /* MACH_ASSERT */
4997
4998         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4999         vm_tag_t tag;
5000
5001         VM_GET_FLAGS_ALIAS(flags, tag);
5002
5003         if (size == 0) {
5004                 *addr = 0;
5005                 return KERN_SUCCESS;
5006         }
5007         if (anywhere) {
5008                 *addr = vm_map_min(map);
5009         } else {
5010                 *addr = vm_map_trunc_page(*addr,
5011                     VM_MAP_PAGE_MASK(map));
5012         }
5013         size = vm_map_round_page(size,
5014             VM_MAP_PAGE_MASK(map));
5015
5016         /*
5017          * LP64todo - cpm_allocate should probably allow
5018          * allocations of >4GB, but not with the current
5019          * algorithm, so just cast down the size for now.
5020          */
5021         if (size > VM_MAX_ADDRESS) {
5022                 return KERN_RESOURCE_SHORTAGE;
5023         }
5024         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5025             &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5026                 return kr;
5027         }
5028
5029         cpm_obj = vm_object_allocate((vm_object_size_t)size);
5030         assert(cpm_obj != VM_OBJECT_NULL);
5031         assert(cpm_obj->internal);
5032         assert(cpm_obj->vo_size == (vm_object_size_t)size);
5033         assert(cpm_obj->can_persist == FALSE);
5034         assert(cpm_obj->pager_created == FALSE);
5035         assert(cpm_obj->pageout == FALSE);
5036         assert(cpm_obj->shadow == VM_OBJECT_NULL);
5037
5038         /*
5039          *      Insert pages into object.
5040          */
5041
5042         vm_object_lock(cpm_obj);
5043         for (offset = 0; offset < size; offset += PAGE_SIZE) {
5044                 m = pages;
5045                 pages = NEXT_PAGE(m);
5046                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5047
5048                 assert(!m->vmp_gobbled);
5049                 assert(!m->vmp_wanted);
5050                 assert(!m->vmp_pageout);
5051                 assert(!m->vmp_tabled);
5052                 assert(VM_PAGE_WIRED(m));
5053                 assert(m->vmp_busy);
5054                 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5055
5056                 m->vmp_busy = FALSE;
5057                 vm_page_insert(m, cpm_obj, offset);
5058         }
5059         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5060         vm_object_unlock(cpm_obj);
5061
5062         /*
5063          *      Hang onto a reference on the object in case a
5064          *      multi-threaded application for some reason decides
5065          *      to deallocate the portion of the address space into
5066          *      which we will insert this object.
5067          *
5068          *      Unfortunately, we must insert the object now before
5069          *      we can talk to the pmap module about which addresses
5070          *      must be wired down.  Hence, the race with a multi-
5071          *      threaded app.
5072          */
5073         vm_object_reference(cpm_obj);
5074
5075         /*
5076          *      Insert object into map.
5077          */
5078
5079         kr = vm_map_enter(
5080                 map,
5081                 addr,
5082                 size,
5083                 (vm_map_offset_t)0,
5084                 flags,
5085                 VM_MAP_KERNEL_FLAGS_NONE,
5086                 cpm_obj,
5087                 (vm_object_offset_t)0,
5088                 FALSE,
5089                 VM_PROT_ALL,
5090                 VM_PROT_ALL,
5091                 VM_INHERIT_DEFAULT);
5092
5093         if (kr != KERN_SUCCESS) {
5094                 /*
5095                  *      A CPM object doesn't have can_persist set,
5096                  *      so all we have to do is deallocate it to
5097                  *      free up these pages.
5098                  */
5099                 assert(cpm_obj->pager_created == FALSE);
5100                 assert(cpm_obj->can_persist == FALSE);
5101                 assert(cpm_obj->pageout == FALSE);
5102                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5103                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5104                 vm_object_deallocate(cpm_obj); /* kill creation ref */
5105         }
5106
5107         /*
5108          *      Inform the physical mapping system that the
5109          *      range of addresses may not fault, so that
5110          *      page tables and such can be locked down as well.
5111          */
5112         start = *addr;
5113         end = start + size;
5114         pmap = vm_map_pmap(map);
5115         pmap_pageable(pmap, start, end, FALSE);
5116
5117         /*
5118          *      Enter each page into the pmap, to avoid faults.
5119          *      Note that this loop could be coded more efficiently,
5120          *      if the need arose, rather than looking up each page
5121          *      again.
5122          */
5123         for (offset = 0, va = start; offset < size;
5124             va += PAGE_SIZE, offset += PAGE_SIZE) {
5125                 int type_of_fault;
5126
5127                 vm_object_lock(cpm_obj);
5128                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5129                 assert(m != VM_PAGE_NULL);
5130
5131                 vm_page_zero_fill(m);
5132
5133                 type_of_fault = DBG_ZERO_FILL_FAULT;
5134
5135                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5136                     VM_PAGE_WIRED(m),
5137                     FALSE,                             /* change_wiring */
5138                     VM_KERN_MEMORY_NONE,                             /* tag - not wiring */
5139                     FALSE,                             /* no_cache */
5140                     FALSE,                             /* cs_bypass */
5141                     0,                                 /* user_tag */
5142                     0,                             /* pmap_options */
5143                     NULL,                              /* need_retry */
5144                     &type_of_fault);
5145
5146                 vm_object_unlock(cpm_obj);
5147         }
5148
5149 #if     MACH_ASSERT
5150         /*
5151          *      Verify ordering in address space.
5152          */
5153         for (offset = 0; offset < size; offset += PAGE_SIZE) {
5154                 vm_object_lock(cpm_obj);
5155                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5156                 vm_object_unlock(cpm_obj);
5157                 if (m == VM_PAGE_NULL) {
5158                         panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
5159                             cpm_obj, (uint64_t)offset);
5160                 }
5161                 assert(m->vmp_tabled);
5162                 assert(!m->vmp_busy);
5163                 assert(!m->vmp_wanted);
5164                 assert(!m->vmp_fictitious);
5165                 assert(!m->vmp_private);
5166                 assert(!m->vmp_absent);
5167                 assert(!m->vmp_error);
5168                 assert(!m->vmp_cleaning);
5169                 assert(!m->vmp_laundry);
5170                 assert(!m->vmp_precious);
5171                 assert(!m->vmp_clustered);
5172                 if (offset != 0) {
5173                         if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5174                                 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5175                                     (uint64_t)start, (uint64_t)end, (uint64_t)va);
5176                                 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5177                                 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5178                                 panic("vm_allocate_cpm:  pages not contig!");
5179                         }
5180                 }
5181                 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5182         }
5183 #endif  /* MACH_ASSERT */
5184
5185         vm_object_deallocate(cpm_obj); /* kill extra ref */
5186
5187         return kr;
5188 }
5189
5190
5191 #else   /* VM_CPM */
5192
5193 /*
5194  *      Interface is defined in all cases, but unless the kernel
5195  *      is built explicitly for this option, the interface does
5196  *      nothing.
5197  */
5198
5199 kern_return_t
5200 vm_map_enter_cpm(
5201         __unused vm_map_t       map,
5202         __unused vm_map_offset_t        *addr,
5203         __unused vm_map_size_t  size,
5204         __unused int            flags)
5205 {
5206         return KERN_FAILURE;
5207 }
5208 #endif /* VM_CPM */
5209
5210 /* Not used without nested pmaps */
5211 #ifndef NO_NESTED_PMAP
5212 /*
5213  * Clip and unnest a portion of a nested submap mapping.
5214  */
5215
5216
5217 static void
5218 vm_map_clip_unnest(
5219         vm_map_t        map,
5220         vm_map_entry_t  entry,
5221         vm_map_offset_t start_unnest,
5222         vm_map_offset_t end_unnest)
5223 {
5224         vm_map_offset_t old_start_unnest = start_unnest;
5225         vm_map_offset_t old_end_unnest = end_unnest;
5226
5227         assert(entry->is_sub_map);
5228         assert(VME_SUBMAP(entry) != NULL);
5229         assert(entry->use_pmap);
5230
5231         /*
5232          * Query the platform for the optimal unnest range.
5233          * DRK: There's some duplication of effort here, since
5234          * callers may have adjusted the range to some extent. This
5235          * routine was introduced to support 1GiB subtree nesting
5236          * for x86 platforms, which can also nest on 2MiB boundaries
5237          * depending on size/alignment.
5238          */
5239         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5240                 assert(VME_SUBMAP(entry)->is_nested_map);
5241                 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5242                 log_unnest_badness(map,
5243                     old_start_unnest,
5244                     old_end_unnest,
5245                     VME_SUBMAP(entry)->is_nested_map,
5246                     (entry->vme_start +
5247                     VME_SUBMAP(entry)->lowest_unnestable_start -
5248                     VME_OFFSET(entry)));
5249         }
5250
5251         if (entry->vme_start > start_unnest ||
5252             entry->vme_end < end_unnest) {
5253                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5254                     "bad nested entry: start=0x%llx end=0x%llx\n",
5255                     (long long)start_unnest, (long long)end_unnest,
5256                     (long long)entry->vme_start, (long long)entry->vme_end);
5257         }
5258
5259         if (start_unnest > entry->vme_start) {
5260                 _vm_map_clip_start(&map->hdr,
5261                     entry,
5262                     start_unnest);
5263                 if (map->holelistenabled) {
5264                         vm_map_store_update_first_free(map, NULL, FALSE);
5265                 } else {
5266                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5267                 }
5268         }
5269         if (entry->vme_end > end_unnest) {
5270                 _vm_map_clip_end(&map->hdr,
5271                     entry,
5272                     end_unnest);
5273                 if (map->holelistenabled) {
5274                         vm_map_store_update_first_free(map, NULL, FALSE);
5275                 } else {
5276                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5277                 }
5278         }
5279
5280         pmap_unnest(map->pmap,
5281             entry->vme_start,
5282             entry->vme_end - entry->vme_start);
5283         if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5284                 /* clean up parent map/maps */
5285                 vm_map_submap_pmap_clean(
5286                         map, entry->vme_start,
5287                         entry->vme_end,
5288                         VME_SUBMAP(entry),
5289                         VME_OFFSET(entry));
5290         }
5291         entry->use_pmap = FALSE;
5292         if ((map->pmap != kernel_pmap) &&
5293             (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5294                 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5295         }
5296 }
5297 #endif  /* NO_NESTED_PMAP */
5298
5299 /*
5300  *      vm_map_clip_start:      [ internal use only ]
5301  *
5302  *      Asserts that the given entry begins at or after
5303  *      the specified address; if necessary,
5304  *      it splits the entry into two.
5305  */
5306 void
5307 vm_map_clip_start(
5308         vm_map_t        map,
5309         vm_map_entry_t  entry,
5310         vm_map_offset_t startaddr)
5311 {
5312 #ifndef NO_NESTED_PMAP
5313         if (entry->is_sub_map &&
5314             entry->use_pmap &&
5315             startaddr >= entry->vme_start) {
5316                 vm_map_offset_t start_unnest, end_unnest;
5317
5318                 /*
5319                  * Make sure "startaddr" is no longer in a nested range
5320                  * before we clip.  Unnest only the minimum range the platform
5321                  * can handle.
5322                  * vm_map_clip_unnest may perform additional adjustments to
5323                  * the unnest range.
5324                  */
5325                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5326                 end_unnest = start_unnest + pmap_nesting_size_min;
5327                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5328         }
5329 #endif /* NO_NESTED_PMAP */
5330         if (startaddr > entry->vme_start) {
5331                 if (VME_OBJECT(entry) &&
5332                     !entry->is_sub_map &&
5333                     VME_OBJECT(entry)->phys_contiguous) {
5334                         pmap_remove(map->pmap,
5335                             (addr64_t)(entry->vme_start),
5336                             (addr64_t)(entry->vme_end));
5337                 }
5338                 if (entry->vme_atomic) {
5339                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5340                 }
5341
5342                 DTRACE_VM5(
5343                         vm_map_clip_start,
5344                         vm_map_t, map,
5345                         vm_map_offset_t, entry->vme_start,
5346                         vm_map_offset_t, entry->vme_end,
5347                         vm_map_offset_t, startaddr,
5348                         int, VME_ALIAS(entry));
5349
5350                 _vm_map_clip_start(&map->hdr, entry, startaddr);
5351                 if (map->holelistenabled) {
5352                         vm_map_store_update_first_free(map, NULL, FALSE);
5353                 } else {
5354                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5355                 }
5356         }
5357 }
5358
5359
/*
 *      Clip "entry" of a map copy so that it starts at "startaddr";
 *      a no-op unless the entry begins below that address.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)                          \
        MACRO_BEGIN                                                             \
        if ((startaddr) > (entry)->vme_start) {                                 \
                _vm_map_clip_start(&(copy)->cpy_hdr, (entry), (startaddr));     \
        }                                                                       \
        MACRO_END
5365
5366 /*
5367  *      This routine is called only when it is known that
5368  *      the entry must be split.
5369  */
5370 static void
5371 _vm_map_clip_start(
5372         struct vm_map_header    *map_header,
5373         vm_map_entry_t          entry,
5374         vm_map_offset_t         start)
5375 {
5376         vm_map_entry_t  new_entry;
5377
5378         /*
5379          *      Split off the front portion --
5380          *      note that we must insert the new
5381          *      entry BEFORE this one, so that
5382          *      this entry has the specified starting
5383          *      address.
5384          */
5385
5386         if (entry->map_aligned) {
5387                 assert(VM_MAP_PAGE_ALIGNED(start,
5388                     VM_MAP_HDR_PAGE_MASK(map_header)));
5389         }
5390
5391         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5392         vm_map_entry_copy_full(new_entry, entry);
5393
5394         new_entry->vme_end = start;
5395         assert(new_entry->vme_start < new_entry->vme_end);
5396         VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5397         assert(start < entry->vme_end);
5398         entry->vme_start = start;
5399
5400         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5401
5402         if (entry->is_sub_map) {
5403                 vm_map_reference(VME_SUBMAP(new_entry));
5404         } else {
5405                 vm_object_reference(VME_OBJECT(new_entry));
5406         }
5407 }
5408
5409
5410 /*
5411  *      vm_map_clip_end:        [ internal use only ]
5412  *
5413  *      Asserts that the given entry ends at or before
5414  *      the specified address; if necessary,
5415  *      it splits the entry into two.
5416  */
5417 void
5418 vm_map_clip_end(
5419         vm_map_t        map,
5420         vm_map_entry_t  entry,
5421         vm_map_offset_t endaddr)
5422 {
5423         if (endaddr > entry->vme_end) {
5424                 /*
5425                  * Within the scope of this clipping, limit "endaddr" to
5426                  * the end of this map entry...
5427                  */
5428                 endaddr = entry->vme_end;
5429         }
5430 #ifndef NO_NESTED_PMAP
5431         if (entry->is_sub_map && entry->use_pmap) {
5432                 vm_map_offset_t start_unnest, end_unnest;
5433
5434                 /*
5435                  * Make sure the range between the start of this entry and
5436                  * the new "endaddr" is no longer nested before we clip.
5437                  * Unnest only the minimum range the platform can handle.
5438                  * vm_map_clip_unnest may perform additional adjustments to
5439                  * the unnest range.
5440                  */
5441                 start_unnest = entry->vme_start;
5442                 end_unnest =
5443                     (endaddr + pmap_nesting_size_min - 1) &
5444                     ~(pmap_nesting_size_min - 1);
5445                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5446         }
5447 #endif /* NO_NESTED_PMAP */
5448         if (endaddr < entry->vme_end) {
5449                 if (VME_OBJECT(entry) &&
5450                     !entry->is_sub_map &&
5451                     VME_OBJECT(entry)->phys_contiguous) {
5452                         pmap_remove(map->pmap,
5453                             (addr64_t)(entry->vme_start),
5454                             (addr64_t)(entry->vme_end));
5455                 }
5456                 if (entry->vme_atomic) {
5457                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5458                 }
5459                 DTRACE_VM5(
5460                         vm_map_clip_end,
5461                         vm_map_t, map,
5462                         vm_map_offset_t, entry->vme_start,
5463                         vm_map_offset_t, entry->vme_end,
5464                         vm_map_offset_t, endaddr,
5465                         int, VME_ALIAS(entry));
5466
5467                 _vm_map_clip_end(&map->hdr, entry, endaddr);
5468                 if (map->holelistenabled) {
5469                         vm_map_store_update_first_free(map, NULL, FALSE);
5470                 } else {
5471                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5472                 }
5473         }
5474 }
5475
5476
/*
 *      Clip "entry" of a map copy so that it ends at "endaddr";
 *      a no-op unless the entry extends past that address.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)                              \
        MACRO_BEGIN                                                             \
        if ((endaddr) < (entry)->vme_end) {                                     \
                _vm_map_clip_end(&(copy)->cpy_hdr, (entry), (endaddr));         \
        }                                                                       \
        MACRO_END
5482
5483 /*
5484  *      This routine is called only when it is known that
5485  *      the entry must be split.
5486  */
5487 static void
5488 _vm_map_clip_end(
5489         struct vm_map_header    *map_header,
5490         vm_map_entry_t          entry,
5491         vm_map_offset_t         end)
5492 {
5493         vm_map_entry_t  new_entry;
5494
5495         /*
5496          *      Create a new entry and insert it
5497          *      AFTER the specified entry
5498          */
5499
5500         if (entry->map_aligned) {
5501                 assert(VM_MAP_PAGE_ALIGNED(end,
5502                     VM_MAP_HDR_PAGE_MASK(map_header)));
5503         }
5504
5505         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5506         vm_map_entry_copy_full(new_entry, entry);
5507
5508         assert(entry->vme_start < end);
5509         new_entry->vme_start = entry->vme_end = end;
5510         VME_OFFSET_SET(new_entry,
5511             VME_OFFSET(new_entry) + (end - entry->vme_start));
5512         assert(new_entry->vme_start < new_entry->vme_end);
5513
5514         _vm_map_store_entry_link(map_header, entry, new_entry);
5515
5516         if (entry->is_sub_map) {
5517                 vm_map_reference(VME_SUBMAP(new_entry));
5518         } else {
5519                 vm_object_reference(VME_OBJECT(new_entry));
5520         }
5521 }
5522
5523
/*
 *      VM_MAP_RANGE_CHECK:     [ internal use only ]
 *
 *      Clamps the starting and ending region addresses to the
 *      valid range of the map: "start" is raised to vm_map_min(map),
 *      "end" is lowered to vm_map_max(map), and if "start" still
 *      exceeds "end" it is pulled back to "end".
 *
 *      "start" and "end" are assigned to, so they must be lvalues.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)             \
        MACRO_BEGIN                                     \
        if ((start) < vm_map_min(map)) {                \
                (start) = vm_map_min(map);              \
        }                                               \
        if ((end) > vm_map_max(map)) {                  \
                (end) = vm_map_max(map);                \
        }                                               \
        if ((start) > (end)) {                          \
                (start) = (end);                        \
        }                                               \
        MACRO_END
5539
5540 /*
5541  *      vm_map_range_check:     [ internal use only ]
5542  *
5543  *      Check that the region defined by the specified start and
5544  *      end addresses are wholly contained within a single map
5545  *      entry or set of adjacent map entries of the spacified map,
5546  *      i.e. the specified region contains no unmapped space.
5547  *      If any or all of the region is unmapped, FALSE is returned.
5548  *      Otherwise, TRUE is returned and if the output argument 'entry'
5549  *      is not NULL it points to the map entry containing the start
5550  *      of the region.
5551  *
5552  *      The map is locked for reading on entry and is left locked.
5553  */
5554 static boolean_t
5555 vm_map_range_check(
5556         vm_map_t                map,
5557         vm_map_offset_t         start,
5558         vm_map_offset_t         end,
5559         vm_map_entry_t          *entry)
5560 {
5561         vm_map_entry_t          cur;
5562         vm_map_offset_t         prev;
5563
5564         /*
5565          *      Basic sanity checks first
5566          */
5567         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5568                 return FALSE;
5569         }
5570
5571         /*
5572          *      Check first if the region starts within a valid
5573          *      mapping for the map.
5574          */
5575         if (!vm_map_lookup_entry(map, start, &cur)) {
5576                 return FALSE;
5577         }
5578
5579         /*
5580          *      Optimize for the case that the region is contained
5581          *      in a single map entry.
5582          */
5583         if (entry != (vm_map_entry_t *) NULL) {
5584                 *entry = cur;
5585         }
5586         if (end <= cur->vme_end) {
5587                 return TRUE;
5588         }
5589
5590         /*
5591          *      If the region is not wholly contained within a
5592          *      single entry, walk the entries looking for holes.
5593          */
5594         prev = cur->vme_end;
5595         cur = cur->vme_next;
5596         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5597                 if (end <= cur->vme_end) {
5598                         return TRUE;
5599                 }
5600                 prev = cur->vme_end;
5601                 cur = cur->vme_next;
5602         }
5603         return FALSE;
5604 }
5605
5606 /*
5607  *      vm_map_submap:          [ kernel use only ]
5608  *
5609  *      Mark the given range as handled by a subordinate map.
5610  *
5611  *      This range must have been created with vm_map_find using
5612  *      the vm_submap_object, and no other operations may have been
5613  *      performed on this range prior to calling vm_map_submap.
5614  *
5615  *      Only a limited number of operations can be performed
5616  *      within this rage after calling vm_map_submap:
5617  *              vm_fault
5618  *      [Don't try vm_map_copyin!]
5619  *
5620  *      To remove a submapping, one must first remove the
5621  *      range from the superior map, and then destroy the
5622  *      submap (if desired).  [Better yet, don't try it.]
5623  */
5624 kern_return_t
5625 vm_map_submap(
5626         vm_map_t        map,
5627         vm_map_offset_t start,
5628         vm_map_offset_t end,
5629         vm_map_t        submap,
5630         vm_map_offset_t offset,
5631 #ifdef NO_NESTED_PMAP
5632         __unused
5633 #endif  /* NO_NESTED_PMAP */
5634         boolean_t       use_pmap)
5635 {
5636         vm_map_entry_t          entry;
5637         kern_return_t           result = KERN_INVALID_ARGUMENT;
5638         vm_object_t             object;
5639
5640         vm_map_lock(map);
5641
5642         if (!vm_map_lookup_entry(map, start, &entry)) {
5643                 entry = entry->vme_next;
5644         }
5645
5646         if (entry == vm_map_to_entry(map) ||
5647             entry->is_sub_map) {
5648                 vm_map_unlock(map);
5649                 return KERN_INVALID_ARGUMENT;
5650         }
5651
5652         vm_map_clip_start(map, entry, start);
5653         vm_map_clip_end(map, entry, end);
5654
5655         if ((entry->vme_start == start) && (entry->vme_end == end) &&
5656             (!entry->is_sub_map) &&
5657             ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5658             (object->resident_page_count == 0) &&
5659             (object->copy == VM_OBJECT_NULL) &&
5660             (object->shadow == VM_OBJECT_NULL) &&
5661             (!object->pager_created)) {
5662                 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5663                 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5664                 vm_object_deallocate(object);
5665                 entry->is_sub_map = TRUE;
5666                 entry->use_pmap = FALSE;
5667                 VME_SUBMAP_SET(entry, submap);
5668                 vm_map_reference(submap);
5669                 if (submap->mapped_in_other_pmaps == FALSE &&
5670                     vm_map_pmap(submap) != PMAP_NULL &&
5671                     vm_map_pmap(submap) != vm_map_pmap(map)) {
5672                         /*
5673                          * This submap is being mapped in a map
5674                          * that uses a different pmap.
5675                          * Set its "mapped_in_other_pmaps" flag
5676                          * to indicate that we now need to
5677                          * remove mappings from all pmaps rather
5678                          * than just the submap's pmap.
5679                          */
5680                         submap->mapped_in_other_pmaps = TRUE;
5681                 }
5682
5683 #ifndef NO_NESTED_PMAP
5684                 if (use_pmap) {
5685                         /* nest if platform code will allow */
5686                         if (submap->pmap == NULL) {
5687                                 ledger_t ledger = map->pmap->ledger;
5688                                 submap->pmap = pmap_create_options(ledger,
5689                                     (vm_map_size_t) 0, 0);
5690                                 if (submap->pmap == PMAP_NULL) {
5691                                         vm_map_unlock(map);
5692                                         return KERN_NO_SPACE;
5693                                 }
5694 #if     defined(__arm__) || defined(__arm64__)
5695                                 pmap_set_nested(submap->pmap);
5696 #endif
5697                         }
5698                         result = pmap_nest(map->pmap,
5699                             (VME_SUBMAP(entry))->pmap,
5700                             (addr64_t)start,
5701                             (addr64_t)start,
5702                             (uint64_t)(end - start));
5703                         if (result) {
5704                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5705                         }
5706                         entry->use_pmap = TRUE;
5707                 }
5708 #else   /* NO_NESTED_PMAP */
5709                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5710 #endif  /* NO_NESTED_PMAP */
5711                 result = KERN_SUCCESS;
5712         }
5713         vm_map_unlock(map);
5714
5715         return result;
5716 }
5717
5718 /*
5719  *      vm_map_protect:
5720  *
5721  *      Sets the protection of the address range [start, end) in
5722  *      the target map to "new_prot".  If "set_max" is specified, the
5723  *      maximum protection is set (and the current protection is masked
5724  *      by it); otherwise, only the current protection is affected.
5725  */
5726 kern_return_t
5727 vm_map_protect(
5728         vm_map_t        map,            /* target map; locked and unlocked in here */
5729         vm_map_offset_t start,
5730         vm_map_offset_t end,
5731         vm_prot_t       new_prot,       /* may carry VM_PROT_COPY, see below */
5732         boolean_t       set_max)        /* TRUE: also replace max_protection */
5733 {
5734         vm_map_entry_t                  current;        /* entry being checked / updated */
5735         vm_map_offset_t                 prev;           /* end of the previous entry in the first pass */
5736         vm_map_entry_t                  entry;          /* entry that contains "start" */
5737         vm_prot_t                       new_max;
5738         int                             pmap_options = 0;
5739         kern_return_t                   kr;
5740
5741         if (new_prot & VM_PROT_COPY) { /* remap the range onto itself as a copy first */
5742                 vm_map_offset_t         new_start;
5743                 vm_prot_t               cur_prot, max_prot;
5744                 vm_map_kernel_flags_t   kflags;
5745
5746                 /* LP64todo - see below */
5747                 if (start >= map->max_offset) {
5748                         return KERN_INVALID_ADDRESS;
5749                 }
5750
5751 #if VM_PROTECT_WX_FAIL
5752                 if ((new_prot & VM_PROT_EXECUTE) &&
5753                     map != kernel_map &&
5754                     cs_process_enforcement(NULL)) {
5755                         DTRACE_VM3(cs_wx,
5756                             uint64_t, (uint64_t) start,
5757                             uint64_t, (uint64_t) end,
5758                             vm_prot_t, new_prot);
5759                         printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5760                             proc_selfpid(),
5761                             (current_task()->bsd_info
5762                             ? proc_name_address(current_task()->bsd_info)
5763                             : "?"),
5764                             __FUNCTION__);
5765                         return KERN_PROTECTION_FAILURE;
5766                 }
5767 #endif /* VM_PROTECT_WX_FAIL */
5768
5769                 /*
5770                  * Let vm_map_remap_extract() know that it will need to:
5771                  * + make a copy of the mapping
5772                  * + add VM_PROT_WRITE to the max protections
5773                  * + remove any protections that are no longer allowed from the
5774                  *   max protections (to avoid any WRITE/EXECUTE conflict, for
5775                  *   example).
5776                  * Note that "max_prot" is an IN/OUT parameter only for this
5777                  * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
5778                  * only.
5779                  */
5780                 max_prot = new_prot & VM_PROT_ALL;
5781                 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5782                 kflags.vmkf_remap_prot_copy = TRUE;
5783                 kflags.vmkf_overwrite_immutable = TRUE;
5784                 new_start = start; /* source and destination address are the same */
5785                 kr = vm_map_remap(map,
5786                     &new_start,
5787                     end - start,
5788                     0,               /* mask */
5789                     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5790                     kflags,
5791                     0,
5792                     map,
5793                     start,
5794                     TRUE,               /* copy-on-write remapping! */
5795                     &cur_prot,
5796                     &max_prot,
5797                     VM_INHERIT_DEFAULT);
5798                 if (kr != KERN_SUCCESS) {
5799                         return kr;
5800                 }
5801                 new_prot &= ~VM_PROT_COPY; /* copy done: strip the flag before the checks below */
5802         }
5803
5804         vm_map_lock(map);
5805
5806         /* LP64todo - remove this check when vm_map_commpage64()
5807          * no longer has to stuff in a map_entry for the commpage
5808          * above the map's max_offset.
5809          */
5810         if (start >= map->max_offset) {
5811                 vm_map_unlock(map);
5812                 return KERN_INVALID_ADDRESS;
5813         }
5814
5815         while (1) {
5816                 /*
5817                  *      Look up the entry containing "start".  If "start" is
5818                  *      not inside a valid entry, return an error.
5819                  */
5820                 if (!vm_map_lookup_entry(map, start, &entry)) {
5821                         vm_map_unlock(map);
5822                         return KERN_INVALID_ADDRESS;
5823                 }
5824
5825                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5826                         start = SUPERPAGE_ROUND_DOWN(start);
5827                         continue; /* look up again with the rounded-down start */
5828                 }
5829                 break;
5830         }
5831         if (entry->superpage_size) {
5832                 end = SUPERPAGE_ROUND_UP(end);
5833         }
5834
5835         /*
5836          *      First pass: check the whole range for holes and protection
5837          *      violations before any map entry is modified.
5838          */
5839
5840         current = entry;
5841         prev = current->vme_start;
5842         while ((current != vm_map_to_entry(map)) &&
5843             (current->vme_start < end)) {
5844                 /*
5845                  * If there is a hole, return an error.
5846                  */
5847                 if (current->vme_start != prev) {
5848                         vm_map_unlock(map);
5849                         return KERN_INVALID_ADDRESS;
5850                 }
5851
5852                 new_max = current->max_protection;
5853                 if ((new_prot & new_max) != new_prot) {
5854                         vm_map_unlock(map);
5855                         return KERN_PROTECTION_FAILURE; /* asking for more than max_protection allows */
5856                 }
5857
5858                 if ((new_prot & VM_PROT_WRITE) &&
5859                     (new_prot & VM_PROT_EXECUTE) &&
5860 #if !CONFIG_EMBEDDED
5861                     map != kernel_map &&
5862                     cs_process_enforcement(NULL) &&
5863 #endif /* !CONFIG_EMBEDDED */
5864                     !(current->used_for_jit)) {
5865                         DTRACE_VM3(cs_wx,
5866                             uint64_t, (uint64_t) current->vme_start,
5867                             uint64_t, (uint64_t) current->vme_end,
5868                             vm_prot_t, new_prot);
5869                         printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5870                             proc_selfpid(),
5871                             (current_task()->bsd_info
5872                             ? proc_name_address(current_task()->bsd_info)
5873                             : "?"),
5874                             __FUNCTION__);
5875                         new_prot &= ~VM_PROT_EXECUTE; /* W+X refused: execute is dropped for the rest of the request */
5876 #if VM_PROTECT_WX_FAIL
5877                         vm_map_unlock(map);
5878                         return KERN_PROTECTION_FAILURE;
5879 #endif /* VM_PROTECT_WX_FAIL */
5880                 }
5881
5882                 /*
5883                  * If the task has requested executable lockdown,
5884                  * deny both:
5885                  * - adding executable protections OR
5886                  * - adding write protections to an existing executable mapping.
5887                  */
5888                 if (map->map_disallow_new_exec == TRUE) {
5889                         if ((new_prot & VM_PROT_EXECUTE) ||
5890                             ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5891                                 vm_map_unlock(map);
5892                                 return KERN_PROTECTION_FAILURE;
5893                         }
5894                 }
5895
5896                 prev = current->vme_end; /* the next entry must start exactly here */
5897                 current = current->vme_next;
5898         }
5899
5900 #if __arm64__
5901         if (end > prev &&
5902             end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5903                 vm_map_entry_t prev_entry;
5904
5905                 prev_entry = current->vme_prev;
5906                 if (prev_entry != vm_map_to_entry(map) &&
5907                     !prev_entry->map_aligned &&
5908                     (vm_map_round_page(prev_entry->vme_end,
5909                     VM_MAP_PAGE_MASK(map))
5910                     == end)) {
5911                         /*
5912                          * The last entry in our range is not "map-aligned"
5913                          * but it would have reached all the way to "end"
5914                          * if it had been map-aligned, so this is not really
5915                          * a hole in the range and we can proceed.
5916                          */
5917                         prev = end;
5918                 }
5919         }
5920 #endif /* __arm64__ */
5921
5922         if (end > prev) { /* the entries walked above stop short of "end" */
5923                 vm_map_unlock(map);
5924                 return KERN_INVALID_ADDRESS;
5925         }
5926
5927         /*
5928          *      Second pass: go back and fix up protections.
5929          *      Clip to start here if the range starts within
5930          *      the entry.
5931          */
5932
5933         current = entry;
5934         if (current != vm_map_to_entry(map)) {
5935                 /* clip and unnest if necessary */
5936                 vm_map_clip_start(map, current, start);
5937         }
5938
5939         while ((current != vm_map_to_entry(map)) &&
5940             (current->vme_start < end)) {
5941                 vm_prot_t       old_prot;
5942
5943                 vm_map_clip_end(map, current, end);
5944
5945                 if (current->is_sub_map) {
5946                         /* clipping did unnest if needed */
5947                         assert(!current->use_pmap);
5948                 }
5949
5950                 old_prot = current->protection;
5951
5952                 if (set_max) {
5953                         current->max_protection = new_prot;
5954                         current->protection = new_prot & old_prot; /* current may only shrink to fit the new max */
5955                 } else {
5956                         current->protection = new_prot;
5957                 }
5958
5959                 /*
5960                  *      Update physical map if necessary.
5961                  *      If the request is to turn off write protection
5962                  *      (i.e. to allow writes), we won't do it for real
5963                  *      (in pmap), because it would cause copy-on-write to
5964                  *      fail.  We've already set the new protection in the
5965                  *      map, so if a write-protect fault occurs, it will be
5966                  *      fixed up properly, COW or not.
5967                  */
5968                 if (current->protection != old_prot) {
5969                         /* Look one level in: we support nested pmaps */
5970                         /* from mapped submaps which are direct entries */
5971                         /* in our map */
5972
5973                         vm_prot_t prot; /* protection actually handed to the pmap layer */
5974
5975                         prot = current->protection;
5976                         if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5977                                 prot &= ~VM_PROT_WRITE;
5978                         } else {
5979                                 assert(!VME_OBJECT(current)->code_signed);
5980                                 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5981                         }
5982
5983                         if (override_nx(map, VME_ALIAS(current)) && prot) {
5984                                 prot |= VM_PROT_EXECUTE;
5985                         }
5986
5987 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5988                         if (!(old_prot & VM_PROT_EXECUTE) &&
5989                             (prot & VM_PROT_EXECUTE) &&
5990                             panic_on_unsigned_execute &&
5991                             (proc_selfcsflags() & CS_KILL)) {
5992                                 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5993                         }
5994 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5995
5996                         if (pmap_has_prot_policy(prot)) {
5997                                 if (current->wired_count) {
5998                                         panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5999                                             map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6000                                 }
6001
6002                                 /* If the pmap layer cares about this
6003                                  * protection type, force a fault for
6004                                  * each page so that vm_fault will
6005                                  * repopulate the page with the full
6006                                  * set of protections.
6007                                  */
6008                                 /*
6009                                  * TODO: We don't seem to need this,
6010                                  * but this is due to an internal
6011                                  * implementation detail of
6012                                  * pmap_protect.  Do we want to rely
6013                                  * on this?
6014                                  */
6015                                 prot = VM_PROT_NONE;
6016                         }
6017
6018                         if (current->is_sub_map && current->use_pmap) {
6019                                 pmap_protect(VME_SUBMAP(current)->pmap,
6020                                     current->vme_start,
6021                                     current->vme_end,
6022                                     prot);
6023                         } else {
6024                                 if (prot & VM_PROT_WRITE) {
6025                                         if (VME_OBJECT(current) == compressor_object) {
6026                                                 /*
6027                                                  * For write requests on the
6028                                                  * compressor, we will ask the
6029                                                  * pmap layer to prevent us from
6030                                                  * taking a write fault when we
6031                                                  * attempt to access the mapping
6032                                                  * next.
6033                                                  */
6034                                                 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE; /* note: stays set for later entries too */
6035                                         }
6036                                 }
6037
6038                                 pmap_protect_options(map->pmap,
6039                                     current->vme_start,
6040                                     current->vme_end,
6041                                     prot,
6042                                     pmap_options,
6043                                     NULL);
6044                         }
6045                 }
6046                 current = current->vme_next;
6047         }
6048
6049         current = entry; /* final pass: hand each entry up to "end" (inclusive) to vm_map_simplify_entry() */
6050         while ((current != vm_map_to_entry(map)) &&
6051             (current->vme_start <= end)) {
6052                 vm_map_simplify_entry(map, current);
6053                 current = current->vme_next;
6054         }
6055
6056         vm_map_unlock(map);
6057         return KERN_SUCCESS;
6058 }
6059
6060 /*
6061  *      vm_map_inherit:
6062  *
6063  *      Sets the inheritance of the specified address range in the
6064  *      target map.  Inheritance affects how the map will be shared
6065  *      with child maps at the time of vm_map_fork.  Fails with
6066  *      KERN_INVALID_ARGUMENT if VM_INHERIT_COPY is asked of a submap entry.
6067  */
6068 kern_return_t
6069 vm_map_inherit(
6070         vm_map_t        map,
6071         vm_map_offset_t start,
6072         vm_map_offset_t end,
6073         vm_inherit_t    new_inheritance)
6074 {
6075         vm_map_entry_t  entry;          /* cursor for both passes */
6076         vm_map_entry_t  temp_entry;     /* first entry of the range, kept for the second pass */
6077
6078         vm_map_lock(map);
6079
6080         VM_MAP_RANGE_CHECK(map, start, end); /* NOTE(review): macro defined elsewhere; presumably clamps start/end to the map -- confirm */
6081
6082         if (vm_map_lookup_entry(map, start, &temp_entry)) {
6083                 entry = temp_entry;
6084         } else {
6085                 temp_entry = temp_entry->vme_next; /* "start" is not inside an entry: begin with the one after the entry returned */
6086                 entry = temp_entry;
6087         }
6088
6089         /* first check entire range for submaps which can't support the */
6090         /* given inheritance; nothing is modified if the check fails. */
6091         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6092                 if (entry->is_sub_map) {
6093                         if (new_inheritance == VM_INHERIT_COPY) {
6094                                 vm_map_unlock(map);
6095                                 return KERN_INVALID_ARGUMENT;
6096                         }
6097                 }
6098
6099                 entry = entry->vme_next;
6100         }
6101
6102         entry = temp_entry; /* rewind to the first entry for the update pass */
6103         if (entry != vm_map_to_entry(map)) {
6104                 /* clip and unnest if necessary */
6105                 vm_map_clip_start(map, entry, start);
6106         }
6107
6108         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6109                 vm_map_clip_end(map, entry, end);
6110                 if (entry->is_sub_map) {
6111                         /* clip did unnest if needed */
6112                         assert(!entry->use_pmap);
6113                 }
6114
6115                 entry->inheritance = new_inheritance;
6116
6117                 entry = entry->vme_next;
6118         }
6119
6120         vm_map_unlock(map);
6121         return KERN_SUCCESS;
6122 }
6123
6124 /*
6125  * Update the accounting for the amount of wired memory in this map.  If the user has exceeded
6126  * the defined limits, then we fail.  Wiring on behalf of the kernel never fails (it panics on overflow).
6127  */
6128
6129 static kern_return_t
6130 add_wire_counts(
6131         vm_map_t        map,
6132         vm_map_entry_t  entry,
6133         boolean_t       user_wire)      /* TRUE: user request, FALSE: kernel request */
6134 {
6135         vm_map_size_t   size;           /* size of "entry"; only set on the first user wiring */
6136
6137         if (user_wire) {
6138                 unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
6139
6140                 /*
6141                  * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
6142                  * this map entry.
6143                  */
6144
6145                 if (entry->user_wired_count == 0) {
6146                         size = entry->vme_end - entry->vme_start;
6147
6148                         /*
6149                          * Since this is the first time the user is wiring this map entry, check to see if we're
6150                          * exceeding the user wire limits.  There is a per map limit which is the smaller of either
6151                          * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
6152                          * a system-wide limit on the amount of memory all users can wire.  If the user is over either
6153                          * limit, then we fail.
6154                          */
6155
6156                         if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6157                             size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6158                                 return KERN_RESOURCE_SHORTAGE; /* nothing has been modified yet */
6159                         }
6160
6161                         /*
6162                          * The first time the user wires an entry, we also increment the wired_count and add this to
6163                          * the total that has been wired in the map.
6164                          */
6165
6166                         if (entry->wired_count >= MAX_WIRE_COUNT) {
6167                                 return KERN_FAILURE; /* wired_count is already at its maximum */
6168                         }
6169
6170                         entry->wired_count++;
6171                         map->user_wire_size += size;
6172                 }
6173
6174                 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6175                         return KERN_FAILURE; /* user_wired_count is already at its maximum */
6176                 }
6177
6178                 entry->user_wired_count++;
6179         } else {
6180                 /*
6181                  * The kernel's wiring the memory.  Just bump the count and continue.
6182                  */
6183
6184                 if (entry->wired_count >= MAX_WIRE_COUNT) {
6185                         panic("vm_map_wire: too many wirings");
6186                 }
6187
6188                 entry->wired_count++;
6189         }
6190
6191         return KERN_SUCCESS;
6192 }
6193
6194 /*
6195  * Update the memory wiring accounting now that the given map entry is being unwired (reverse of add_wire_counts()).
6196  */
6197
6198 static void
6199 subtract_wire_counts(
6200         vm_map_t        map,
6201         vm_map_entry_t  entry,
6202         boolean_t       user_wire)      /* TRUE: user request, FALSE: kernel request */
6203 {
6204         if (user_wire) {
6205                 /*
6206                  * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
6207                  */
6208
6209                 if (entry->user_wired_count == 1) {
6210                         /*
6211                          * We're removing the last user wire reference.  Decrement the wired_count and the total
6212                          * user wired memory for this map.
6213                          */
6214
6215                         assert(entry->wired_count >= 1);
6216                         entry->wired_count--;
6217                         map->user_wire_size -= entry->vme_end - entry->vme_start; /* give back this entry's size */
6218                 }
6219
6220                 assert(entry->user_wired_count >= 1);
6221                 entry->user_wired_count--;
6222         } else {
6223                 /*
6224                  * The kernel is unwiring the memory.   Just update the count.
6225                  */
6226
6227                 assert(entry->wired_count >= 1);
6228                 entry->wired_count--;
6229         }
6230 }
6231
6232 int cs_executable_wire = 0; /* NOTE(review): not used in the code visible here; presumably related to wiring executable (code-signed) mappings -- confirm */
6233
6234 /*
6235  *      vm_map_wire:
6236  *
6237  *      Sets the pageability of the specified address range in the
6238  *      target map as wired.  Regions specified as not pageable require
6239  *      locked-down physical memory and physical page maps.  The
6240  *      access_type variable indicates types of accesses that must not
6241  *      generate page faults.  This is checked against protection of
6242  *      memory being locked-down.
6243  *
6244  *      The map must not be locked, but a reference must remain to the
6245  *      map throughout the call.
6246  */
6247 static kern_return_t
6248 vm_map_wire_nested(
6249         vm_map_t                map,
6250         vm_map_offset_t         start,
6251         vm_map_offset_t         end,
6252         vm_prot_t               caller_prot,
6253         vm_tag_t                tag,
6254         boolean_t               user_wire,
6255         pmap_t                  map_pmap,
6256         vm_map_offset_t         pmap_addr,
6257         ppnum_t                 *physpage_p)
6258 {
6259         vm_map_entry_t          entry;
6260         vm_prot_t               access_type;
6261         struct vm_map_entry     *first_entry, tmp_entry;
6262         vm_map_t                real_map;
6263         vm_map_offset_t         s, e;
6264         kern_return_t           rc;
6265         boolean_t               need_wakeup;
6266         boolean_t               main_map = FALSE;
6267         wait_interrupt_t        interruptible_state;
6268         thread_t                cur_thread;
6269         unsigned int            last_timestamp;
6270         vm_map_size_t           size;
6271         boolean_t               wire_and_extract;
6272
6273         access_type = (caller_prot & VM_PROT_ALL);
6274
6275         wire_and_extract = FALSE;
6276         if (physpage_p != NULL) {
6277                 /*
6278                  * The caller wants the physical page number of the
6279                  * wired page.  We return only one physical page number
6280                  * so this works for only one page at a time.
6281                  */
6282                 if ((end - start) != PAGE_SIZE) {
6283                         return KERN_INVALID_ARGUMENT;
6284                 }
6285                 wire_and_extract = TRUE;
6286                 *physpage_p = 0;
6287         }
6288
6289         vm_map_lock(map);
6290         if (map_pmap == NULL) {
6291                 main_map = TRUE;
6292         }
6293         last_timestamp = map->timestamp;
6294
6295         VM_MAP_RANGE_CHECK(map, start, end);
6296         assert(page_aligned(start));
6297         assert(page_aligned(end));
6298         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6299         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6300         if (start == end) {
6301                 /* We wired what the caller asked for, zero pages */
6302                 vm_map_unlock(map);
6303                 return KERN_SUCCESS;
6304         }
6305
6306         need_wakeup = FALSE;
6307         cur_thread = current_thread();
6308
6309         s = start;
6310         rc = KERN_SUCCESS;
6311
6312         if (vm_map_lookup_entry(map, s, &first_entry)) {
6313                 entry = first_entry;
6314                 /*
6315                  * vm_map_clip_start will be done later.
6316                  * We don't want to unnest any nested submaps here !
6317                  */
6318         } else {
6319                 /* Start address is not in map */
6320                 rc = KERN_INVALID_ADDRESS;
6321                 goto done;
6322         }
6323
6324         while ((entry != vm_map_to_entry(map)) && (s < end)) {
6325                 /*
6326                  * At this point, we have wired from "start" to "s".
6327                  * We still need to wire from "s" to "end".
6328                  *
6329                  * "entry" hasn't been clipped, so it could start before "s"
6330                  * and/or end after "end".
6331                  */
6332
6333                 /* "e" is how far we want to wire in this entry */
6334                 e = entry->vme_end;
6335                 if (e > end) {
6336                         e = end;
6337                 }
6338
6339                 /*
6340                  * If another thread is wiring/unwiring this entry then
6341                  * block after informing other thread to wake us up.
6342                  */
6343                 if (entry->in_transition) {
6344                         wait_result_t wait_result;
6345
6346                         /*
6347                          * We have not clipped the entry.  Make sure that
6348                          * the start address is in range so that the lookup
6349                          * below will succeed.
6350                          * "s" is the current starting point: we've already
6351                          * wired from "start" to "s" and we still have
6352                          * to wire from "s" to "end".
6353                          */
6354
6355                         entry->needs_wakeup = TRUE;
6356
6357                         /*
6358                          * wake up anybody waiting on entries that we have
6359                          * already wired.
6360                          */
6361                         if (need_wakeup) {
6362                                 vm_map_entry_wakeup(map);
6363                                 need_wakeup = FALSE;
6364                         }
6365                         /*
6366                          * User wiring is interruptible
6367                          */
6368                         wait_result = vm_map_entry_wait(map,
6369                             (user_wire) ? THREAD_ABORTSAFE :
6370                             THREAD_UNINT);
6371                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
6372                                 /*
6373                                  * undo the wirings we have done so far
6374                                  * We do not clear the needs_wakeup flag,
6375                                  * because we cannot tell if we were the
6376                                  * only one waiting.
6377                                  */
6378                                 rc = KERN_FAILURE;
6379                                 goto done;
6380                         }
6381
6382                         /*
6383                          * Cannot avoid a lookup here. reset timestamp.
6384                          */
6385                         last_timestamp = map->timestamp;
6386
6387                         /*
6388                          * The entry could have been clipped, look it up again.
6389                          * Worse that can happen is, it may not exist anymore.
6390                          */
6391                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
6392                                 /*
6393                                  * User: undo everything upto the previous
6394                                  * entry.  let vm_map_unwire worry about
6395                                  * checking the validity of the range.
6396                                  */
6397                                 rc = KERN_FAILURE;
6398                                 goto done;
6399                         }
6400                         entry = first_entry;
6401                         continue;
6402                 }
6403
6404                 if (entry->is_sub_map) {
6405                         vm_map_offset_t sub_start;
6406                         vm_map_offset_t sub_end;
6407                         vm_map_offset_t local_start;
6408                         vm_map_offset_t local_end;
6409                         pmap_t          pmap;
6410
6411                         if (wire_and_extract) {
6412                                 /*
6413                                  * Wiring would result in copy-on-write
6414                                  * which would not be compatible with
6415                                  * the sharing we have with the original
6416                                  * provider of this memory.
6417                                  */
6418                                 rc = KERN_INVALID_ARGUMENT;
6419                                 goto done;
6420                         }
6421
6422                         vm_map_clip_start(map, entry, s);
6423                         vm_map_clip_end(map, entry, end);
6424
6425                         sub_start = VME_OFFSET(entry);
6426                         sub_end = entry->vme_end;
6427                         sub_end += VME_OFFSET(entry) - entry->vme_start;
6428
6429                         local_end = entry->vme_end;
6430                         if (map_pmap == NULL) {
6431                                 vm_object_t             object;
6432                                 vm_object_offset_t      offset;
6433                                 vm_prot_t               prot;
6434                                 boolean_t               wired;
6435                                 vm_map_entry_t          local_entry;
6436                                 vm_map_version_t         version;
6437                                 vm_map_t                lookup_map;
6438
6439                                 if (entry->use_pmap) {
6440                                         pmap = VME_SUBMAP(entry)->pmap;
6441                                         /* ppc implementation requires that */
6442                                         /* submaps pmap address ranges line */
6443                                         /* up with parent map */
6444 #ifdef notdef
6445                                         pmap_addr = sub_start;
6446 #endif
6447                                         pmap_addr = s;
6448                                 } else {
6449                                         pmap = map->pmap;
6450                                         pmap_addr = s;
6451                                 }
6452
6453                                 if (entry->wired_count) {
6454                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6455                                                 goto done;
6456                                         }
6457
6458                                         /*
6459                                          * The map was not unlocked:
6460                                          * no need to goto re-lookup.
6461                                          * Just go directly to next entry.
6462                                          */
6463                                         entry = entry->vme_next;
6464                                         s = entry->vme_start;
6465                                         continue;
6466                                 }
6467
6468                                 /* call vm_map_lookup_locked to */
6469                                 /* cause any needs copy to be   */
6470                                 /* evaluated */
6471                                 local_start = entry->vme_start;
6472                                 lookup_map = map;
6473                                 vm_map_lock_write_to_read(map);
6474                                 if (vm_map_lookup_locked(
6475                                             &lookup_map, local_start,
6476                                             access_type | VM_PROT_COPY,
6477                                             OBJECT_LOCK_EXCLUSIVE,
6478                                             &version, &object,
6479                                             &offset, &prot, &wired,
6480                                             NULL,
6481                                             &real_map)) {
6482                                         vm_map_unlock_read(lookup_map);
6483                                         assert(map_pmap == NULL);
6484                                         vm_map_unwire(map, start,
6485                                             s, user_wire);
6486                                         return KERN_FAILURE;
6487                                 }
6488                                 vm_object_unlock(object);
6489                                 if (real_map != lookup_map) {
6490                                         vm_map_unlock(real_map);
6491                                 }
6492                                 vm_map_unlock_read(lookup_map);
6493                                 vm_map_lock(map);
6494
6495                                 /* we unlocked, so must re-lookup */
6496                                 if (!vm_map_lookup_entry(map,
6497                                     local_start,
6498                                     &local_entry)) {
6499                                         rc = KERN_FAILURE;
6500                                         goto done;
6501                                 }
6502
6503                                 /*
6504                                  * entry could have been "simplified",
6505                                  * so re-clip
6506                                  */
6507                                 entry = local_entry;
6508                                 assert(s == local_start);
6509                                 vm_map_clip_start(map, entry, s);
6510                                 vm_map_clip_end(map, entry, end);
6511                                 /* re-compute "e" */
6512                                 e = entry->vme_end;
6513                                 if (e > end) {
6514                                         e = end;
6515                                 }
6516
6517                                 /* did we have a change of type? */
6518                                 if (!entry->is_sub_map) {
6519                                         last_timestamp = map->timestamp;
6520                                         continue;
6521                                 }
6522                         } else {
6523                                 local_start = entry->vme_start;
6524                                 pmap = map_pmap;
6525                         }
6526
6527                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6528                                 goto done;
6529                         }
6530
6531                         entry->in_transition = TRUE;
6532
6533                         vm_map_unlock(map);
6534                         rc = vm_map_wire_nested(VME_SUBMAP(entry),
6535                             sub_start, sub_end,
6536                             caller_prot, tag,
6537                             user_wire, pmap, pmap_addr,
6538                             NULL);
6539                         vm_map_lock(map);
6540
6541                         /*
6542                          * Find the entry again.  It could have been clipped
6543                          * after we unlocked the map.
6544                          */
6545                         if (!vm_map_lookup_entry(map, local_start,
6546                             &first_entry)) {
6547                                 panic("vm_map_wire: re-lookup failed");
6548                         }
6549                         entry = first_entry;
6550
6551                         assert(local_start == s);
6552                         /* re-compute "e" */
6553                         e = entry->vme_end;
6554                         if (e > end) {
6555                                 e = end;
6556                         }
6557
6558                         last_timestamp = map->timestamp;
6559                         while ((entry != vm_map_to_entry(map)) &&
6560                             (entry->vme_start < e)) {
6561                                 assert(entry->in_transition);
6562                                 entry->in_transition = FALSE;
6563                                 if (entry->needs_wakeup) {
6564                                         entry->needs_wakeup = FALSE;
6565                                         need_wakeup = TRUE;
6566                                 }
6567                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6568                                         subtract_wire_counts(map, entry, user_wire);
6569                                 }
6570                                 entry = entry->vme_next;
6571                         }
6572                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6573                                 goto done;
6574                         }
6575
6576                         /* no need to relookup again */
6577                         s = entry->vme_start;
6578                         continue;
6579                 }
6580
6581                 /*
6582                  * If this entry is already wired then increment
6583                  * the appropriate wire reference count.
6584                  */
6585                 if (entry->wired_count) {
6586                         if ((entry->protection & access_type) != access_type) {
6587                                 /* found a protection problem */
6588
6589                                 /*
6590                                  * XXX FBDP
6591                                  * We should always return an error
6592                                  * in this case but since we didn't
6593                                  * enforce it before, let's do
6594                                  * it only for the new "wire_and_extract"
6595                                  * code path for now...
6596                                  */
6597                                 if (wire_and_extract) {
6598                                         rc = KERN_PROTECTION_FAILURE;
6599                                         goto done;
6600                                 }
6601                         }
6602
6603                         /*
6604                          * entry is already wired down, get our reference
6605                          * after clipping to our range.
6606                          */
6607                         vm_map_clip_start(map, entry, s);
6608                         vm_map_clip_end(map, entry, end);
6609
6610                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6611                                 goto done;
6612                         }
6613
6614                         if (wire_and_extract) {
6615                                 vm_object_t             object;
6616                                 vm_object_offset_t      offset;
6617                                 vm_page_t               m;
6618
6619                                 /*
6620                                  * We don't have to "wire" the page again
6621                                  * but we still have to "extract" its
6622                                  * physical page number, after some sanity
6623                                  * checks.
6624                                  */
6625                                 assert((entry->vme_end - entry->vme_start)
6626                                     == PAGE_SIZE);
6627                                 assert(!entry->needs_copy);
6628                                 assert(!entry->is_sub_map);
6629                                 assert(VME_OBJECT(entry));
6630                                 if (((entry->vme_end - entry->vme_start)
6631                                     != PAGE_SIZE) ||
6632                                     entry->needs_copy ||
6633                                     entry->is_sub_map ||
6634                                     VME_OBJECT(entry) == VM_OBJECT_NULL) {
6635                                         rc = KERN_INVALID_ARGUMENT;
6636                                         goto done;
6637                                 }
6638
6639                                 object = VME_OBJECT(entry);
6640                                 offset = VME_OFFSET(entry);
6641                                 /* need exclusive lock to update m->dirty */
6642                                 if (entry->protection & VM_PROT_WRITE) {
6643                                         vm_object_lock(object);
6644                                 } else {
6645                                         vm_object_lock_shared(object);
6646                                 }
6647                                 m = vm_page_lookup(object, offset);
6648                                 assert(m != VM_PAGE_NULL);
6649                                 assert(VM_PAGE_WIRED(m));
6650                                 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6651                                         *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6652                                         if (entry->protection & VM_PROT_WRITE) {
6653                                                 vm_object_lock_assert_exclusive(
6654                                                         object);
6655                                                 m->vmp_dirty = TRUE;
6656                                         }
6657                                 } else {
6658                                         /* not already wired !? */
6659                                         *physpage_p = 0;
6660                                 }
6661                                 vm_object_unlock(object);
6662                         }
6663
6664                         /* map was not unlocked: no need to relookup */
6665                         entry = entry->vme_next;
6666                         s = entry->vme_start;
6667                         continue;
6668                 }
6669
6670                 /*
6671                  * Unwired entry or wire request transmitted via submap
6672                  */
6673
6674                 /*
6675                  * Wiring would copy the pages to the shadow object.
6676                  * The shadow object would not be code-signed so
6677                  * attempting to execute code from these copied pages
6678                  * would trigger a code-signing violation.
6679                  */
6680
6681                 if ((entry->protection & VM_PROT_EXECUTE)
6682 #if !CONFIG_EMBEDDED
6683                     &&
6684                     map != kernel_map &&
6685                     cs_process_enforcement(NULL)
6686 #endif /* !CONFIG_EMBEDDED */
6687                     ) {
6688 #if MACH_ASSERT
6689                         printf("pid %d[%s] wiring executable range from "
6690                             "0x%llx to 0x%llx: rejected to preserve "
6691                             "code-signing\n",
6692                             proc_selfpid(),
6693                             (current_task()->bsd_info
6694                             ? proc_name_address(current_task()->bsd_info)
6695                             : "?"),
6696                             (uint64_t) entry->vme_start,
6697                             (uint64_t) entry->vme_end);
6698 #endif /* MACH_ASSERT */
6699                         DTRACE_VM2(cs_executable_wire,
6700                             uint64_t, (uint64_t)entry->vme_start,
6701                             uint64_t, (uint64_t)entry->vme_end);
6702                         cs_executable_wire++;
6703                         rc = KERN_PROTECTION_FAILURE;
6704                         goto done;
6705                 }
6706
6707                 /*
6708                  * Perform actions of vm_map_lookup that need the write
6709                  * lock on the map: create a shadow object for a
6710                  * copy-on-write region, or an object for a zero-fill
6711                  * region.
6712                  */
6713                 size = entry->vme_end - entry->vme_start;
6714                 /*
6715                  * If wiring a copy-on-write page, we need to copy it now
6716                  * even if we're only (currently) requesting read access.
6717                  * This is aggressive, but once it's wired we can't move it.
6718                  */
6719                 if (entry->needs_copy) {
6720                         if (wire_and_extract) {
6721                                 /*
6722                                  * We're supposed to share with the original
6723                                  * provider so should not be "needs_copy"
6724                                  */
6725                                 rc = KERN_INVALID_ARGUMENT;
6726                                 goto done;
6727                         }
6728
6729                         VME_OBJECT_SHADOW(entry, size);
6730                         entry->needs_copy = FALSE;
6731                 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6732                         if (wire_and_extract) {
6733                                 /*
6734                                  * We're supposed to share with the original
6735                                  * provider so should already have an object.
6736                                  */
6737                                 rc = KERN_INVALID_ARGUMENT;
6738                                 goto done;
6739                         }
6740                         VME_OBJECT_SET(entry, vm_object_allocate(size));
6741                         VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6742                         assert(entry->use_pmap);
6743                 }
6744
6745                 vm_map_clip_start(map, entry, s);
6746                 vm_map_clip_end(map, entry, end);
6747
6748                 /* re-compute "e" */
6749                 e = entry->vme_end;
6750                 if (e > end) {
6751                         e = end;
6752                 }
6753
6754                 /*
6755                  * Check for holes and protection mismatch.
6756                  * Holes: Next entry should be contiguous unless this
6757                  *        is the end of the region.
6758                  * Protection: Access requested must be allowed, unless
6759                  *      wiring is by protection class
6760                  */
6761                 if ((entry->vme_end < end) &&
6762                     ((entry->vme_next == vm_map_to_entry(map)) ||
6763                     (entry->vme_next->vme_start > entry->vme_end))) {
6764                         /* found a hole */
6765                         rc = KERN_INVALID_ADDRESS;
6766                         goto done;
6767                 }
6768                 if ((entry->protection & access_type) != access_type) {
6769                         /* found a protection problem */
6770                         rc = KERN_PROTECTION_FAILURE;
6771                         goto done;
6772                 }
6773
6774                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6775
6776                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6777                         goto done;
6778                 }
6779
6780                 entry->in_transition = TRUE;
6781
6782                 /*
6783                  * This entry might get split once we unlock the map.
6784                  * In vm_fault_wire(), we need the current range as
6785                  * defined by this entry.  In order for this to work
6786                  * along with a simultaneous clip operation, we make a
6787                  * temporary copy of this entry and use that for the
6788                  * wiring.  Note that the underlying objects do not
6789                  * change during a clip.
6790                  */
6791                 tmp_entry = *entry;
6792
6793                 /*
6794                  * The in_transition state guarantees that the entry
6795                  * (or entries for this range, if split occurred) will be
6796                  * there when the map lock is acquired for the second time.
6797                  */
6798                 vm_map_unlock(map);
6799
6800                 if (!user_wire && cur_thread != THREAD_NULL) {
6801                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
6802                 } else {
6803                         interruptible_state = THREAD_UNINT;
6804                 }
6805
6806                 if (map_pmap) {
6807                         rc = vm_fault_wire(map,
6808                             &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6809                             physpage_p);
6810                 } else {
6811                         rc = vm_fault_wire(map,
6812                             &tmp_entry, caller_prot, tag, map->pmap,
6813                             tmp_entry.vme_start,
6814                             physpage_p);
6815                 }
6816
6817                 if (!user_wire && cur_thread != THREAD_NULL) {
6818                         thread_interrupt_level(interruptible_state);
6819                 }
6820
6821                 vm_map_lock(map);
6822
6823                 if (last_timestamp + 1 != map->timestamp) {
6824                         /*
6825                          * Find the entry again.  It could have been clipped
6826                          * after we unlocked the map.
6827                          */
6828                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6829                             &first_entry)) {
6830                                 panic("vm_map_wire: re-lookup failed");
6831                         }
6832
6833                         entry = first_entry;
6834                 }
6835
6836                 last_timestamp = map->timestamp;
6837
6838                 while ((entry != vm_map_to_entry(map)) &&
6839                     (entry->vme_start < tmp_entry.vme_end)) {
6840                         assert(entry->in_transition);
6841                         entry->in_transition = FALSE;
6842                         if (entry->needs_wakeup) {
6843                                 entry->needs_wakeup = FALSE;
6844                                 need_wakeup = TRUE;
6845                         }
6846                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6847                                 subtract_wire_counts(map, entry, user_wire);
6848                         }
6849                         entry = entry->vme_next;
6850                 }
6851
6852                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
6853                         goto done;
6854                 }
6855
6856                 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6857                     (tmp_entry.vme_end != end) &&    /* AND, we are not at the end of the requested range */
6858                     (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6859                         /* found a "new" hole */
6860                         s = tmp_entry.vme_end;
6861                         rc = KERN_INVALID_ADDRESS;
6862                         goto done;
6863                 }
6864
6865                 s = entry->vme_start;
6866         } /* end while loop through map entries */
6867
6868 done:
6869         if (rc == KERN_SUCCESS) {
6870                 /* repair any damage we may have made to the VM map */
6871                 vm_map_simplify_range(map, start, end);
6872         }
6873
6874         vm_map_unlock(map);
6875
6876         /*
6877          * wake up anybody waiting on entries we wired.
6878          */
6879         if (need_wakeup) {
6880                 vm_map_entry_wakeup(map);
6881         }
6882
6883         if (rc != KERN_SUCCESS) {
6884                 /* undo what has been wired so far */
6885                 vm_map_unwire_nested(map, start, s, user_wire,
6886                     map_pmap, pmap_addr);
6887                 if (physpage_p) {
6888                         *physpage_p = 0;
6889                 }
6890         }
6891
6892         return rc;
6893 }
6894
6895 kern_return_t
6896 vm_map_wire_external(
6897         vm_map_t                map,
6898         vm_map_offset_t         start,
6899         vm_map_offset_t         end,
6900         vm_prot_t               caller_prot,
6901         boolean_t               user_wire)
6902 {
6903         kern_return_t   kret;
6904
6905         kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6906             user_wire, (pmap_t)NULL, 0, NULL);
6907         return kret;
6908 }
6909
6910 kern_return_t
6911 vm_map_wire_kernel(
6912         vm_map_t                map,
6913         vm_map_offset_t         start,
6914         vm_map_offset_t         end,
6915         vm_prot_t               caller_prot,
6916         vm_tag_t                tag,
6917         boolean_t               user_wire)
6918 {
6919         kern_return_t   kret;
6920
6921         kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6922             user_wire, (pmap_t)NULL, 0, NULL);
6923         return kret;
6924 }
6925
6926 kern_return_t
6927 vm_map_wire_and_extract_external(
6928         vm_map_t        map,
6929         vm_map_offset_t start,
6930         vm_prot_t       caller_prot,
6931         boolean_t       user_wire,
6932         ppnum_t         *physpage_p)
6933 {
6934         kern_return_t   kret;
6935
6936         kret = vm_map_wire_nested(map,
6937             start,
6938             start + VM_MAP_PAGE_SIZE(map),
6939             caller_prot,
6940             vm_tag_bt(),
6941             user_wire,
6942             (pmap_t)NULL,
6943             0,
6944             physpage_p);
6945         if (kret != KERN_SUCCESS &&
6946             physpage_p != NULL) {
6947                 *physpage_p = 0;
6948         }
6949         return kret;
6950 }
6951
6952 kern_return_t
6953 vm_map_wire_and_extract_kernel(
6954         vm_map_t        map,
6955         vm_map_offset_t start,
6956         vm_prot_t       caller_prot,
6957         vm_tag_t        tag,
6958         boolean_t       user_wire,
6959         ppnum_t         *physpage_p)
6960 {
6961         kern_return_t   kret;
6962
6963         kret = vm_map_wire_nested(map,
6964             start,
6965             start + VM_MAP_PAGE_SIZE(map),
6966             caller_prot,
6967             tag,
6968             user_wire,
6969             (pmap_t)NULL,
6970             0,
6971             physpage_p);
6972         if (kret != KERN_SUCCESS &&
6973             physpage_p != NULL) {
6974                 *physpage_p = 0;
6975         }
6976         return kret;
6977 }
6978
6979 /*
6980  *      vm_map_unwire:
6981  *
6982  *      Sets the pageability of the specified address range in the target
6983  *      as pageable.  Regions specified must have been wired previously.
6984  *
6985  *      The map must not be locked, but a reference must remain to the map
6986  *      throughout the call.
6987  *
6988  *      Kernel will panic on failures.  User unwire ignores holes and
6989  *      unwired and in-transition entries to avoid losing memory by leaving
6990  *      it unwired.
6991  */
6992 static kern_return_t
6993 vm_map_unwire_nested(
6994         vm_map_t                map,
6995         vm_map_offset_t         start,
6996         vm_map_offset_t         end,
6997         boolean_t               user_wire,
6998         pmap_t                  map_pmap,
6999         vm_map_offset_t         pmap_addr)
7000 {
7001         vm_map_entry_t          entry;
7002         struct vm_map_entry     *first_entry, tmp_entry;
7003         boolean_t               need_wakeup;
7004         boolean_t               main_map = FALSE;
7005         unsigned int            last_timestamp;
7006
7007         vm_map_lock(map);
7008         if (map_pmap == NULL) {
7009                 main_map = TRUE;
7010         }
7011         last_timestamp = map->timestamp;
7012
7013         VM_MAP_RANGE_CHECK(map, start, end);
7014         assert(page_aligned(start));
7015         assert(page_aligned(end));
7016         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7017         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7018
7019         if (start == end) {
7020                 /* We unwired what the caller asked for: zero pages */
7021                 vm_map_unlock(map);
7022                 return KERN_SUCCESS;
7023         }
7024
7025         if (vm_map_lookup_entry(map, start, &first_entry)) {
7026                 entry = first_entry;
7027                 /*
7028                  * vm_map_clip_start will be done later.
7029                  * We don't want to unnest any nested sub maps here !
7030                  */
7031         } else {
7032                 if (!user_wire) {
7033                         panic("vm_map_unwire: start not found");
7034                 }
7035                 /*      Start address is not in map. */
7036                 vm_map_unlock(map);
7037                 return KERN_INVALID_ADDRESS;
7038         }
7039
7040         if (entry->superpage_size) {
7041                 /* superpages are always wired */
7042                 vm_map_unlock(map);
7043                 return KERN_INVALID_ADDRESS;
7044         }
7045
7046         need_wakeup = FALSE;
7047         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7048                 if (entry->in_transition) {
7049                         /*
7050                          * 1)
7051                          * Another thread is wiring down this entry. Note
7052                          * that if it is not for the other thread we would
7053                          * be unwiring an unwired entry.  This is not
7054                          * permitted.  If we wait, we will be unwiring memory
7055                          * we did not wire.
7056                          *
7057                          * 2)
7058                          * Another thread is unwiring this entry.  We did not
7059                          * have a reference to it, because if we did, this
7060                          * entry will not be getting unwired now.
7061                          */
7062                         if (!user_wire) {
7063                                 /*
7064                                  * XXX FBDP
7065                                  * This could happen:  there could be some
7066                                  * overlapping vslock/vsunlock operations
7067                                  * going on.
7068                                  * We should probably just wait and retry,
7069                                  * but then we have to be careful that this
7070                                  * entry could get "simplified" after
7071                                  * "in_transition" gets unset and before
7072                                  * we re-lookup the entry, so we would
7073                                  * have to re-clip the entry to avoid
7074                                  * re-unwiring what we have already unwired...
7075                                  * See vm_map_wire_nested().
7076                                  *
7077                                  * Or we could just ignore "in_transition"
7078                                  * here and proceed to decrement the wired
7079                                  * count(s) on this entry.  That should be fine
7080                                  * as long as "wired_count" doesn't drop all
7081                                  * the way to 0 (and we should panic if THAT
7082                                  * happens).
7083                                  */
7084                                 panic("vm_map_unwire: in_transition entry");
7085                         }
7086
7087                         entry = entry->vme_next;
7088                         continue;
7089                 }
7090
7091                 if (entry->is_sub_map) {
7092                         vm_map_offset_t sub_start;
7093                         vm_map_offset_t sub_end;
7094                         vm_map_offset_t local_end;
7095                         pmap_t          pmap;
7096
7097                         vm_map_clip_start(map, entry, start);
7098                         vm_map_clip_end(map, entry, end);
7099
7100                         sub_start = VME_OFFSET(entry);
7101                         sub_end = entry->vme_end - entry->vme_start;
7102                         sub_end += VME_OFFSET(entry);
7103                         local_end = entry->vme_end;
7104                         if (map_pmap == NULL) {
7105                                 if (entry->use_pmap) {
7106                                         pmap = VME_SUBMAP(entry)->pmap;
7107                                         pmap_addr = sub_start;
7108                                 } else {
7109                                         pmap = map->pmap;
7110                                         pmap_addr = start;
7111                                 }
7112                                 if (entry->wired_count == 0 ||
7113                                     (user_wire && entry->user_wired_count == 0)) {
7114                                         if (!user_wire) {
7115                                                 panic("vm_map_unwire: entry is unwired");
7116                                         }
7117                                         entry = entry->vme_next;
7118                                         continue;
7119                                 }
7120
7121                                 /*
7122                                  * Check for holes
7123                                  * Holes: Next entry should be contiguous unless
7124                                  * this is the end of the region.
7125                                  */
7126                                 if (((entry->vme_end < end) &&
7127                                     ((entry->vme_next == vm_map_to_entry(map)) ||
7128                                     (entry->vme_next->vme_start
7129                                     > entry->vme_end)))) {
7130                                         if (!user_wire) {
7131                                                 panic("vm_map_unwire: non-contiguous region");
7132                                         }
7133 /*
7134  *                                       entry = entry->vme_next;
7135  *                                       continue;
7136  */
7137                                 }
7138
7139                                 subtract_wire_counts(map, entry, user_wire);
7140
7141                                 if (entry->wired_count != 0) {
7142                                         entry = entry->vme_next;
7143                                         continue;
7144                                 }
7145
7146                                 entry->in_transition = TRUE;
7147                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
7148
7149                                 /*
7150                                  * We can unlock the map now. The in_transition state
                                 * guarantees existence of the entry.
7152                                  */
7153                                 vm_map_unlock(map);
7154                                 vm_map_unwire_nested(VME_SUBMAP(entry),
7155                                     sub_start, sub_end, user_wire, pmap, pmap_addr);
7156                                 vm_map_lock(map);
7157
7158                                 if (last_timestamp + 1 != map->timestamp) {
7159                                         /*
7160                                          * Find the entry again.  It could have been
7161                                          * clipped or deleted after we unlocked the map.
7162                                          */
7163                                         if (!vm_map_lookup_entry(map,
7164                                             tmp_entry.vme_start,
7165                                             &first_entry)) {
7166                                                 if (!user_wire) {
7167                                                         panic("vm_map_unwire: re-lookup failed");
7168                                                 }
7169                                                 entry = first_entry->vme_next;
7170                                         } else {
7171                                                 entry = first_entry;
7172                                         }
7173                                 }
7174                                 last_timestamp = map->timestamp;
7175
7176                                 /*
7177                                  * clear transition bit for all constituent entries
7178                                  * that were in the original entry (saved in
7179                                  * tmp_entry).  Also check for waiters.
7180                                  */
7181                                 while ((entry != vm_map_to_entry(map)) &&
7182                                     (entry->vme_start < tmp_entry.vme_end)) {
7183                                         assert(entry->in_transition);
7184                                         entry->in_transition = FALSE;
7185                                         if (entry->needs_wakeup) {
7186                                                 entry->needs_wakeup = FALSE;
7187                                                 need_wakeup = TRUE;
7188                                         }
7189                                         entry = entry->vme_next;
7190                                 }
7191                                 continue;
7192                         } else {
7193                                 vm_map_unlock(map);
7194                                 vm_map_unwire_nested(VME_SUBMAP(entry),
7195                                     sub_start, sub_end, user_wire, map_pmap,
7196                                     pmap_addr);
7197                                 vm_map_lock(map);
7198
7199                                 if (last_timestamp + 1 != map->timestamp) {
7200                                         /*
7201                                          * Find the entry again.  It could have been
7202                                          * clipped or deleted after we unlocked the map.
7203                                          */
7204                                         if (!vm_map_lookup_entry(map,
7205                                             tmp_entry.vme_start,
7206                                             &first_entry)) {
7207                                                 if (!user_wire) {
7208                                                         panic("vm_map_unwire: re-lookup failed");
7209                                                 }
7210                                                 entry = first_entry->vme_next;
7211                                         } else {
7212                                                 entry = first_entry;
7213                                         }
7214                                 }
7215                                 last_timestamp = map->timestamp;
7216                         }
7217                 }
7218
7219
7220                 if ((entry->wired_count == 0) ||
7221                     (user_wire && entry->user_wired_count == 0)) {
7222                         if (!user_wire) {
7223                                 panic("vm_map_unwire: entry is unwired");
7224                         }
7225
7226                         entry = entry->vme_next;
7227                         continue;
7228                 }
7229
7230                 assert(entry->wired_count > 0 &&
7231                     (!user_wire || entry->user_wired_count > 0));
7232
7233                 vm_map_clip_start(map, entry, start);
7234                 vm_map_clip_end(map, entry, end);
7235
7236                 /*
7237                  * Check for holes
7238                  * Holes: Next entry should be contiguous unless
7239                  *        this is the end of the region.
7240                  */
7241                 if (((entry->vme_end < end) &&
7242                     ((entry->vme_next == vm_map_to_entry(map)) ||
7243                     (entry->vme_next->vme_start > entry->vme_end)))) {
7244                         if (!user_wire) {
7245                                 panic("vm_map_unwire: non-contiguous region");
7246                         }
7247                         entry = entry->vme_next;
7248                         continue;
7249                 }
7250
7251                 subtract_wire_counts(map, entry, user_wire);
7252
7253                 if (entry->wired_count != 0) {
7254                         entry = entry->vme_next;
7255                         continue;
7256                 }
7257
7258                 if (entry->zero_wired_pages) {
7259                         entry->zero_wired_pages = FALSE;
7260                 }
7261
7262                 entry->in_transition = TRUE;
7263                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
7264
7265                 /*
7266                  * We can unlock the map now. The in_transition state
                 * guarantees existence of the entry.
7268                  */
7269                 vm_map_unlock(map);
7270                 if (map_pmap) {
7271                         vm_fault_unwire(map,
7272                             &tmp_entry, FALSE, map_pmap, pmap_addr);
7273                 } else {
7274                         vm_fault_unwire(map,
7275                             &tmp_entry, FALSE, map->pmap,
7276                             tmp_entry.vme_start);
7277                 }
7278                 vm_map_lock(map);
7279
7280                 if (last_timestamp + 1 != map->timestamp) {
7281                         /*
7282                          * Find the entry again.  It could have been clipped
7283                          * or deleted after we unlocked the map.
7284                          */
7285                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7286                             &first_entry)) {
7287                                 if (!user_wire) {
7288                                         panic("vm_map_unwire: re-lookup failed");
7289                                 }
7290                                 entry = first_entry->vme_next;
7291                         } else {
7292                                 entry = first_entry;
7293                         }
7294                 }
7295                 last_timestamp = map->timestamp;
7296
7297                 /*
7298                  * clear transition bit for all constituent entries that
7299                  * were in the original entry (saved in tmp_entry).  Also
7300                  * check for waiters.
7301                  */
7302                 while ((entry != vm_map_to_entry(map)) &&
7303                     (entry->vme_start < tmp_entry.vme_end)) {
7304                         assert(entry->in_transition);
7305                         entry->in_transition = FALSE;
7306                         if (entry->needs_wakeup) {
7307                                 entry->needs_wakeup = FALSE;
7308                                 need_wakeup = TRUE;
7309                         }
7310                         entry = entry->vme_next;
7311                 }
7312         }
7313
7314         /*
7315          * We might have fragmented the address space when we wired this
7316          * range of addresses.  Attempt to re-coalesce these VM map entries
7317          * with their neighbors now that they're no longer wired.
7318          * Under some circumstances, address space fragmentation can
7319          * prevent VM object shadow chain collapsing, which can cause
7320          * swap space leaks.
7321          */
7322         vm_map_simplify_range(map, start, end);
7323
7324         vm_map_unlock(map);
7325         /*
7326          * wake up anybody waiting on entries that we have unwired.
7327          */
7328         if (need_wakeup) {
7329                 vm_map_entry_wakeup(map);
7330         }
7331         return KERN_SUCCESS;
7332 }
7333
7334 kern_return_t
7335 vm_map_unwire(
7336         vm_map_t                map,
7337         vm_map_offset_t         start,
7338         vm_map_offset_t         end,
7339         boolean_t               user_wire)
7340 {
7341         return vm_map_unwire_nested(map, start, end,
7342                    user_wire, (pmap_t)NULL, 0);
7343 }
7344
7345
7346 /*
7347  *      vm_map_entry_delete:    [ internal use only ]
7348  *
7349  *      Deallocate the given entry from the target map.
7350  */
7351 static void
7352 vm_map_entry_delete(
7353         vm_map_t        map,
7354         vm_map_entry_t  entry)
7355 {
7356         vm_map_offset_t s, e;
7357         vm_object_t     object;
7358         vm_map_t        submap;
7359
7360         s = entry->vme_start;
7361         e = entry->vme_end;
7362         assert(page_aligned(s));
7363         assert(page_aligned(e));
7364         if (entry->map_aligned == TRUE) {
7365                 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7366                 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7367         }
7368         assert(entry->wired_count == 0);
7369         assert(entry->user_wired_count == 0);
7370         assert(!entry->permanent);
7371
7372         if (entry->is_sub_map) {
7373                 object = NULL;
7374                 submap = VME_SUBMAP(entry);
7375         } else {
7376                 submap = NULL;
7377                 object = VME_OBJECT(entry);
7378         }
7379
7380         vm_map_store_entry_unlink(map, entry);
7381         map->size -= e - s;
7382
7383         vm_map_entry_dispose(map, entry);
7384
7385         vm_map_unlock(map);
7386         /*
7387          *      Deallocate the object only after removing all
7388          *      pmap entries pointing to its pages.
7389          */
7390         if (submap) {
7391                 vm_map_deallocate(submap);
7392         } else {
7393                 vm_object_deallocate(object);
7394         }
7395 }
7396
7397 void
7398 vm_map_submap_pmap_clean(
7399         vm_map_t        map,
7400         vm_map_offset_t start,
7401         vm_map_offset_t end,
7402         vm_map_t        sub_map,
7403         vm_map_offset_t offset)
7404 {
7405         vm_map_offset_t submap_start;
7406         vm_map_offset_t submap_end;
7407         vm_map_size_t   remove_size;
7408         vm_map_entry_t  entry;
7409
7410         submap_end = offset + (end - start);
7411         submap_start = offset;
7412
7413         vm_map_lock_read(sub_map);
7414         if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7415                 remove_size = (entry->vme_end - entry->vme_start);
7416                 if (offset > entry->vme_start) {
7417                         remove_size -= offset - entry->vme_start;
7418                 }
7419
7420
7421                 if (submap_end < entry->vme_end) {
7422                         remove_size -=
7423                             entry->vme_end - submap_end;
7424                 }
7425                 if (entry->is_sub_map) {
7426                         vm_map_submap_pmap_clean(
7427                                 sub_map,
7428                                 start,
7429                                 start + remove_size,
7430                                 VME_SUBMAP(entry),
7431                                 VME_OFFSET(entry));
7432                 } else {
7433                         if (map->mapped_in_other_pmaps &&
7434                             os_ref_get_count(&map->map_refcnt) != 0 &&
7435                             VME_OBJECT(entry) != NULL) {
7436                                 vm_object_pmap_protect_options(
7437                                         VME_OBJECT(entry),
7438                                         (VME_OFFSET(entry) +
7439                                         offset -
7440                                         entry->vme_start),
7441                                         remove_size,
7442                                         PMAP_NULL,
7443                                         entry->vme_start,
7444                                         VM_PROT_NONE,
7445                                         PMAP_OPTIONS_REMOVE);
7446                         } else {
7447                                 pmap_remove(map->pmap,
7448                                     (addr64_t)start,
7449                                     (addr64_t)(start + remove_size));
7450                         }
7451                 }
7452         }
7453
7454         entry = entry->vme_next;
7455
7456         while ((entry != vm_map_to_entry(sub_map))
7457             && (entry->vme_start < submap_end)) {
7458                 remove_size = (entry->vme_end - entry->vme_start);
7459                 if (submap_end < entry->vme_end) {
7460                         remove_size -= entry->vme_end - submap_end;
7461                 }
7462                 if (entry->is_sub_map) {
7463                         vm_map_submap_pmap_clean(
7464                                 sub_map,
7465                                 (start + entry->vme_start) - offset,
7466                                 ((start + entry->vme_start) - offset) + remove_size,
7467                                 VME_SUBMAP(entry),
7468                                 VME_OFFSET(entry));
7469                 } else {
7470                         if (map->mapped_in_other_pmaps &&
7471                             os_ref_get_count(&map->map_refcnt) != 0 &&
7472                             VME_OBJECT(entry) != NULL) {
7473                                 vm_object_pmap_protect_options(
7474                                         VME_OBJECT(entry),
7475                                         VME_OFFSET(entry),
7476                                         remove_size,
7477                                         PMAP_NULL,
7478                                         entry->vme_start,
7479                                         VM_PROT_NONE,
7480                                         PMAP_OPTIONS_REMOVE);
7481                         } else {
7482                                 pmap_remove(map->pmap,
7483                                     (addr64_t)((start + entry->vme_start)
7484                                     - offset),
7485                                     (addr64_t)(((start + entry->vme_start)
7486                                     - offset) + remove_size));
7487                         }
7488                 }
7489                 entry = entry->vme_next;
7490         }
7491         vm_map_unlock_read(sub_map);
7492         return;
7493 }
7494
7495 /*
7496  *     virt_memory_guard_ast:
7497  *
7498  *     Handle the AST callout for a virtual memory guard.
7499  *         raise an EXC_GUARD exception and terminate the task
7500  *     if configured to do so.
7501  */
7502 void
7503 virt_memory_guard_ast(
7504         thread_t thread,
7505         mach_exception_data_type_t code,
7506         mach_exception_data_type_t subcode)
7507 {
7508         task_t task = thread->task;
7509         assert(task != kernel_task);
7510         assert(task == current_task());
7511         uint32_t behavior;
7512
7513         behavior = task->task_exc_guard;
7514
7515         /* Is delivery enabled */
7516         if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7517                 return;
7518         }
7519
7520         /* If only once, make sure we're that once */
7521         while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7522                 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7523
7524                 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7525                         break;
7526                 }
7527                 behavior = task->task_exc_guard;
7528                 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7529                         return;
7530                 }
7531         }
7532
7533         /* Raise exception via corpse fork or synchronously */
7534         if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7535             (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7536                 task_violated_guard(code, subcode, NULL);
7537         } else {
7538                 task_exception_notify(EXC_GUARD, code, subcode);
7539         }
7540
7541         /* Terminate the task if desired */
7542         if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7543                 task_bsdtask_kill(current_task());
7544         }
7545 }
7546
7547 /*
7548  *     vm_map_guard_exception:
7549  *
7550  *     Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7551  *
7552  *     Right now, we do this when we find nothing mapped, or a
7553  *     gap in the mapping when a user address space deallocate
7554  *     was requested. We report the address of the first gap found.
7555  */
7556 static void
7557 vm_map_guard_exception(
7558         vm_map_offset_t gap_start,
7559         unsigned reason)
7560 {
7561         mach_exception_code_t code = 0;
7562         unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7563         unsigned int target = 0; /* should we pass in pid associated with map? */
7564         mach_exception_data_type_t subcode = (uint64_t)gap_start;
7565         boolean_t fatal = FALSE;
7566
7567         task_t task = current_task();
7568
7569         /* Can't deliver exceptions to kernel task */
7570         if (task == kernel_task) {
7571                 return;
7572         }
7573
7574         EXC_GUARD_ENCODE_TYPE(code, guard_type);
7575         EXC_GUARD_ENCODE_FLAVOR(code, reason);
7576         EXC_GUARD_ENCODE_TARGET(code, target);
7577
7578         if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7579                 fatal = TRUE;
7580         }
7581         thread_guard_violation(current_thread(), code, subcode, fatal);
7582 }
7583
7584 /*
7585  *      vm_map_delete:  [ internal use only ]
7586  *
7587  *      Deallocates the given address range from the target map.
7588  *      Removes all user wirings. Unwires one kernel wiring if
7589  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
7590  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
7591  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7592  *
7593  *      This routine is called with map locked and leaves map locked.
7594  */
7595 static kern_return_t
7596 vm_map_delete(
7597         vm_map_t                map,
7598         vm_map_offset_t         start,
7599         vm_map_offset_t         end,
7600         int                     flags,
7601         vm_map_t                zap_map)
7602 {
7603         vm_map_entry_t          entry, next;
7604         struct   vm_map_entry   *first_entry, tmp_entry;
7605         vm_map_offset_t         s;
7606         vm_object_t             object;
7607         boolean_t               need_wakeup;
7608         unsigned int            last_timestamp = ~0; /* unlikely value */
7609         int                     interruptible;
7610         vm_map_offset_t         gap_start;
7611         __unused vm_map_offset_t save_start = start;
7612         __unused vm_map_offset_t save_end = end;
7613         const vm_map_offset_t   FIND_GAP = 1;   /* a not page aligned value */
7614         const vm_map_offset_t   GAPS_OK = 2;    /* a different not page aligned value */
7615
7616         if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7617                 gap_start = FIND_GAP;
7618         } else {
7619                 gap_start = GAPS_OK;
7620         }
7621
7622         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7623             THREAD_ABORTSAFE : THREAD_UNINT;
7624
7625         /*
7626          * All our DMA I/O operations in IOKit are currently done by
7627          * wiring through the map entries of the task requesting the I/O.
7628          * Because of this, we must always wait for kernel wirings
7629          * to go away on the entries before deleting them.
7630          *
7631          * Any caller who wants to actually remove a kernel wiring
7632          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7633          * properly remove one wiring instead of blasting through
7634          * them all.
7635          */
7636         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7637
7638         while (1) {
7639                 /*
7640                  *      Find the start of the region, and clip it
7641                  */
7642                 if (vm_map_lookup_entry(map, start, &first_entry)) {
7643                         entry = first_entry;
7644                         if (map == kalloc_map &&
7645                             (entry->vme_start != start ||
7646                             entry->vme_end != end)) {
7647                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7648                                     "mismatched entry %p [0x%llx:0x%llx]\n",
7649                                     map,
7650                                     (uint64_t)start,
7651                                     (uint64_t)end,
7652                                     entry,
7653                                     (uint64_t)entry->vme_start,
7654                                     (uint64_t)entry->vme_end);
7655                         }
7656
7657                         /*
7658                          * If in a superpage, extend the range to include the start of the mapping.
7659                          */
7660                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7661                                 start = SUPERPAGE_ROUND_DOWN(start);
7662                                 continue;
7663                         }
7664
7665                         if (start == entry->vme_start) {
7666                                 /*
7667                                  * No need to clip.  We don't want to cause
7668                                  * any unnecessary unnesting in this case...
7669                                  */
7670                         } else {
7671                                 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7672                                     entry->map_aligned &&
7673                                     !VM_MAP_PAGE_ALIGNED(
7674                                             start,
7675                                             VM_MAP_PAGE_MASK(map))) {
7676                                         /*
7677                                          * The entry will no longer be
7678                                          * map-aligned after clipping
7679                                          * and the caller said it's OK.
7680                                          */
7681                                         entry->map_aligned = FALSE;
7682                                 }
7683                                 if (map == kalloc_map) {
7684                                         panic("vm_map_delete(%p,0x%llx,0x%llx):"
7685                                             " clipping %p at 0x%llx\n",
7686                                             map,
7687                                             (uint64_t)start,
7688                                             (uint64_t)end,
7689                                             entry,
7690                                             (uint64_t)start);
7691                                 }
7692                                 vm_map_clip_start(map, entry, start);
7693                         }
7694
7695                         /*
7696                          *      Fix the lookup hint now, rather than each
7697                          *      time through the loop.
7698                          */
7699                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7700                 } else {
7701                         if (map->pmap == kernel_pmap &&
7702                             os_ref_get_count(&map->map_refcnt) != 0) {
7703                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7704                                     "no map entry at 0x%llx\n",
7705                                     map,
7706                                     (uint64_t)start,
7707                                     (uint64_t)end,
7708                                     (uint64_t)start);
7709                         }
7710                         entry = first_entry->vme_next;
7711                         if (gap_start == FIND_GAP) {
7712                                 gap_start = start;
7713                         }
7714                 }
7715                 break;
7716         }
7717         if (entry->superpage_size) {
7718                 end = SUPERPAGE_ROUND_UP(end);
7719         }
7720
7721         need_wakeup = FALSE;
7722         /*
7723          *      Step through all entries in this region
7724          */
7725         s = entry->vme_start;
7726         while ((entry != vm_map_to_entry(map)) && (s < end)) {
7727                 /*
7728                  * At this point, we have deleted all the memory entries
7729                  * between "start" and "s".  We still need to delete
7730                  * all memory entries between "s" and "end".
7731                  * While we were blocked and the map was unlocked, some
7732                  * new memory entries could have been re-allocated between
7733                  * "start" and "s" and we don't want to mess with those.
7734                  * Some of those entries could even have been re-assembled
7735                  * with an entry after "s" (in vm_map_simplify_entry()), so
7736                  * we may have to vm_map_clip_start() again.
7737                  */
7738
7739                 if (entry->vme_start >= s) {
7740                         /*
7741                          * This entry starts on or after "s"
7742                          * so no need to clip its start.
7743                          */
7744                 } else {
7745                         /*
7746                          * This entry has been re-assembled by a
7747                          * vm_map_simplify_entry().  We need to
7748                          * re-clip its start.
7749                          */
7750                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7751                             entry->map_aligned &&
7752                             !VM_MAP_PAGE_ALIGNED(s,
7753                             VM_MAP_PAGE_MASK(map))) {
7754                                 /*
7755                                  * The entry will no longer be map-aligned
7756                                  * after clipping and the caller said it's OK.
7757                                  */
7758                                 entry->map_aligned = FALSE;
7759                         }
7760                         if (map == kalloc_map) {
7761                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7762                                     "clipping %p at 0x%llx\n",
7763                                     map,
7764                                     (uint64_t)start,
7765                                     (uint64_t)end,
7766                                     entry,
7767                                     (uint64_t)s);
7768                         }
7769                         vm_map_clip_start(map, entry, s);
7770                 }
7771                 if (entry->vme_end <= end) {
7772                         /*
7773                          * This entry is going away completely, so no need
7774                          * to clip and possibly cause an unnecessary unnesting.
7775                          */
7776                 } else {
7777                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7778                             entry->map_aligned &&
7779                             !VM_MAP_PAGE_ALIGNED(end,
7780                             VM_MAP_PAGE_MASK(map))) {
7781                                 /*
7782                                  * The entry will no longer be map-aligned
7783                                  * after clipping and the caller said it's OK.
7784                                  */
7785                                 entry->map_aligned = FALSE;
7786                         }
7787                         if (map == kalloc_map) {
7788                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7789                                     "clipping %p at 0x%llx\n",
7790                                     map,
7791                                     (uint64_t)start,
7792                                     (uint64_t)end,
7793                                     entry,
7794                                     (uint64_t)end);
7795                         }
7796                         vm_map_clip_end(map, entry, end);
7797                 }
7798
7799                 if (entry->permanent) {
7800                         if (map->pmap == kernel_pmap) {
7801                                 panic("%s(%p,0x%llx,0x%llx): "
7802                                     "attempt to remove permanent "
7803                                     "VM map entry "
7804                                     "%p [0x%llx:0x%llx]\n",
7805                                     __FUNCTION__,
7806                                     map,
7807                                     (uint64_t) start,
7808                                     (uint64_t) end,
7809                                     entry,
7810                                     (uint64_t) entry->vme_start,
7811                                     (uint64_t) entry->vme_end);
7812                         } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7813 //                              printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7814                                 entry->permanent = FALSE;
7815 #if PMAP_CS
7816                         } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7817                                 entry->permanent = FALSE;
7818
7819                                 printf("%d[%s] %s(0x%llx,0x%llx): "
7820                                     "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7821                                     "prot 0x%x/0x%x\n",
7822                                     proc_selfpid(),
7823                                     (current_task()->bsd_info
7824                                     ? proc_name_address(current_task()->bsd_info)
7825                                     : "?"),
7826                                     __FUNCTION__,
7827                                     (uint64_t) start,
7828                                     (uint64_t) end,
7829                                     (uint64_t)entry->vme_start,
7830                                     (uint64_t)entry->vme_end,
7831                                     entry->protection,
7832                                     entry->max_protection);
7833 #endif
7834                         } else {
7835                                 if (vm_map_executable_immutable_verbose) {
7836                                         printf("%d[%s] %s(0x%llx,0x%llx): "
7837                                             "permanent entry [0x%llx:0x%llx] "
7838                                             "prot 0x%x/0x%x\n",
7839                                             proc_selfpid(),
7840                                             (current_task()->bsd_info
7841                                             ? proc_name_address(current_task()->bsd_info)
7842                                             : "?"),
7843                                             __FUNCTION__,
7844                                             (uint64_t) start,
7845                                             (uint64_t) end,
7846                                             (uint64_t)entry->vme_start,
7847                                             (uint64_t)entry->vme_end,
7848                                             entry->protection,
7849                                             entry->max_protection);
7850                                 }
7851                                 /*
7852                                  * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7853                                  */
7854                                 DTRACE_VM5(vm_map_delete_permanent,
7855                                     vm_map_offset_t, entry->vme_start,
7856                                     vm_map_offset_t, entry->vme_end,
7857                                     vm_prot_t, entry->protection,
7858                                     vm_prot_t, entry->max_protection,
7859                                     int, VME_ALIAS(entry));
7860                         }
7861                 }
7862
7863
7864                 if (entry->in_transition) {
7865                         wait_result_t wait_result;
7866
7867                         /*
7868                          * Another thread is wiring/unwiring this entry.
7869                          * Let the other thread know we are waiting.
7870                          */
7871                         assert(s == entry->vme_start);
7872                         entry->needs_wakeup = TRUE;
7873
7874                         /*
7875                          * wake up anybody waiting on entries that we have
7876                          * already unwired/deleted.
7877                          */
7878                         if (need_wakeup) {
7879                                 vm_map_entry_wakeup(map);
7880                                 need_wakeup = FALSE;
7881                         }
7882
7883                         wait_result = vm_map_entry_wait(map, interruptible);
7884
7885                         if (interruptible &&
7886                             wait_result == THREAD_INTERRUPTED) {
7887                                 /*
7888                                  * We do not clear the needs_wakeup flag,
7889                                  * since we cannot tell if we were the only one.
7890                                  */
7891                                 return KERN_ABORTED;
7892                         }
7893
7894                         /*
7895                          * The entry could have been clipped or it
7896                          * may not exist anymore.  Look it up again.
7897                          */
7898                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
7899                                 /*
7900                                  * User: use the next entry
7901                                  */
7902                                 if (gap_start == FIND_GAP) {
7903                                         gap_start = s;
7904                                 }
7905                                 entry = first_entry->vme_next;
7906                                 s = entry->vme_start;
7907                         } else {
7908                                 entry = first_entry;
7909                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7910                         }
7911                         last_timestamp = map->timestamp;
7912                         continue;
7913                 } /* end in_transition */
7914
7915                 if (entry->wired_count) {
7916                         boolean_t       user_wire;
7917
7918                         user_wire = entry->user_wired_count > 0;
7919
7920                         /*
7921                          *      Remove a kernel wiring if requested
7922                          */
7923                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
7924                                 entry->wired_count--;
7925                         }
7926
7927                         /*
7928                          *      Remove all user wirings for proper accounting
7929                          */
7930                         if (entry->user_wired_count > 0) {
7931                                 while (entry->user_wired_count) {
7932                                         subtract_wire_counts(map, entry, user_wire);
7933                                 }
7934                         }
7935
7936                         if (entry->wired_count != 0) {
7937                                 assert(map != kernel_map);
7938                                 /*
7939                                  * Cannot continue.  Typical case is when
7940                                  * a user thread has physical io pending on
7941                                  * on this page.  Either wait for the
7942                                  * kernel wiring to go away or return an
7943                                  * error.
7944                                  */
7945                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7946                                         wait_result_t wait_result;
7947
7948                                         assert(s == entry->vme_start);
7949                                         entry->needs_wakeup = TRUE;
7950                                         wait_result = vm_map_entry_wait(map,
7951                                             interruptible);
7952
7953                                         if (interruptible &&
7954                                             wait_result == THREAD_INTERRUPTED) {
7955                                                 /*
7956                                                  * We do not clear the
7957                                                  * needs_wakeup flag, since we
7958                                                  * cannot tell if we were the
7959                                                  * only one.
7960                                                  */
7961                                                 return KERN_ABORTED;
7962                                         }
7963
7964                                         /*
7965                                          * The entry could have been clipped or
7966                                          * it may not exist anymore.  Look it
7967                                          * up again.
7968                                          */
7969                                         if (!vm_map_lookup_entry(map, s,
7970                                             &first_entry)) {
7971                                                 assert(map != kernel_map);
7972                                                 /*
7973                                                  * User: use the next entry
7974                                                  */
7975                                                 if (gap_start == FIND_GAP) {
7976                                                         gap_start = s;
7977                                                 }
7978                                                 entry = first_entry->vme_next;
7979                                                 s = entry->vme_start;
7980                                         } else {
7981                                                 entry = first_entry;
7982                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7983                                         }
7984                                         last_timestamp = map->timestamp;
7985                                         continue;
7986                                 } else {
7987                                         return KERN_FAILURE;
7988                                 }
7989                         }
7990
7991                         entry->in_transition = TRUE;
7992                         /*
7993                          * copy current entry.  see comment in vm_map_wire()
7994                          */
7995                         tmp_entry = *entry;
7996                         assert(s == entry->vme_start);
7997
7998                         /*
7999                          * We can unlock the map now. The in_transition
8000                          * state guarentees existance of the entry.
8001                          */
8002                         vm_map_unlock(map);
8003
8004                         if (tmp_entry.is_sub_map) {
8005                                 vm_map_t sub_map;
8006                                 vm_map_offset_t sub_start, sub_end;
8007                                 pmap_t pmap;
8008                                 vm_map_offset_t pmap_addr;
8009
8010
8011                                 sub_map = VME_SUBMAP(&tmp_entry);
8012                                 sub_start = VME_OFFSET(&tmp_entry);
8013                                 sub_end = sub_start + (tmp_entry.vme_end -
8014                                     tmp_entry.vme_start);
8015                                 if (tmp_entry.use_pmap) {
8016                                         pmap = sub_map->pmap;
8017                                         pmap_addr = tmp_entry.vme_start;
8018                                 } else {
8019                                         pmap = map->pmap;
8020                                         pmap_addr = tmp_entry.vme_start;
8021                                 }
8022                                 (void) vm_map_unwire_nested(sub_map,
8023                                     sub_start, sub_end,
8024                                     user_wire,
8025                                     pmap, pmap_addr);
8026                         } else {
8027                                 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8028                                         pmap_protect_options(
8029                                                 map->pmap,
8030                                                 tmp_entry.vme_start,
8031                                                 tmp_entry.vme_end,
8032                                                 VM_PROT_NONE,
8033                                                 PMAP_OPTIONS_REMOVE,
8034                                                 NULL);
8035                                 }
8036                                 vm_fault_unwire(map, &tmp_entry,
8037                                     VME_OBJECT(&tmp_entry) == kernel_object,
8038                                     map->pmap, tmp_entry.vme_start);
8039                         }
8040
8041                         vm_map_lock(map);
8042
8043                         if (last_timestamp + 1 != map->timestamp) {
8044                                 /*
8045                                  * Find the entry again.  It could have
8046                                  * been clipped after we unlocked the map.
8047                                  */
8048                                 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8049                                         assert((map != kernel_map) &&
8050                                             (!entry->is_sub_map));
8051                                         if (gap_start == FIND_GAP) {
8052                                                 gap_start = s;
8053                                         }
8054                                         first_entry = first_entry->vme_next;
8055                                         s = first_entry->vme_start;
8056                                 } else {
8057                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8058                                 }
8059                         } else {
8060                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8061                                 first_entry = entry;
8062                         }
8063
8064                         last_timestamp = map->timestamp;
8065
8066                         entry = first_entry;
8067                         while ((entry != vm_map_to_entry(map)) &&
8068                             (entry->vme_start < tmp_entry.vme_end)) {
8069                                 assert(entry->in_transition);
8070                                 entry->in_transition = FALSE;
8071                                 if (entry->needs_wakeup) {
8072                                         entry->needs_wakeup = FALSE;
8073                                         need_wakeup = TRUE;
8074                                 }
8075                                 entry = entry->vme_next;
8076                         }
8077                         /*
8078                          * We have unwired the entry(s).  Go back and
8079                          * delete them.
8080                          */
8081                         entry = first_entry;
8082                         continue;
8083                 }
8084
8085                 /* entry is unwired */
8086                 assert(entry->wired_count == 0);
8087                 assert(entry->user_wired_count == 0);
8088
8089                 assert(s == entry->vme_start);
8090
8091                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8092                         /*
8093                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8094                          * vm_map_delete(), some map entries might have been
8095                          * transferred to a "zap_map", which doesn't have a
8096                          * pmap.  The original pmap has already been flushed
8097                          * in the vm_map_delete() call targeting the original
8098                          * map, but when we get to destroying the "zap_map",
8099                          * we don't have any pmap to flush, so let's just skip
8100                          * all this.
8101                          */
8102                 } else if (entry->is_sub_map) {
8103                         if (entry->use_pmap) {
8104 #ifndef NO_NESTED_PMAP
8105                                 int pmap_flags;
8106
8107                                 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8108                                         /*
8109                                          * This is the final cleanup of the
8110                                          * address space being terminated.
8111                                          * No new mappings are expected and
8112                                          * we don't really need to unnest the
8113                                          * shared region (and lose the "global"
8114                                          * pmap mappings, if applicable).
8115                                          *
8116                                          * Tell the pmap layer that we're
8117                                          * "clean" wrt nesting.
8118                                          */
8119                                         pmap_flags = PMAP_UNNEST_CLEAN;
8120                                 } else {
8121                                         /*
8122                                          * We're unmapping part of the nested
8123                                          * shared region, so we can't keep the
8124                                          * nested pmap.
8125                                          */
8126                                         pmap_flags = 0;
8127                                 }
8128                                 pmap_unnest_options(
8129                                         map->pmap,
8130                                         (addr64_t)entry->vme_start,
8131                                         entry->vme_end - entry->vme_start,
8132                                         pmap_flags);
8133 #endif  /* NO_NESTED_PMAP */
8134                                 if (map->mapped_in_other_pmaps &&
8135                                     os_ref_get_count(&map->map_refcnt) != 0) {
8136                                         /* clean up parent map/maps */
8137                                         vm_map_submap_pmap_clean(
8138                                                 map, entry->vme_start,
8139                                                 entry->vme_end,
8140                                                 VME_SUBMAP(entry),
8141                                                 VME_OFFSET(entry));
8142                                 }
8143                         } else {
8144                                 vm_map_submap_pmap_clean(
8145                                         map, entry->vme_start, entry->vme_end,
8146                                         VME_SUBMAP(entry),
8147                                         VME_OFFSET(entry));
8148                         }
8149                 } else if (VME_OBJECT(entry) != kernel_object &&
8150                     VME_OBJECT(entry) != compressor_object) {
8151                         object = VME_OBJECT(entry);
8152                         if (map->mapped_in_other_pmaps &&
8153                             os_ref_get_count(&map->map_refcnt) != 0) {
8154                                 vm_object_pmap_protect_options(
8155                                         object, VME_OFFSET(entry),
8156                                         entry->vme_end - entry->vme_start,
8157                                         PMAP_NULL,
8158                                         entry->vme_start,
8159                                         VM_PROT_NONE,
8160                                         PMAP_OPTIONS_REMOVE);
8161                         } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8162                             (map->pmap == kernel_pmap)) {
8163                                 /* Remove translations associated
8164                                  * with this range unless the entry
8165                                  * does not have an object, or
8166                                  * it's the kernel map or a descendant
8167                                  * since the platform could potentially
8168                                  * create "backdoor" mappings invisible
8169                                  * to the VM. It is expected that
8170                                  * objectless, non-kernel ranges
8171                                  * do not have such VM invisible
8172                                  * translations.
8173                                  */
8174                                 pmap_remove_options(map->pmap,
8175                                     (addr64_t)entry->vme_start,
8176                                     (addr64_t)entry->vme_end,
8177                                     PMAP_OPTIONS_REMOVE);
8178                         }
8179                 }
8180
8181                 if (entry->iokit_acct) {
8182                         /* alternate accounting */
8183                         DTRACE_VM4(vm_map_iokit_unmapped_region,
8184                             vm_map_t, map,
8185                             vm_map_offset_t, entry->vme_start,
8186                             vm_map_offset_t, entry->vme_end,
8187                             int, VME_ALIAS(entry));
8188                         vm_map_iokit_unmapped_region(map,
8189                             (entry->vme_end -
8190                             entry->vme_start));
8191                         entry->iokit_acct = FALSE;
8192                         entry->use_pmap = FALSE;
8193                 }
8194
8195                 /*
8196                  * All pmap mappings for this map entry must have been
8197                  * cleared by now.
8198                  */
8199 #if DEBUG
8200                 assert(vm_map_pmap_is_empty(map,
8201                     entry->vme_start,
8202                     entry->vme_end));
8203 #endif /* DEBUG */
8204
8205                 next = entry->vme_next;
8206
8207                 if (map->pmap == kernel_pmap &&
8208                     os_ref_get_count(&map->map_refcnt) != 0 &&
8209                     entry->vme_end < end &&
8210                     (next == vm_map_to_entry(map) ||
8211                     next->vme_start != entry->vme_end)) {
8212                         panic("vm_map_delete(%p,0x%llx,0x%llx): "
8213                             "hole after %p at 0x%llx\n",
8214                             map,
8215                             (uint64_t)start,
8216                             (uint64_t)end,
8217                             entry,
8218                             (uint64_t)entry->vme_end);
8219                 }
8220
8221                 /*
8222                  * If the desired range didn't end with "entry", then there is a gap if
8223                  * we wrapped around to the start of the map or if "entry" and "next"
8224                  * aren't contiguous.
8225                  *
8226                  * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8227                  * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8228                  */
8229                 if (gap_start == FIND_GAP &&
8230                     vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8231                     (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8232                         gap_start = entry->vme_end;
8233                 }
8234                 s = next->vme_start;
8235                 last_timestamp = map->timestamp;
8236
8237                 if (entry->permanent) {
8238                         /*
8239                          * A permanent entry can not be removed, so leave it
8240                          * in place but remove all access permissions.
8241                          */
8242                         entry->protection = VM_PROT_NONE;
8243                         entry->max_protection = VM_PROT_NONE;
8244                 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8245                     zap_map != VM_MAP_NULL) {
8246                         vm_map_size_t entry_size;
8247                         /*
8248                          * The caller wants to save the affected VM map entries
8249                          * into the "zap_map".  The caller will take care of
8250                          * these entries.
8251                          */
8252                         /* unlink the entry from "map" ... */
8253                         vm_map_store_entry_unlink(map, entry);
8254                         /* ... and add it to the end of the "zap_map" */
8255                         vm_map_store_entry_link(zap_map,
8256                             vm_map_last_entry(zap_map),
8257                             entry,
8258                             VM_MAP_KERNEL_FLAGS_NONE);
8259                         entry_size = entry->vme_end - entry->vme_start;
8260                         map->size -= entry_size;
8261                         zap_map->size += entry_size;
8262                         /* we didn't unlock the map, so no timestamp increase */
8263                         last_timestamp--;
8264                 } else {
8265                         vm_map_entry_delete(map, entry);
8266                         /* vm_map_entry_delete unlocks the map */
8267                         vm_map_lock(map);
8268                 }
8269
8270                 entry = next;
8271
8272                 if (entry == vm_map_to_entry(map)) {
8273                         break;
8274                 }
8275                 if (last_timestamp + 1 != map->timestamp) {
8276                         /*
8277                          * We are responsible for deleting everything
8278                          * from the given space. If someone has interfered,
8279                          * we pick up where we left off. Back fills should
8280                          * be all right for anyone, except map_delete, and
8281                          * we have to assume that the task has been fully
8282                          * disabled before we get here
8283                          */
8284                         if (!vm_map_lookup_entry(map, s, &entry)) {
8285                                 entry = entry->vme_next;
8286
8287                                 /*
8288                                  * Nothing found for s. If we weren't already done, then there is a gap.
8289                                  */
8290                                 if (gap_start == FIND_GAP && s < end) {
8291                                         gap_start = s;
8292                                 }
8293                                 s = entry->vme_start;
8294                         } else {
8295                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8296                         }
8297                         /*
8298                          * others can not only allocate behind us, we can
8299                          * also see coalesce while we don't have the map lock
8300                          */
8301                         if (entry == vm_map_to_entry(map)) {
8302                                 break;
8303                         }
8304                 }
8305                 last_timestamp = map->timestamp;
8306         }
8307
8308         if (map->wait_for_space) {
8309                 thread_wakeup((event_t) map);
8310         }
8311         /*
8312          * wake up anybody waiting on entries that we have already deleted.
8313          */
8314         if (need_wakeup) {
8315                 vm_map_entry_wakeup(map);
8316         }
8317
8318         if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8319                 DTRACE_VM3(kern_vm_deallocate_gap,
8320                     vm_map_offset_t, gap_start,
8321                     vm_map_offset_t, save_start,
8322                     vm_map_offset_t, save_end);
8323                 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8324                         vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8325                 }
8326         }
8327
8328         return KERN_SUCCESS;
8329 }
8330
8331
8332 /*
8333  *      vm_map_terminate:
8334  *
8335  *      Clean out a task's map.
8336  */
8337 kern_return_t
8338 vm_map_terminate(
8339         vm_map_t        map)
8340 {
8341         vm_map_lock(map);
8342         map->terminated = TRUE;
8343         vm_map_unlock(map);
8344
8345         return vm_map_remove(map,
8346                    map->min_offset,
8347                    map->max_offset,
8348                    /*
8349                     * Final cleanup:
8350                     * + no unnesting
8351                     * + remove immutable mappings
8352                     * + allow gaps in range
8353                     */
8354                    (VM_MAP_REMOVE_NO_UNNESTING |
8355                    VM_MAP_REMOVE_IMMUTABLE |
8356                    VM_MAP_REMOVE_GAPS_OK));
8357 }
8358
8359 /*
8360  *      vm_map_remove:
8361  *
8362  *      Remove the given address range from the target map.
8363  *      This is the exported form of vm_map_delete.
8364  */
8365 kern_return_t
8366 vm_map_remove(
8367         vm_map_t        map,
8368         vm_map_offset_t start,
8369         vm_map_offset_t end,
8370         boolean_t      flags)
8371 {
8372         kern_return_t   result;
8373
8374         vm_map_lock(map);
8375         VM_MAP_RANGE_CHECK(map, start, end);
8376         /*
8377          * For the zone_map, the kernel controls the allocation/freeing of memory.
8378          * Any free to the zone_map should be within the bounds of the map and
8379          * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8380          * free to the zone_map into a no-op, there is a problem and we should
8381          * panic.
8382          */
8383         if ((map == zone_map) && (start == end)) {
8384                 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8385         }
8386         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8387         vm_map_unlock(map);
8388
8389         return result;
8390 }
8391
8392 /*
8393  *      vm_map_remove_locked:
8394  *
8395  *      Remove the given address range from the target locked map.
8396  *      This is the exported form of vm_map_delete.
8397  */
8398 kern_return_t
8399 vm_map_remove_locked(
8400         vm_map_t        map,
8401         vm_map_offset_t start,
8402         vm_map_offset_t end,
8403         boolean_t       flags)
8404 {
8405         kern_return_t   result;
8406
8407         VM_MAP_RANGE_CHECK(map, start, end);
8408         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8409         return result;
8410 }
8411
8412
8413 /*
8414  *      Routine:        vm_map_copy_allocate
8415  *
8416  *      Description:
8417  *              Allocates and initializes a map copy object.
8418  */
8419 static vm_map_copy_t
8420 vm_map_copy_allocate(void)
8421 {
8422         vm_map_copy_t new_copy;
8423
8424         new_copy = zalloc(vm_map_copy_zone);
8425         bzero(new_copy, sizeof(*new_copy));
8426         new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8427         vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8428         vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8429         return new_copy;
8430 }
8431
8432 /*
8433  *      Routine:        vm_map_copy_discard
8434  *
8435  *      Description:
8436  *              Dispose of a map copy object (returned by
8437  *              vm_map_copyin).
8438  */
8439 void
8440 vm_map_copy_discard(
8441         vm_map_copy_t   copy)
8442 {
8443         if (copy == VM_MAP_COPY_NULL) {
8444                 return;
8445         }
8446
8447         switch (copy->type) {
8448         case VM_MAP_COPY_ENTRY_LIST:
8449                 while (vm_map_copy_first_entry(copy) !=
8450                     vm_map_copy_to_entry(copy)) {
8451                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
8452
8453                         vm_map_copy_entry_unlink(copy, entry);
8454                         if (entry->is_sub_map) {
8455                                 vm_map_deallocate(VME_SUBMAP(entry));
8456                         } else {
8457                                 vm_object_deallocate(VME_OBJECT(entry));
8458                         }
8459                         vm_map_copy_entry_dispose(copy, entry);
8460                 }
8461                 break;
8462         case VM_MAP_COPY_OBJECT:
8463                 vm_object_deallocate(copy->cpy_object);
8464                 break;
8465         case VM_MAP_COPY_KERNEL_BUFFER:
8466
8467                 /*
8468                  * The vm_map_copy_t and possibly the data buffer were
8469                  * allocated by a single call to kalloc(), i.e. the
8470                  * vm_map_copy_t was not allocated out of the zone.
8471                  */
8472                 if (copy->size > msg_ool_size_small || copy->offset) {
8473                         panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8474                             (long long)copy->size, (long long)copy->offset);
8475                 }
8476                 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8477                 return;
8478         }
8479         zfree(vm_map_copy_zone, copy);
8480 }
8481
8482 /*
8483  *      Routine:        vm_map_copy_copy
8484  *
8485  *      Description:
8486  *                      Move the information in a map copy object to
8487  *                      a new map copy object, leaving the old one
8488  *                      empty.
8489  *
8490  *                      This is used by kernel routines that need
8491  *                      to look at out-of-line data (in copyin form)
8492  *                      before deciding whether to return SUCCESS.
8493  *                      If the routine returns FAILURE, the original
8494  *                      copy object will be deallocated; therefore,
8495  *                      these routines must make a copy of the copy
8496  *                      object and leave the original empty so that
8497  *                      deallocation will not fail.
8498  */
8499 vm_map_copy_t
8500 vm_map_copy_copy(
8501         vm_map_copy_t   copy)
8502 {
8503         vm_map_copy_t   new_copy;
8504
8505         if (copy == VM_MAP_COPY_NULL) {
8506                 return VM_MAP_COPY_NULL;
8507         }
8508
8509         /*
8510          * Allocate a new copy object, and copy the information
8511          * from the old one into it.
8512          */
8513
8514         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8515         *new_copy = *copy;
8516
8517         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8518                 /*
8519                  * The links in the entry chain must be
8520                  * changed to point to the new copy object.
8521                  */
8522                 vm_map_copy_first_entry(copy)->vme_prev
8523                         = vm_map_copy_to_entry(new_copy);
8524                 vm_map_copy_last_entry(copy)->vme_next
8525                         = vm_map_copy_to_entry(new_copy);
8526         }
8527
8528         /*
8529          * Change the old copy object into one that contains
8530          * nothing to be deallocated.
8531          */
8532         copy->type = VM_MAP_COPY_OBJECT;
8533         copy->cpy_object = VM_OBJECT_NULL;
8534
8535         /*
8536          * Return the new object.
8537          */
8538         return new_copy;
8539 }
8540
8541 static kern_return_t
8542 vm_map_overwrite_submap_recurse(
8543         vm_map_t        dst_map,
8544         vm_map_offset_t dst_addr,
8545         vm_map_size_t   dst_size)
8546 {
8547         vm_map_offset_t dst_end;
8548         vm_map_entry_t  tmp_entry;
8549         vm_map_entry_t  entry;
8550         kern_return_t   result;
8551         boolean_t       encountered_sub_map = FALSE;
8552
8553
8554
8555         /*
8556          *      Verify that the destination is all writeable
8557          *      initially.  We have to trunc the destination
8558          *      address and round the copy size or we'll end up
8559          *      splitting entries in strange ways.
8560          */
8561
8562         dst_end = vm_map_round_page(dst_addr + dst_size,
8563             VM_MAP_PAGE_MASK(dst_map));
8564         vm_map_lock(dst_map);
8565
8566 start_pass_1:
8567         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8568                 vm_map_unlock(dst_map);
8569                 return KERN_INVALID_ADDRESS;
8570         }
8571
8572         vm_map_clip_start(dst_map,
8573             tmp_entry,
8574             vm_map_trunc_page(dst_addr,
8575             VM_MAP_PAGE_MASK(dst_map)));
8576         if (tmp_entry->is_sub_map) {
8577                 /* clipping did unnest if needed */
8578                 assert(!tmp_entry->use_pmap);
8579         }
8580
8581         for (entry = tmp_entry;;) {
8582                 vm_map_entry_t  next;
8583
8584                 next = entry->vme_next;
8585                 while (entry->is_sub_map) {
8586                         vm_map_offset_t sub_start;
8587                         vm_map_offset_t sub_end;
8588                         vm_map_offset_t local_end;
8589
8590                         if (entry->in_transition) {
8591                                 /*
8592                                  * Say that we are waiting, and wait for entry.
8593                                  */
8594                                 entry->needs_wakeup = TRUE;
8595                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8596
8597                                 goto start_pass_1;
8598                         }
8599
8600                         encountered_sub_map = TRUE;
8601                         sub_start = VME_OFFSET(entry);
8602
8603                         if (entry->vme_end < dst_end) {
8604                                 sub_end = entry->vme_end;
8605                         } else {
8606                                 sub_end = dst_end;
8607                         }
8608                         sub_end -= entry->vme_start;
8609                         sub_end += VME_OFFSET(entry);
8610                         local_end = entry->vme_end;
8611                         vm_map_unlock(dst_map);
8612
8613                         result = vm_map_overwrite_submap_recurse(
8614                                 VME_SUBMAP(entry),
8615                                 sub_start,
8616                                 sub_end - sub_start);
8617
8618                         if (result != KERN_SUCCESS) {
8619                                 return result;
8620                         }
8621                         if (dst_end <= entry->vme_end) {
8622                                 return KERN_SUCCESS;
8623                         }
8624                         vm_map_lock(dst_map);
8625                         if (!vm_map_lookup_entry(dst_map, local_end,
8626                             &tmp_entry)) {
8627                                 vm_map_unlock(dst_map);
8628                                 return KERN_INVALID_ADDRESS;
8629                         }
8630                         entry = tmp_entry;
8631                         next = entry->vme_next;
8632                 }
8633
8634                 if (!(entry->protection & VM_PROT_WRITE)) {
8635                         vm_map_unlock(dst_map);
8636                         return KERN_PROTECTION_FAILURE;
8637                 }
8638
8639                 /*
8640                  *      If the entry is in transition, we must wait
8641                  *      for it to exit that state.  Anything could happen
8642                  *      when we unlock the map, so start over.
8643                  */
8644                 if (entry->in_transition) {
8645                         /*
8646                          * Say that we are waiting, and wait for entry.
8647                          */
8648                         entry->needs_wakeup = TRUE;
8649                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8650
8651                         goto start_pass_1;
8652                 }
8653
8654 /*
8655  *              our range is contained completely within this map entry
8656  */
8657                 if (dst_end <= entry->vme_end) {
8658                         vm_map_unlock(dst_map);
8659                         return KERN_SUCCESS;
8660                 }
8661 /*
8662  *              check that range specified is contiguous region
8663  */
8664                 if ((next == vm_map_to_entry(dst_map)) ||
8665                     (next->vme_start != entry->vme_end)) {
8666                         vm_map_unlock(dst_map);
8667                         return KERN_INVALID_ADDRESS;
8668                 }
8669
8670                 /*
8671                  *      Check for permanent objects in the destination.
8672                  */
8673                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8674                     ((!VME_OBJECT(entry)->internal) ||
8675                     (VME_OBJECT(entry)->true_share))) {
8676                         if (encountered_sub_map) {
8677                                 vm_map_unlock(dst_map);
8678                                 return KERN_FAILURE;
8679                         }
8680                 }
8681
8682
8683                 entry = next;
8684         }/* for */
8685         vm_map_unlock(dst_map);
8686         return KERN_SUCCESS;
8687 }
8688
8689 /*
8690  *      Routine:        vm_map_copy_overwrite
8691  *
8692  *      Description:
8693  *              Copy the memory described by the map copy
8694  *              object (copy; returned by vm_map_copyin) onto
8695  *              the specified destination region (dst_map, dst_addr).
8696  *              The destination must be writeable.
8697  *
8698  *              Unlike vm_map_copyout, this routine actually
8699  *              writes over previously-mapped memory.  If the
8700  *              previous mapping was to a permanent (user-supplied)
8701  *              memory object, it is preserved.
8702  *
8703  *              The attributes (protection and inheritance) of the
8704  *              destination region are preserved.
8705  *
8706  *              If successful, consumes the copy object.
8707  *              Otherwise, the caller is responsible for it.
8708  *
8709  *      Implementation notes:
8710  *              To overwrite aligned temporary virtual memory, it is
8711  *              sufficient to remove the previous mapping and insert
8712  *              the new copy.  This replacement is done either on
8713  *              the whole region (if no permanent virtual memory
8714  *              objects are embedded in the destination region) or
8715  *              in individual map entries.
8716  *
8717  *              To overwrite permanent virtual memory, it is necessary
8718  *              to copy each page, as the external memory management
8719  *              interface currently does not provide any optimizations.
8720  *
8721  *              Unaligned memory also has to be copied.  It is possible
8722  *              to use 'vm_trickery' to copy the aligned data.  This is
8723  *              not done but not hard to implement.
8724  *
8725  *              Once a page of permanent memory has been overwritten,
8726  *              it is impossible to interrupt this function; otherwise,
8727  *              the call would be neither atomic nor location-independent.
8728  *              The kernel-state portion of a user thread must be
8729  *              interruptible.
8730  *
8731  *              It may be expensive to forward all requests that might
8732  *              overwrite permanent memory (vm_write, vm_copy) to
8733  *              uninterruptible kernel threads.  This routine may be
8734  *              called by interruptible threads; however, success is
8735  *              not guaranteed -- if the request cannot be performed
8736  *              atomically and interruptibly, an error indication is
8737  *              returned.
8738  */
8739
8740 static kern_return_t
8741 vm_map_copy_overwrite_nested(
8742         vm_map_t                dst_map,
8743         vm_map_address_t        dst_addr,
8744         vm_map_copy_t           copy,
8745         boolean_t               interruptible,
8746         pmap_t                  pmap,
8747         boolean_t               discard_on_success)
8748 {
8749         vm_map_offset_t         dst_end;
8750         vm_map_entry_t          tmp_entry;
8751         vm_map_entry_t          entry;
8752         kern_return_t           kr;
8753         boolean_t               aligned = TRUE;
8754         boolean_t               contains_permanent_objects = FALSE;
8755         boolean_t               encountered_sub_map = FALSE;
8756         vm_map_offset_t         base_addr;
8757         vm_map_size_t           copy_size;
8758         vm_map_size_t           total_size;
8759
8760
8761         /*
8762          *      Check for null copy object.
8763          */
8764
8765         if (copy == VM_MAP_COPY_NULL) {
8766                 return KERN_SUCCESS;
8767         }
8768
8769         /*
8770          *      Check for special kernel buffer allocated
8771          *      by new_ipc_kmsg_copyin.
8772          */
8773
8774         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8775                 return vm_map_copyout_kernel_buffer(
8776                         dst_map, &dst_addr,
8777                         copy, copy->size, TRUE, discard_on_success);
8778         }
8779
8780         /*
8781          *      Only works for entry lists at the moment.  Will
8782          *      support page lists later.
8783          */
8784
8785         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8786
8787         if (copy->size == 0) {
8788                 if (discard_on_success) {
8789                         vm_map_copy_discard(copy);
8790                 }
8791                 return KERN_SUCCESS;
8792         }
8793
8794         /*
8795          *      Verify that the destination is all writeable
8796          *      initially.  We have to trunc the destination
8797          *      address and round the copy size or we'll end up
8798          *      splitting entries in strange ways.
8799          */
8800
8801         if (!VM_MAP_PAGE_ALIGNED(copy->size,
8802             VM_MAP_PAGE_MASK(dst_map)) ||
8803             !VM_MAP_PAGE_ALIGNED(copy->offset,
8804             VM_MAP_PAGE_MASK(dst_map)) ||
8805             !VM_MAP_PAGE_ALIGNED(dst_addr,
8806             VM_MAP_PAGE_MASK(dst_map))) {
8807                 aligned = FALSE;
8808                 dst_end = vm_map_round_page(dst_addr + copy->size,
8809                     VM_MAP_PAGE_MASK(dst_map));
8810         } else {
8811                 dst_end = dst_addr + copy->size;
8812         }
8813
8814         vm_map_lock(dst_map);
8815
8816         /* LP64todo - remove this check when vm_map_commpage64()
8817          * no longer has to stuff in a map_entry for the commpage
8818          * above the map's max_offset.
8819          */
8820         if (dst_addr >= dst_map->max_offset) {
8821                 vm_map_unlock(dst_map);
8822                 return KERN_INVALID_ADDRESS;
8823         }
8824
8825 start_pass_1:
8826         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8827                 vm_map_unlock(dst_map);
8828                 return KERN_INVALID_ADDRESS;
8829         }
8830         vm_map_clip_start(dst_map,
8831             tmp_entry,
8832             vm_map_trunc_page(dst_addr,
8833             VM_MAP_PAGE_MASK(dst_map)));
8834         for (entry = tmp_entry;;) {
8835                 vm_map_entry_t  next = entry->vme_next;
8836
8837                 while (entry->is_sub_map) {
8838                         vm_map_offset_t sub_start;
8839                         vm_map_offset_t sub_end;
8840                         vm_map_offset_t local_end;
8841
8842                         if (entry->in_transition) {
8843                                 /*
8844                                  * Say that we are waiting, and wait for entry.
8845                                  */
8846                                 entry->needs_wakeup = TRUE;
8847                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8848
8849                                 goto start_pass_1;
8850                         }
8851
8852                         local_end = entry->vme_end;
8853                         if (!(entry->needs_copy)) {
8854                                 /* if needs_copy we are a COW submap */
8855                                 /* in such a case we just replace so */
8856                                 /* there is no need for the follow-  */
8857                                 /* ing check.                        */
8858                                 encountered_sub_map = TRUE;
8859                                 sub_start = VME_OFFSET(entry);
8860
8861                                 if (entry->vme_end < dst_end) {
8862                                         sub_end = entry->vme_end;
8863                                 } else {
8864                                         sub_end = dst_end;
8865                                 }
8866                                 sub_end -= entry->vme_start;
8867                                 sub_end += VME_OFFSET(entry);
8868                                 vm_map_unlock(dst_map);
8869
8870                                 kr = vm_map_overwrite_submap_recurse(
8871                                         VME_SUBMAP(entry),
8872                                         sub_start,
8873                                         sub_end - sub_start);
8874                                 if (kr != KERN_SUCCESS) {
8875                                         return kr;
8876                                 }
8877                                 vm_map_lock(dst_map);
8878                         }
8879
8880                         if (dst_end <= entry->vme_end) {
8881                                 goto start_overwrite;
8882                         }
8883                         if (!vm_map_lookup_entry(dst_map, local_end,
8884                             &entry)) {
8885                                 vm_map_unlock(dst_map);
8886                                 return KERN_INVALID_ADDRESS;
8887                         }
8888                         next = entry->vme_next;
8889                 }
8890
8891                 if (!(entry->protection & VM_PROT_WRITE)) {
8892                         vm_map_unlock(dst_map);
8893                         return KERN_PROTECTION_FAILURE;
8894                 }
8895
8896                 /*
8897                  *      If the entry is in transition, we must wait
8898                  *      for it to exit that state.  Anything could happen
8899                  *      when we unlock the map, so start over.
8900                  */
8901                 if (entry->in_transition) {
8902                         /*
8903                          * Say that we are waiting, and wait for entry.
8904                          */
8905                         entry->needs_wakeup = TRUE;
8906                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8907
8908                         goto start_pass_1;
8909                 }
8910
8911 /*
8912  *              our range is contained completely within this map entry
8913  */
8914                 if (dst_end <= entry->vme_end) {
8915                         break;
8916                 }
8917 /*
8918  *              check that range specified is contiguous region
8919  */
8920                 if ((next == vm_map_to_entry(dst_map)) ||
8921                     (next->vme_start != entry->vme_end)) {
8922                         vm_map_unlock(dst_map);
8923                         return KERN_INVALID_ADDRESS;
8924                 }
8925
8926
8927                 /*
8928                  *      Check for permanent objects in the destination.
8929                  */
8930                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8931                     ((!VME_OBJECT(entry)->internal) ||
8932                     (VME_OBJECT(entry)->true_share))) {
8933                         contains_permanent_objects = TRUE;
8934                 }
8935
8936                 entry = next;
8937         }/* for */
8938
8939 start_overwrite:
8940         /*
8941          *      If there are permanent objects in the destination, then
8942          *      the copy cannot be interrupted.
8943          */
8944
8945         if (interruptible && contains_permanent_objects) {
8946                 vm_map_unlock(dst_map);
8947                 return KERN_FAILURE;   /* XXX */
8948         }
8949
8950         /*
8951          *
8952          *      Make a second pass, overwriting the data
8953          *      At the beginning of each loop iteration,
8954          *      the next entry to be overwritten is "tmp_entry"
8955          *      (initially, the value returned from the lookup above),
8956          *      and the starting address expected in that entry
8957          *      is "start".
8958          */
8959
8960         total_size = copy->size;
8961         if (encountered_sub_map) {
8962                 copy_size = 0;
8963                 /* re-calculate tmp_entry since we've had the map */
8964                 /* unlocked */
8965                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8966                         vm_map_unlock(dst_map);
8967                         return KERN_INVALID_ADDRESS;
8968                 }
8969         } else {
8970                 copy_size = copy->size;
8971         }
8972
8973         base_addr = dst_addr;
8974         while (TRUE) {
8975                 /* deconstruct the copy object and do in parts */
8976                 /* only in sub_map, interruptable case */
8977                 vm_map_entry_t  copy_entry;
8978                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
8979                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
8980                 int             nentries;
8981                 int             remaining_entries = 0;
8982                 vm_map_offset_t new_offset = 0;
8983
8984                 for (entry = tmp_entry; copy_size == 0;) {
8985                         vm_map_entry_t  next;
8986
8987                         next = entry->vme_next;
8988
8989                         /* tmp_entry and base address are moved along */
8990                         /* each time we encounter a sub-map.  Otherwise */
8991                         /* entry can outpase tmp_entry, and the copy_size */
8992                         /* may reflect the distance between them */
8993                         /* if the current entry is found to be in transition */
8994                         /* we will start over at the beginning or the last */
8995                         /* encounter of a submap as dictated by base_addr */
8996                         /* we will zero copy_size accordingly. */
8997                         if (entry->in_transition) {
8998                                 /*
8999                                  * Say that we are waiting, and wait for entry.
9000                                  */
9001                                 entry->needs_wakeup = TRUE;
9002                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
9003
9004                                 if (!vm_map_lookup_entry(dst_map, base_addr,
9005                                     &tmp_entry)) {
9006                                         vm_map_unlock(dst_map);
9007                                         return KERN_INVALID_ADDRESS;
9008                                 }
9009                                 copy_size = 0;
9010                                 entry = tmp_entry;
9011                                 continue;
9012                         }
9013                         if (entry->is_sub_map) {
9014                                 vm_map_offset_t sub_start;
9015                                 vm_map_offset_t sub_end;
9016                                 vm_map_offset_t local_end;
9017
9018                                 if (entry->needs_copy) {
9019                                         /* if this is a COW submap */
9020                                         /* just back the range with a */
9021                                         /* anonymous entry */
9022                                         if (entry->vme_end < dst_end) {
9023                                                 sub_end = entry->vme_end;
9024                                         } else {
9025                                                 sub_end = dst_end;
9026                                         }
9027                                         if (entry->vme_start < base_addr) {
9028                                                 sub_start = base_addr;
9029                                         } else {
9030                                                 sub_start = entry->vme_start;
9031                                         }
9032                                         vm_map_clip_end(
9033                                                 dst_map, entry, sub_end);
9034                                         vm_map_clip_start(
9035                                                 dst_map, entry, sub_start);
9036                                         assert(!entry->use_pmap);
9037                                         assert(!entry->iokit_acct);
9038                                         entry->use_pmap = TRUE;
9039                                         entry->is_sub_map = FALSE;
9040                                         vm_map_deallocate(
9041                                                 VME_SUBMAP(entry));
9042                                         VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9043                                         VME_OFFSET_SET(entry, 0);
9044                                         entry->is_shared = FALSE;
9045                                         entry->needs_copy = FALSE;
9046                                         entry->protection = VM_PROT_DEFAULT;
9047                                         entry->max_protection = VM_PROT_ALL;
9048                                         entry->wired_count = 0;
9049                                         entry->user_wired_count = 0;
9050                                         if (entry->inheritance
9051                                             == VM_INHERIT_SHARE) {
9052                                                 entry->inheritance = VM_INHERIT_COPY;
9053                                         }
9054                                         continue;
9055                                 }
9056                                 /* first take care of any non-sub_map */
9057                                 /* entries to send */
9058                                 if (base_addr < entry->vme_start) {
9059                                         /* stuff to send */
9060                                         copy_size =
9061                                             entry->vme_start - base_addr;
9062                                         break;
9063                                 }
9064                                 sub_start = VME_OFFSET(entry);
9065
9066                                 if (entry->vme_end < dst_end) {
9067                                         sub_end = entry->vme_end;
9068                                 } else {
9069                                         sub_end = dst_end;
9070                                 }
9071                                 sub_end -= entry->vme_start;
9072                                 sub_end += VME_OFFSET(entry);
9073                                 local_end = entry->vme_end;
9074                                 vm_map_unlock(dst_map);
9075                                 copy_size = sub_end - sub_start;
9076
9077                                 /* adjust the copy object */
9078                                 if (total_size > copy_size) {
9079                                         vm_map_size_t   local_size = 0;
9080                                         vm_map_size_t   entry_size;
9081
9082                                         nentries = 1;
9083                                         new_offset = copy->offset;
9084                                         copy_entry = vm_map_copy_first_entry(copy);
9085                                         while (copy_entry !=
9086                                             vm_map_copy_to_entry(copy)) {
9087                                                 entry_size = copy_entry->vme_end -
9088                                                     copy_entry->vme_start;
9089                                                 if ((local_size < copy_size) &&
9090                                                     ((local_size + entry_size)
9091                                                     >= copy_size)) {
9092                                                         vm_map_copy_clip_end(copy,
9093                                                             copy_entry,
9094                                                             copy_entry->vme_start +
9095                                                             (copy_size - local_size));
9096                                                         entry_size = copy_entry->vme_end -
9097                                                             copy_entry->vme_start;
9098                                                         local_size += entry_size;
9099                                                         new_offset += entry_size;
9100                                                 }
9101                                                 if (local_size >= copy_size) {
9102                                                         next_copy = copy_entry->vme_next;
9103                                                         copy_entry->vme_next =
9104                                                             vm_map_copy_to_entry(copy);
9105                                                         previous_prev =
9106                                                             copy->cpy_hdr.links.prev;
9107                                                         copy->cpy_hdr.links.prev = copy_entry;
9108                                                         copy->size = copy_size;
9109                                                         remaining_entries =
9110                                                             copy->cpy_hdr.nentries;
9111                                                         remaining_entries -= nentries;
9112                                                         copy->cpy_hdr.nentries = nentries;
9113                                                         break;
9114                                                 } else {
9115                                                         local_size += entry_size;
9116                                                         new_offset += entry_size;
9117                                                         nentries++;
9118                                                 }
9119                                                 copy_entry = copy_entry->vme_next;
9120                                         }
9121                                 }
9122
9123                                 if ((entry->use_pmap) && (pmap == NULL)) {
9124                                         kr = vm_map_copy_overwrite_nested(
9125                                                 VME_SUBMAP(entry),
9126                                                 sub_start,
9127                                                 copy,
9128                                                 interruptible,
9129                                                 VME_SUBMAP(entry)->pmap,
9130                                                 TRUE);
9131                                 } else if (pmap != NULL) {
9132                                         kr = vm_map_copy_overwrite_nested(
9133                                                 VME_SUBMAP(entry),
9134                                                 sub_start,
9135                                                 copy,
9136                                                 interruptible, pmap,
9137                                                 TRUE);
9138                                 } else {
9139                                         kr = vm_map_copy_overwrite_nested(
9140                                                 VME_SUBMAP(entry),
9141                                                 sub_start,
9142                                                 copy,
9143                                                 interruptible,
9144                                                 dst_map->pmap,
9145                                                 TRUE);
9146                                 }
9147                                 if (kr != KERN_SUCCESS) {
9148                                         if (next_copy != NULL) {
9149                                                 copy->cpy_hdr.nentries +=
9150                                                     remaining_entries;
9151                                                 copy->cpy_hdr.links.prev->vme_next =
9152                                                     next_copy;
9153                                                 copy->cpy_hdr.links.prev
9154                                                         = previous_prev;
9155                                                 copy->size = total_size;
9156                                         }
9157                                         return kr;
9158                                 }
9159                                 if (dst_end <= local_end) {
9160                                         return KERN_SUCCESS;
9161                                 }
9162                                 /* otherwise copy no longer exists, it was */
9163                                 /* destroyed after successful copy_overwrite */
9164                                 copy = vm_map_copy_allocate();
9165                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
9166                                 copy->offset = new_offset;
9167
9168                                 /*
9169                                  * XXX FBDP
9170                                  * this does not seem to deal with
9171                                  * the VM map store (R&B tree)
9172                                  */
9173
9174                                 total_size -= copy_size;
9175                                 copy_size = 0;
9176                                 /* put back remainder of copy in container */
9177                                 if (next_copy != NULL) {
9178                                         copy->cpy_hdr.nentries = remaining_entries;
9179                                         copy->cpy_hdr.links.next = next_copy;
9180                                         copy->cpy_hdr.links.prev = previous_prev;
9181                                         copy->size = total_size;
9182                                         next_copy->vme_prev =
9183                                             vm_map_copy_to_entry(copy);
9184                                         next_copy = NULL;
9185                                 }
9186                                 base_addr = local_end;
9187                                 vm_map_lock(dst_map);
9188                                 if (!vm_map_lookup_entry(dst_map,
9189                                     local_end, &tmp_entry)) {
9190                                         vm_map_unlock(dst_map);
9191                                         return KERN_INVALID_ADDRESS;
9192                                 }
9193                                 entry = tmp_entry;
9194                                 continue;
9195                         }
9196                         if (dst_end <= entry->vme_end) {
9197                                 copy_size = dst_end - base_addr;
9198                                 break;
9199                         }
9200
9201                         if ((next == vm_map_to_entry(dst_map)) ||
9202                             (next->vme_start != entry->vme_end)) {
9203                                 vm_map_unlock(dst_map);
9204                                 return KERN_INVALID_ADDRESS;
9205                         }
9206
9207                         entry = next;
9208                 }/* for */
9209
9210                 next_copy = NULL;
9211                 nentries = 1;
9212
9213                 /* adjust the copy object */
9214                 if (total_size > copy_size) {
9215                         vm_map_size_t   local_size = 0;
9216                         vm_map_size_t   entry_size;
9217
9218                         new_offset = copy->offset;
9219                         copy_entry = vm_map_copy_first_entry(copy);
9220                         while (copy_entry != vm_map_copy_to_entry(copy)) {
9221                                 entry_size = copy_entry->vme_end -
9222                                     copy_entry->vme_start;
9223                                 if ((local_size < copy_size) &&
9224                                     ((local_size + entry_size)
9225                                     >= copy_size)) {
9226                                         vm_map_copy_clip_end(copy, copy_entry,
9227                                             copy_entry->vme_start +
9228                                             (copy_size - local_size));
9229                                         entry_size = copy_entry->vme_end -
9230                                             copy_entry->vme_start;
9231                                         local_size += entry_size;
9232                                         new_offset += entry_size;
9233                                 }
9234                                 if (local_size >= copy_size) {
9235                                         next_copy = copy_entry->vme_next;
9236                                         copy_entry->vme_next =
9237                                             vm_map_copy_to_entry(copy);
9238                                         previous_prev =
9239                                             copy->cpy_hdr.links.prev;
9240                                         copy->cpy_hdr.links.prev = copy_entry;
9241                                         copy->size = copy_size;
9242                                         remaining_entries =
9243                                             copy->cpy_hdr.nentries;
9244                                         remaining_entries -= nentries;
9245                                         copy->cpy_hdr.nentries = nentries;
9246                                         break;
9247                                 } else {
9248                                         local_size += entry_size;
9249                                         new_offset += entry_size;
9250                                         nentries++;
9251                                 }
9252                                 copy_entry = copy_entry->vme_next;
9253                         }
9254                 }
9255
9256                 if (aligned) {
9257                         pmap_t  local_pmap;
9258
9259                         if (pmap) {
9260                                 local_pmap = pmap;
9261                         } else {
9262                                 local_pmap = dst_map->pmap;
9263                         }
9264
9265                         if ((kr =  vm_map_copy_overwrite_aligned(
9266                                     dst_map, tmp_entry, copy,
9267                                     base_addr, local_pmap)) != KERN_SUCCESS) {
9268                                 if (next_copy != NULL) {
9269                                         copy->cpy_hdr.nentries +=
9270                                             remaining_entries;
9271                                         copy->cpy_hdr.links.prev->vme_next =
9272                                             next_copy;
9273                                         copy->cpy_hdr.links.prev =
9274                                             previous_prev;
9275                                         copy->size += copy_size;
9276                                 }
9277                                 return kr;
9278                         }
9279                         vm_map_unlock(dst_map);
9280                 } else {
9281                         /*
9282                          * Performance gain:
9283                          *
9284                          * if the copy and dst address are misaligned but the same
9285                          * offset within the page we can copy_not_aligned the
9286                          * misaligned parts and copy aligned the rest.  If they are
9287                          * aligned but len is unaligned we simply need to copy
9288                          * the end bit unaligned.  We'll need to split the misaligned
9289                          * bits of the region in this case !
9290                          */
9291                         /* ALWAYS UNLOCKS THE dst_map MAP */
9292                         kr = vm_map_copy_overwrite_unaligned(
9293                                 dst_map,
9294                                 tmp_entry,
9295                                 copy,
9296                                 base_addr,
9297                                 discard_on_success);
9298                         if (kr != KERN_SUCCESS) {
9299                                 if (next_copy != NULL) {
9300                                         copy->cpy_hdr.nentries +=
9301                                             remaining_entries;
9302                                         copy->cpy_hdr.links.prev->vme_next =
9303                                             next_copy;
9304                                         copy->cpy_hdr.links.prev =
9305                                             previous_prev;
9306                                         copy->size += copy_size;
9307                                 }
9308                                 return kr;
9309                         }
9310                 }
9311                 total_size -= copy_size;
9312                 if (total_size == 0) {
9313                         break;
9314                 }
9315                 base_addr += copy_size;
9316                 copy_size = 0;
9317                 copy->offset = new_offset;
9318                 if (next_copy != NULL) {
9319                         copy->cpy_hdr.nentries = remaining_entries;
9320                         copy->cpy_hdr.links.next = next_copy;
9321                         copy->cpy_hdr.links.prev = previous_prev;
9322                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
9323                         copy->size = total_size;
9324                 }
9325                 vm_map_lock(dst_map);
9326                 while (TRUE) {
9327                         if (!vm_map_lookup_entry(dst_map,
9328                             base_addr, &tmp_entry)) {
9329                                 vm_map_unlock(dst_map);
9330                                 return KERN_INVALID_ADDRESS;
9331                         }
9332                         if (tmp_entry->in_transition) {
9333                                 entry->needs_wakeup = TRUE;
9334                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
9335                         } else {
9336                                 break;
9337                         }
9338                 }
9339                 vm_map_clip_start(dst_map,
9340                     tmp_entry,
9341                     vm_map_trunc_page(base_addr,
9342                     VM_MAP_PAGE_MASK(dst_map)));
9343
9344                 entry = tmp_entry;
9345         } /* while */
9346
9347         /*
9348          *      Throw away the vm_map_copy object
9349          */
9350         if (discard_on_success) {
9351                 vm_map_copy_discard(copy);
9352         }
9353
9354         return KERN_SUCCESS;
9355 }/* vm_map_copy_overwrite */
9356
9357 kern_return_t
9358 vm_map_copy_overwrite(
9359         vm_map_t        dst_map,
9360         vm_map_offset_t dst_addr,
9361         vm_map_copy_t   copy,
9362         boolean_t       interruptible)
9363 {
9364         vm_map_size_t   head_size, tail_size;
9365         vm_map_copy_t   head_copy, tail_copy;
9366         vm_map_offset_t head_addr, tail_addr;
9367         vm_map_entry_t  entry;
9368         kern_return_t   kr;
9369         vm_map_offset_t effective_page_mask, effective_page_size;
9370
9371         head_size = 0;
9372         tail_size = 0;
9373         head_copy = NULL;
9374         tail_copy = NULL;
9375         head_addr = 0;
9376         tail_addr = 0;
9377
9378         if (interruptible ||
9379             copy == VM_MAP_COPY_NULL ||
9380             copy->type != VM_MAP_COPY_ENTRY_LIST) {
9381                 /*
9382                  * We can't split the "copy" map if we're interruptible
9383                  * or if we don't have a "copy" map...
9384                  */
9385 blunt_copy:
9386                 return vm_map_copy_overwrite_nested(dst_map,
9387                            dst_addr,
9388                            copy,
9389                            interruptible,
9390                            (pmap_t) NULL,
9391                            TRUE);
9392         }
9393
9394         effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9395         effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9396             effective_page_mask);
9397         effective_page_size = effective_page_mask + 1;
9398
9399         if (copy->size < 3 * effective_page_size) {
9400                 /*
9401                  * Too small to bother with optimizing...
9402                  */
9403                 goto blunt_copy;
9404         }
9405
9406         if ((dst_addr & effective_page_mask) !=
9407             (copy->offset & effective_page_mask)) {
9408                 /*
9409                  * Incompatible mis-alignment of source and destination...
9410                  */
9411                 goto blunt_copy;
9412         }
9413
9414         /*
9415          * Proper alignment or identical mis-alignment at the beginning.
9416          * Let's try and do a small unaligned copy first (if needed)
9417          * and then an aligned copy for the rest.
9418          */
9419         if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9420                 head_addr = dst_addr;
9421                 head_size = (effective_page_size -
9422                     (copy->offset & effective_page_mask));
9423                 head_size = MIN(head_size, copy->size);
9424         }
9425         if (!vm_map_page_aligned(copy->offset + copy->size,
9426             effective_page_mask)) {
9427                 /*
9428                  * Mis-alignment at the end.
9429                  * Do an aligned copy up to the last page and
9430                  * then an unaligned copy for the remaining bytes.
9431                  */
9432                 tail_size = ((copy->offset + copy->size) &
9433                     effective_page_mask);
9434                 tail_size = MIN(tail_size, copy->size);
9435                 tail_addr = dst_addr + copy->size - tail_size;
9436                 assert(tail_addr >= head_addr + head_size);
9437         }
9438         assert(head_size + tail_size <= copy->size);
9439
9440         if (head_size + tail_size == copy->size) {
9441                 /*
9442                  * It's all unaligned, no optimization possible...
9443                  */
9444                 goto blunt_copy;
9445         }
9446
9447         /*
9448          * Can't optimize if there are any submaps in the
9449          * destination due to the way we free the "copy" map
9450          * progressively in vm_map_copy_overwrite_nested()
9451          * in that case.
9452          */
9453         vm_map_lock_read(dst_map);
9454         if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9455                 vm_map_unlock_read(dst_map);
9456                 goto blunt_copy;
9457         }
9458         for (;
9459             (entry != vm_map_copy_to_entry(copy) &&
9460             entry->vme_start < dst_addr + copy->size);
9461             entry = entry->vme_next) {
9462                 if (entry->is_sub_map) {
9463                         vm_map_unlock_read(dst_map);
9464                         goto blunt_copy;
9465                 }
9466         }
9467         vm_map_unlock_read(dst_map);
9468
9469         if (head_size) {
9470                 /*
9471                  * Unaligned copy of the first "head_size" bytes, to reach
9472                  * a page boundary.
9473                  */
9474
9475                 /*
9476                  * Extract "head_copy" out of "copy".
9477                  */
9478                 head_copy = vm_map_copy_allocate();
9479                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9480                 head_copy->cpy_hdr.entries_pageable =
9481                     copy->cpy_hdr.entries_pageable;
9482                 vm_map_store_init(&head_copy->cpy_hdr);
9483
9484                 entry = vm_map_copy_first_entry(copy);
9485                 if (entry->vme_end < copy->offset + head_size) {
9486                         head_size = entry->vme_end - copy->offset;
9487                 }
9488
9489                 head_copy->offset = copy->offset;
9490                 head_copy->size = head_size;
9491                 copy->offset += head_size;
9492                 copy->size -= head_size;
9493
9494                 vm_map_copy_clip_end(copy, entry, copy->offset);
9495                 vm_map_copy_entry_unlink(copy, entry);
9496                 vm_map_copy_entry_link(head_copy,
9497                     vm_map_copy_to_entry(head_copy),
9498                     entry);
9499
9500                 /*
9501                  * Do the unaligned copy.
9502                  */
9503                 kr = vm_map_copy_overwrite_nested(dst_map,
9504                     head_addr,
9505                     head_copy,
9506                     interruptible,
9507                     (pmap_t) NULL,
9508                     FALSE);
9509                 if (kr != KERN_SUCCESS) {
9510                         goto done;
9511                 }
9512         }
9513
9514         if (tail_size) {
9515                 /*
9516                  * Extract "tail_copy" out of "copy".
9517                  */
9518                 tail_copy = vm_map_copy_allocate();
9519                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9520                 tail_copy->cpy_hdr.entries_pageable =
9521                     copy->cpy_hdr.entries_pageable;
9522                 vm_map_store_init(&tail_copy->cpy_hdr);
9523
9524                 tail_copy->offset = copy->offset + copy->size - tail_size;
9525                 tail_copy->size = tail_size;
9526
9527                 copy->size -= tail_size;
9528
9529                 entry = vm_map_copy_last_entry(copy);
9530                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9531                 entry = vm_map_copy_last_entry(copy);
9532                 vm_map_copy_entry_unlink(copy, entry);
9533                 vm_map_copy_entry_link(tail_copy,
9534                     vm_map_copy_last_entry(tail_copy),
9535                     entry);
9536         }
9537
9538         /*
9539          * Copy most (or possibly all) of the data.
9540          */
9541         kr = vm_map_copy_overwrite_nested(dst_map,
9542             dst_addr + head_size,
9543             copy,
9544             interruptible,
9545             (pmap_t) NULL,
9546             FALSE);
9547         if (kr != KERN_SUCCESS) {
9548                 goto done;
9549         }
9550
9551         if (tail_size) {
9552                 kr = vm_map_copy_overwrite_nested(dst_map,
9553                     tail_addr,
9554                     tail_copy,
9555                     interruptible,
9556                     (pmap_t) NULL,
9557                     FALSE);
9558         }
9559
9560 done:
9561         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9562         if (kr == KERN_SUCCESS) {
9563                 /*
9564                  * Discard all the copy maps.
9565                  */
9566                 if (head_copy) {
9567                         vm_map_copy_discard(head_copy);
9568                         head_copy = NULL;
9569                 }
9570                 vm_map_copy_discard(copy);
9571                 if (tail_copy) {
9572                         vm_map_copy_discard(tail_copy);
9573                         tail_copy = NULL;
9574                 }
9575         } else {
9576                 /*
9577                  * Re-assemble the original copy map.
9578                  */
9579                 if (head_copy) {
9580                         entry = vm_map_copy_first_entry(head_copy);
9581                         vm_map_copy_entry_unlink(head_copy, entry);
9582                         vm_map_copy_entry_link(copy,
9583                             vm_map_copy_to_entry(copy),
9584                             entry);
9585                         copy->offset -= head_size;
9586                         copy->size += head_size;
9587                         vm_map_copy_discard(head_copy);
9588                         head_copy = NULL;
9589                 }
9590                 if (tail_copy) {
9591                         entry = vm_map_copy_last_entry(tail_copy);
9592                         vm_map_copy_entry_unlink(tail_copy, entry);
9593                         vm_map_copy_entry_link(copy,
9594                             vm_map_copy_last_entry(copy),
9595                             entry);
9596                         copy->size += tail_size;
9597                         vm_map_copy_discard(tail_copy);
9598                         tail_copy = NULL;
9599                 }
9600         }
9601         return kr;
9602 }
9603
9604
9605 /*
9606  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
9607  *
9608  *      Decription:
9609  *      Physically copy unaligned data
9610  *
9611  *      Implementation:
9612  *      Unaligned parts of pages have to be physically copied.  We use
9613  *      a modified form of vm_fault_copy (which understands none-aligned
9614  *      page offsets and sizes) to do the copy.  We attempt to copy as
9615  *      much memory in one go as possibly, however vm_fault_copy copies
9616  *      within 1 memory object so we have to find the smaller of "amount left"
9617  *      "source object data size" and "target object data size".  With
9618  *      unaligned data we don't need to split regions, therefore the source
9619  *      (copy) object should be one map entry, the target range may be split
9620  *      over multiple map entries however.  In any event we are pessimistic
9621  *      about these assumptions.
9622  *
9623  *      Assumptions:
9624  *      dst_map is locked on entry and is return locked on success,
9625  *      unlocked on error.
9626  */
9627
9628 static kern_return_t
9629 vm_map_copy_overwrite_unaligned(
9630         vm_map_t        dst_map,
9631         vm_map_entry_t  entry,
9632         vm_map_copy_t   copy,
9633         vm_map_offset_t start,
9634         boolean_t       discard_on_success)
9635 {
9636         vm_map_entry_t          copy_entry;
9637         vm_map_entry_t          copy_entry_next;
9638         vm_map_version_t        version;
9639         vm_object_t             dst_object;
9640         vm_object_offset_t      dst_offset;
9641         vm_object_offset_t      src_offset;
9642         vm_object_offset_t      entry_offset;
9643         vm_map_offset_t         entry_end;
9644         vm_map_size_t           src_size,
9645             dst_size,
9646             copy_size,
9647             amount_left;
9648         kern_return_t           kr = KERN_SUCCESS;
9649
9650
9651         copy_entry = vm_map_copy_first_entry(copy);
9652
9653         vm_map_lock_write_to_read(dst_map);
9654
9655         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9656         amount_left = copy->size;
9657 /*
9658  *      unaligned so we never clipped this entry, we need the offset into
9659  *      the vm_object not just the data.
9660  */
9661         while (amount_left > 0) {
9662                 if (entry == vm_map_to_entry(dst_map)) {
9663                         vm_map_unlock_read(dst_map);
9664                         return KERN_INVALID_ADDRESS;
9665                 }
9666
9667                 /* "start" must be within the current map entry */
9668                 assert((start >= entry->vme_start) && (start < entry->vme_end));
9669
9670                 dst_offset = start - entry->vme_start;
9671
9672                 dst_size = entry->vme_end - start;
9673
9674                 src_size = copy_entry->vme_end -
9675                     (copy_entry->vme_start + src_offset);
9676
9677                 if (dst_size < src_size) {
9678 /*
9679  *                      we can only copy dst_size bytes before
9680  *                      we have to get the next destination entry
9681  */
9682                         copy_size = dst_size;
9683                 } else {
9684 /*
9685  *                      we can only copy src_size bytes before
9686  *                      we have to get the next source copy entry
9687  */
9688                         copy_size = src_size;
9689                 }
9690
9691                 if (copy_size > amount_left) {
9692                         copy_size = amount_left;
9693                 }
9694 /*
9695  *              Entry needs copy, create a shadow shadow object for
9696  *              Copy on write region.
9697  */
9698                 if (entry->needs_copy &&
9699                     ((entry->protection & VM_PROT_WRITE) != 0)) {
9700                         if (vm_map_lock_read_to_write(dst_map)) {
9701                                 vm_map_lock_read(dst_map);
9702                                 goto RetryLookup;
9703                         }
9704                         VME_OBJECT_SHADOW(entry,
9705                             (vm_map_size_t)(entry->vme_end
9706                             - entry->vme_start));
9707                         entry->needs_copy = FALSE;
9708                         vm_map_lock_write_to_read(dst_map);
9709                 }
9710                 dst_object = VME_OBJECT(entry);
9711 /*
9712  *              unlike with the virtual (aligned) copy we're going
9713  *              to fault on it therefore we need a target object.
9714  */
9715                 if (dst_object == VM_OBJECT_NULL) {
9716                         if (vm_map_lock_read_to_write(dst_map)) {
9717                                 vm_map_lock_read(dst_map);
9718                                 goto RetryLookup;
9719                         }
9720                         dst_object = vm_object_allocate((vm_map_size_t)
9721                             entry->vme_end - entry->vme_start);
9722                         VME_OBJECT_SET(entry, dst_object);
9723                         VME_OFFSET_SET(entry, 0);
9724                         assert(entry->use_pmap);
9725                         vm_map_lock_write_to_read(dst_map);
9726                 }
9727 /*
9728  *              Take an object reference and unlock map. The "entry" may
9729  *              disappear or change when the map is unlocked.
9730  */
9731                 vm_object_reference(dst_object);
9732                 version.main_timestamp = dst_map->timestamp;
9733                 entry_offset = VME_OFFSET(entry);
9734                 entry_end = entry->vme_end;
9735                 vm_map_unlock_read(dst_map);
9736 /*
9737  *              Copy as much as possible in one pass
9738  */
9739                 kr = vm_fault_copy(
9740                         VME_OBJECT(copy_entry),
9741                         VME_OFFSET(copy_entry) + src_offset,
9742                         &copy_size,
9743                         dst_object,
9744                         entry_offset + dst_offset,
9745                         dst_map,
9746                         &version,
9747                         THREAD_UNINT );
9748
9749                 start += copy_size;
9750                 src_offset += copy_size;
9751                 amount_left -= copy_size;
9752 /*
9753  *              Release the object reference
9754  */
9755                 vm_object_deallocate(dst_object);
9756 /*
9757  *              If a hard error occurred, return it now
9758  */
9759                 if (kr != KERN_SUCCESS) {
9760                         return kr;
9761                 }
9762
9763                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9764                     || amount_left == 0) {
9765 /*
9766  *                      all done with this copy entry, dispose.
9767  */
9768                         copy_entry_next = copy_entry->vme_next;
9769
9770                         if (discard_on_success) {
9771                                 vm_map_copy_entry_unlink(copy, copy_entry);
9772                                 assert(!copy_entry->is_sub_map);
9773                                 vm_object_deallocate(VME_OBJECT(copy_entry));
9774                                 vm_map_copy_entry_dispose(copy, copy_entry);
9775                         }
9776
9777                         if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9778                             amount_left) {
9779 /*
9780  *                              not finished copying but run out of source
9781  */
9782                                 return KERN_INVALID_ADDRESS;
9783                         }
9784
9785                         copy_entry = copy_entry_next;
9786
9787                         src_offset = 0;
9788                 }
9789
9790                 if (amount_left == 0) {
9791                         return KERN_SUCCESS;
9792                 }
9793
9794                 vm_map_lock_read(dst_map);
9795                 if (version.main_timestamp == dst_map->timestamp) {
9796                         if (start == entry_end) {
9797 /*
9798  *                              destination region is split.  Use the version
9799  *                              information to avoid a lookup in the normal
9800  *                              case.
9801  */
9802                                 entry = entry->vme_next;
9803 /*
9804  *                              should be contiguous. Fail if we encounter
9805  *                              a hole in the destination.
9806  */
9807                                 if (start != entry->vme_start) {
9808                                         vm_map_unlock_read(dst_map);
9809                                         return KERN_INVALID_ADDRESS;
9810                                 }
9811                         }
9812                 } else {
9813 /*
9814  *                      Map version check failed.
9815  *                      we must lookup the entry because somebody
9816  *                      might have changed the map behind our backs.
9817  */
9818 RetryLookup:
9819                         if (!vm_map_lookup_entry(dst_map, start, &entry)) {
9820                                 vm_map_unlock_read(dst_map);
9821                                 return KERN_INVALID_ADDRESS;
9822                         }
9823                 }
9824         }/* while */
9825
9826         return KERN_SUCCESS;
9827 }/* vm_map_copy_overwrite_unaligned */
9828
9829 /*
9830  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
9831  *
9832  *      Description:
9833  *      Does all the vm_trickery possible for whole pages.
9834  *
9835  *      Implementation:
9836  *
9837  *      If there are no permanent objects in the destination,
9838  *      and the source and destination map entry zones match,
9839  *      and the destination map entry is not shared,
9840  *      then the map entries can be deleted and replaced
9841  *      with those from the copy.  The following code is the
9842  *      basic idea of what to do, but there are lots of annoying
9843  *      little details about getting protection and inheritance
9844  *      right.  Should add protection, inheritance, and sharing checks
9845  *      to the above pass and make sure that no wiring is involved.
9846  */
9847
9848 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9849 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9850 int vm_map_copy_overwrite_aligned_src_large = 0;
9851
9852 static kern_return_t
9853 vm_map_copy_overwrite_aligned(
9854         vm_map_t        dst_map,
9855         vm_map_entry_t  tmp_entry,
9856         vm_map_copy_t   copy,
9857         vm_map_offset_t start,
9858         __unused pmap_t pmap)
9859 {
9860         vm_object_t     object;
9861         vm_map_entry_t  copy_entry;
9862         vm_map_size_t   copy_size;
9863         vm_map_size_t   size;
9864         vm_map_entry_t  entry;
9865
9866         while ((copy_entry = vm_map_copy_first_entry(copy))
9867             != vm_map_copy_to_entry(copy)) {
9868                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9869
9870                 entry = tmp_entry;
9871                 if (entry->is_sub_map) {
9872                         /* unnested when clipped earlier */
9873                         assert(!entry->use_pmap);
9874                 }
9875                 if (entry == vm_map_to_entry(dst_map)) {
9876                         vm_map_unlock(dst_map);
9877                         return KERN_INVALID_ADDRESS;
9878                 }
9879                 size = (entry->vme_end - entry->vme_start);
9880                 /*
9881                  *      Make sure that no holes popped up in the
9882                  *      address map, and that the protection is
9883                  *      still valid, in case the map was unlocked
9884                  *      earlier.
9885                  */
9886
9887                 if ((entry->vme_start != start) || ((entry->is_sub_map)
9888                     && !entry->needs_copy)) {
9889                         vm_map_unlock(dst_map);
9890                         return KERN_INVALID_ADDRESS;
9891                 }
9892                 assert(entry != vm_map_to_entry(dst_map));
9893
9894                 /*
9895                  *      Check protection again
9896                  */
9897
9898                 if (!(entry->protection & VM_PROT_WRITE)) {
9899                         vm_map_unlock(dst_map);
9900                         return KERN_PROTECTION_FAILURE;
9901                 }
9902
9903                 /*
9904                  *      Adjust to source size first
9905                  */
9906
9907                 if (copy_size < size) {
9908                         if (entry->map_aligned &&
9909                             !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9910                             VM_MAP_PAGE_MASK(dst_map))) {
9911                                 /* no longer map-aligned */
9912                                 entry->map_aligned = FALSE;
9913                         }
9914                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9915                         size = copy_size;
9916                 }
9917
9918                 /*
9919                  *      Adjust to destination size
9920                  */
9921
9922                 if (size < copy_size) {
9923                         vm_map_copy_clip_end(copy, copy_entry,
9924                             copy_entry->vme_start + size);
9925                         copy_size = size;
9926                 }
9927
9928                 assert((entry->vme_end - entry->vme_start) == size);
9929                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9930                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9931
9932                 /*
9933                  *      If the destination contains temporary unshared memory,
9934                  *      we can perform the copy by throwing it away and
9935                  *      installing the source data.
9936                  */
9937
9938                 object = VME_OBJECT(entry);
9939                 if ((!entry->is_shared &&
9940                     ((object == VM_OBJECT_NULL) ||
9941                     (object->internal && !object->true_share))) ||
9942                     entry->needs_copy) {
9943                         vm_object_t     old_object = VME_OBJECT(entry);
9944                         vm_object_offset_t      old_offset = VME_OFFSET(entry);
9945                         vm_object_offset_t      offset;
9946
9947                         /*
9948                          * Ensure that the source and destination aren't
9949                          * identical
9950                          */
9951                         if (old_object == VME_OBJECT(copy_entry) &&
9952                             old_offset == VME_OFFSET(copy_entry)) {
9953                                 vm_map_copy_entry_unlink(copy, copy_entry);
9954                                 vm_map_copy_entry_dispose(copy, copy_entry);
9955
9956                                 if (old_object != VM_OBJECT_NULL) {
9957                                         vm_object_deallocate(old_object);
9958                                 }
9959
9960                                 start = tmp_entry->vme_end;
9961                                 tmp_entry = tmp_entry->vme_next;
9962                                 continue;
9963                         }
9964
9965 #if !CONFIG_EMBEDDED
9966 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9967 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
9968                         if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9969                             VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9970                             copy_size <= __TRADEOFF1_COPY_SIZE) {
9971                                 /*
9972                                  * Virtual vs. Physical copy tradeoff #1.
9973                                  *
9974                                  * Copying only a few pages out of a large
9975                                  * object:  do a physical copy instead of
9976                                  * a virtual copy, to avoid possibly keeping
9977                                  * the entire large object alive because of
9978                                  * those few copy-on-write pages.
9979                                  */
9980                                 vm_map_copy_overwrite_aligned_src_large++;
9981                                 goto slow_copy;
9982                         }
9983 #endif /* !CONFIG_EMBEDDED */
9984
9985                         if ((dst_map->pmap != kernel_pmap) &&
9986                             (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9987                             (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
9988                                 vm_object_t new_object, new_shadow;
9989
9990                                 /*
9991                                  * We're about to map something over a mapping
9992                                  * established by malloc()...
9993                                  */
9994                                 new_object = VME_OBJECT(copy_entry);
9995                                 if (new_object != VM_OBJECT_NULL) {
9996                                         vm_object_lock_shared(new_object);
9997                                 }
9998                                 while (new_object != VM_OBJECT_NULL &&
9999 #if !CONFIG_EMBEDDED
10000                                     !new_object->true_share &&
10001                                     new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10002 #endif /* !CONFIG_EMBEDDED */
10003                                     new_object->internal) {
10004                                         new_shadow = new_object->shadow;
10005                                         if (new_shadow == VM_OBJECT_NULL) {
10006                                                 break;
10007                                         }
10008                                         vm_object_lock_shared(new_shadow);
10009                                         vm_object_unlock(new_object);
10010                                         new_object = new_shadow;
10011                                 }
10012                                 if (new_object != VM_OBJECT_NULL) {
10013                                         if (!new_object->internal) {
10014                                                 /*
10015                                                  * The new mapping is backed
10016                                                  * by an external object.  We
10017                                                  * don't want malloc'ed memory
10018                                                  * to be replaced with such a
10019                                                  * non-anonymous mapping, so
10020                                                  * let's go off the optimized
10021                                                  * path...
10022                                                  */
10023                                                 vm_map_copy_overwrite_aligned_src_not_internal++;
10024                                                 vm_object_unlock(new_object);
10025                                                 goto slow_copy;
10026                                         }
10027 #if !CONFIG_EMBEDDED
10028                                         if (new_object->true_share ||
10029                                             new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10030                                                 /*
10031                                                  * Same if there's a "true_share"
10032                                                  * object in the shadow chain, or
10033                                                  * an object with a non-default
10034                                                  * (SYMMETRIC) copy strategy.
10035                                                  */
10036                                                 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10037                                                 vm_object_unlock(new_object);
10038                                                 goto slow_copy;
10039                                         }
10040 #endif /* !CONFIG_EMBEDDED */
10041                                         vm_object_unlock(new_object);
10042                                 }
10043                                 /*
10044                                  * The new mapping is still backed by
10045                                  * anonymous (internal) memory, so it's
10046                                  * OK to substitute it for the original
10047                                  * malloc() mapping.
10048                                  */
10049                         }
10050
10051                         if (old_object != VM_OBJECT_NULL) {
10052                                 if (entry->is_sub_map) {
10053                                         if (entry->use_pmap) {
10054 #ifndef NO_NESTED_PMAP
10055                                                 pmap_unnest(dst_map->pmap,
10056                                                     (addr64_t)entry->vme_start,
10057                                                     entry->vme_end - entry->vme_start);
10058 #endif  /* NO_NESTED_PMAP */
10059                                                 if (dst_map->mapped_in_other_pmaps) {
10060                                                         /* clean up parent */
10061                                                         /* map/maps */
10062                                                         vm_map_submap_pmap_clean(
10063                                                                 dst_map, entry->vme_start,
10064                                                                 entry->vme_end,
10065                                                                 VME_SUBMAP(entry),
10066                                                                 VME_OFFSET(entry));
10067                                                 }
10068                                         } else {
10069                                                 vm_map_submap_pmap_clean(
10070                                                         dst_map, entry->vme_start,
10071                                                         entry->vme_end,
10072                                                         VME_SUBMAP(entry),
10073                                                         VME_OFFSET(entry));
10074                                         }
10075                                         vm_map_deallocate(VME_SUBMAP(entry));
10076                                 } else {
10077                                         if (dst_map->mapped_in_other_pmaps) {
10078                                                 vm_object_pmap_protect_options(
10079                                                         VME_OBJECT(entry),
10080                                                         VME_OFFSET(entry),
10081                                                         entry->vme_end
10082                                                         - entry->vme_start,
10083                                                         PMAP_NULL,
10084                                                         entry->vme_start,
10085                                                         VM_PROT_NONE,
10086                                                         PMAP_OPTIONS_REMOVE);
10087                                         } else {
10088                                                 pmap_remove_options(
10089                                                         dst_map->pmap,
10090                                                         (addr64_t)(entry->vme_start),
10091                                                         (addr64_t)(entry->vme_end),
10092                                                         PMAP_OPTIONS_REMOVE);
10093                                         }
10094                                         vm_object_deallocate(old_object);
10095                                 }
10096                         }
10097
10098                         if (entry->iokit_acct) {
10099                                 /* keep using iokit accounting */
10100                                 entry->use_pmap = FALSE;
10101                         } else {
10102                                 /* use pmap accounting */
10103                                 entry->use_pmap = TRUE;
10104                         }
10105                         entry->is_sub_map = FALSE;
10106                         VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10107                         object = VME_OBJECT(entry);
10108                         entry->needs_copy = copy_entry->needs_copy;
10109                         entry->wired_count = 0;
10110                         entry->user_wired_count = 0;
10111                         offset = VME_OFFSET(copy_entry);
10112                         VME_OFFSET_SET(entry, offset);
10113
10114                         vm_map_copy_entry_unlink(copy, copy_entry);
10115                         vm_map_copy_entry_dispose(copy, copy_entry);
10116
10117                         /*
10118                          * we could try to push pages into the pmap at this point, BUT
10119                          * this optimization only saved on average 2 us per page if ALL
10120                          * the pages in the source were currently mapped
10121                          * and ALL the pages in the dest were touched, if there were fewer
10122                          * than 2/3 of the pages touched, this optimization actually cost more cycles
10123                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10124                          */
10125
10126                         /*
10127                          *      Set up for the next iteration.  The map
10128                          *      has not been unlocked, so the next
10129                          *      address should be at the end of this
10130                          *      entry, and the next map entry should be
10131                          *      the one following it.
10132                          */
10133
10134                         start = tmp_entry->vme_end;
10135                         tmp_entry = tmp_entry->vme_next;
10136                 } else {
10137                         vm_map_version_t        version;
10138                         vm_object_t             dst_object;
10139                         vm_object_offset_t      dst_offset;
10140                         kern_return_t           r;
10141
10142 slow_copy:
10143                         if (entry->needs_copy) {
10144                                 VME_OBJECT_SHADOW(entry,
10145                                     (entry->vme_end -
10146                                     entry->vme_start));
10147                                 entry->needs_copy = FALSE;
10148                         }
10149
10150                         dst_object = VME_OBJECT(entry);
10151                         dst_offset = VME_OFFSET(entry);
10152
10153                         /*
10154                          *      Take an object reference, and record
10155                          *      the map version information so that the
10156                          *      map can be safely unlocked.
10157                          */
10158
10159                         if (dst_object == VM_OBJECT_NULL) {
10160                                 /*
10161                                  * We would usually have just taken the
10162                                  * optimized path above if the destination
10163                                  * object has not been allocated yet.  But we
10164                                  * now disable that optimization if the copy
10165                                  * entry's object is not backed by anonymous
10166                                  * memory to avoid replacing malloc'ed
10167                                  * (i.e. re-usable) anonymous memory with a
10168                                  * not-so-anonymous mapping.
10169                                  * So we have to handle this case here and
10170                                  * allocate a new VM object for this map entry.
10171                                  */
10172                                 dst_object = vm_object_allocate(
10173                                         entry->vme_end - entry->vme_start);
10174                                 dst_offset = 0;
10175                                 VME_OBJECT_SET(entry, dst_object);
10176                                 VME_OFFSET_SET(entry, dst_offset);
10177                                 assert(entry->use_pmap);
10178                         }
10179
10180                         vm_object_reference(dst_object);
10181
10182                         /* account for unlock bumping up timestamp */
10183                         version.main_timestamp = dst_map->timestamp + 1;
10184
10185                         vm_map_unlock(dst_map);
10186
10187                         /*
10188                          *      Copy as much as possible in one pass
10189                          */
10190
10191                         copy_size = size;
10192                         r = vm_fault_copy(
10193                                 VME_OBJECT(copy_entry),
10194                                 VME_OFFSET(copy_entry),
10195                                 &copy_size,
10196                                 dst_object,
10197                                 dst_offset,
10198                                 dst_map,
10199                                 &version,
10200                                 THREAD_UNINT );
10201
10202                         /*
10203                          *      Release the object reference
10204                          */
10205
10206                         vm_object_deallocate(dst_object);
10207
10208                         /*
10209                          *      If a hard error occurred, return it now
10210                          */
10211
10212                         if (r != KERN_SUCCESS) {
10213                                 return r;
10214                         }
10215
10216                         if (copy_size != 0) {
10217                                 /*
10218                                  *      Dispose of the copied region
10219                                  */
10220
10221                                 vm_map_copy_clip_end(copy, copy_entry,
10222                                     copy_entry->vme_start + copy_size);
10223                                 vm_map_copy_entry_unlink(copy, copy_entry);
10224                                 vm_object_deallocate(VME_OBJECT(copy_entry));
10225                                 vm_map_copy_entry_dispose(copy, copy_entry);
10226                         }
10227
10228                         /*
10229                          *      Pick up in the destination map where we left off.
10230                          *
10231                          *      Use the version information to avoid a lookup
10232                          *      in the normal case.
10233                          */
10234
10235                         start += copy_size;
10236                         vm_map_lock(dst_map);
10237                         if (version.main_timestamp == dst_map->timestamp &&
10238                             copy_size != 0) {
10239                                 /* We can safely use saved tmp_entry value */
10240
10241                                 if (tmp_entry->map_aligned &&
10242                                     !VM_MAP_PAGE_ALIGNED(
10243                                             start,
10244                                             VM_MAP_PAGE_MASK(dst_map))) {
10245                                         /* no longer map-aligned */
10246                                         tmp_entry->map_aligned = FALSE;
10247                                 }
10248                                 vm_map_clip_end(dst_map, tmp_entry, start);
10249                                 tmp_entry = tmp_entry->vme_next;
10250                         } else {
10251                                 /* Must do lookup of tmp_entry */
10252
10253                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10254                                         vm_map_unlock(dst_map);
10255                                         return KERN_INVALID_ADDRESS;
10256                                 }
10257                                 if (tmp_entry->map_aligned &&
10258                                     !VM_MAP_PAGE_ALIGNED(
10259                                             start,
10260                                             VM_MAP_PAGE_MASK(dst_map))) {
10261                                         /* no longer map-aligned */
10262                                         tmp_entry->map_aligned = FALSE;
10263                                 }
10264                                 vm_map_clip_start(dst_map, tmp_entry, start);
10265                         }
10266                 }
10267         }/* while */
10268
10269         return KERN_SUCCESS;
10270 }/* vm_map_copy_overwrite_aligned */
10271
10272 /*
10273  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
10274  *
10275  *      Description:
10276  *              Copy in data to a kernel buffer from space in the
10277  *              source map. The original space may be optionally
10278  *              deallocated.
10279  *
10280  *              If successful, returns a new copy object.
10281  */
10282 static kern_return_t
10283 vm_map_copyin_kernel_buffer(
10284         vm_map_t        src_map,
10285         vm_map_offset_t src_addr,
10286         vm_map_size_t   len,
10287         boolean_t       src_destroy,
10288         vm_map_copy_t   *copy_result)
10289 {
10290         kern_return_t kr;
10291         vm_map_copy_t copy;
10292         vm_size_t kalloc_size;
10293
10294         if (len > msg_ool_size_small) {
10295                 return KERN_INVALID_ARGUMENT;
10296         }
10297
10298         kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10299
10300         copy = (vm_map_copy_t)kalloc(kalloc_size);
10301         if (copy == VM_MAP_COPY_NULL) {
10302                 return KERN_RESOURCE_SHORTAGE;
10303         }
10304         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10305         copy->size = len;
10306         copy->offset = 0;
10307
10308         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10309         if (kr != KERN_SUCCESS) {
10310                 kfree(copy, kalloc_size);
10311                 return kr;
10312         }
10313         if (src_destroy) {
10314                 (void) vm_map_remove(
10315                         src_map,
10316                         vm_map_trunc_page(src_addr,
10317                         VM_MAP_PAGE_MASK(src_map)),
10318                         vm_map_round_page(src_addr + len,
10319                         VM_MAP_PAGE_MASK(src_map)),
10320                         (VM_MAP_REMOVE_INTERRUPTIBLE |
10321                         VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10322                         ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10323         }
10324         *copy_result = copy;
10325         return KERN_SUCCESS;
10326 }
10327
10328 /*
10329  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
10330  *
10331  *      Description:
10332  *              Copy out data from a kernel buffer into space in the
10333  *              destination map. The space may be otpionally dynamically
10334  *              allocated.
10335  *
10336  *              If successful, consumes the copy object.
10337  *              Otherwise, the caller is responsible for it.
10338  */
10339 static int vm_map_copyout_kernel_buffer_failures = 0;
10340 static kern_return_t
10341 vm_map_copyout_kernel_buffer(
10342         vm_map_t                map,
10343         vm_map_address_t        *addr,  /* IN/OUT */
10344         vm_map_copy_t           copy,
10345         vm_map_size_t           copy_size,
10346         boolean_t               overwrite,
10347         boolean_t               consume_on_success)
10348 {
10349         kern_return_t kr = KERN_SUCCESS;
10350         thread_t thread = current_thread();
10351
10352         assert(copy->size == copy_size);
10353
10354         /*
10355          * check for corrupted vm_map_copy structure
10356          */
10357         if (copy_size > msg_ool_size_small || copy->offset) {
10358                 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10359                     (long long)copy->size, (long long)copy->offset);
10360         }
10361
10362         if (!overwrite) {
10363                 /*
10364                  * Allocate space in the target map for the data
10365                  */
10366                 *addr = 0;
10367                 kr = vm_map_enter(map,
10368                     addr,
10369                     vm_map_round_page(copy_size,
10370                     VM_MAP_PAGE_MASK(map)),
10371                     (vm_map_offset_t) 0,
10372                     VM_FLAGS_ANYWHERE,
10373                     VM_MAP_KERNEL_FLAGS_NONE,
10374                     VM_KERN_MEMORY_NONE,
10375                     VM_OBJECT_NULL,
10376                     (vm_object_offset_t) 0,
10377                     FALSE,
10378                     VM_PROT_DEFAULT,
10379                     VM_PROT_ALL,
10380                     VM_INHERIT_DEFAULT);
10381                 if (kr != KERN_SUCCESS) {
10382                         return kr;
10383                 }
10384 #if KASAN
10385                 if (map->pmap == kernel_pmap) {
10386                         kasan_notify_address(*addr, copy->size);
10387                 }
10388 #endif
10389         }
10390
10391         /*
10392          * Copyout the data from the kernel buffer to the target map.
10393          */
10394         if (thread->map == map) {
10395                 /*
10396                  * If the target map is the current map, just do
10397                  * the copy.
10398                  */
10399                 assert((vm_size_t)copy_size == copy_size);
10400                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10401                         kr = KERN_INVALID_ADDRESS;
10402                 }
10403         } else {
10404                 vm_map_t oldmap;
10405
10406                 /*
10407                  * If the target map is another map, assume the
10408                  * target's address space identity for the duration
10409                  * of the copy.
10410                  */
10411                 vm_map_reference(map);
10412                 oldmap = vm_map_switch(map);
10413
10414                 assert((vm_size_t)copy_size == copy_size);
10415                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10416                         vm_map_copyout_kernel_buffer_failures++;
10417                         kr = KERN_INVALID_ADDRESS;
10418                 }
10419
10420                 (void) vm_map_switch(oldmap);
10421                 vm_map_deallocate(map);
10422         }
10423
10424         if (kr != KERN_SUCCESS) {
10425                 /* the copy failed, clean up */
10426                 if (!overwrite) {
10427                         /*
10428                          * Deallocate the space we allocated in the target map.
10429                          */
10430                         (void) vm_map_remove(
10431                                 map,
10432                                 vm_map_trunc_page(*addr,
10433                                 VM_MAP_PAGE_MASK(map)),
10434                                 vm_map_round_page((*addr +
10435                                 vm_map_round_page(copy_size,
10436                                 VM_MAP_PAGE_MASK(map))),
10437                                 VM_MAP_PAGE_MASK(map)),
10438                                 VM_MAP_REMOVE_NO_FLAGS);
10439                         *addr = 0;
10440                 }
10441         } else {
10442                 /* copy was successful, dicard the copy structure */
10443                 if (consume_on_success) {
10444                         kfree(copy, copy_size + cpy_kdata_hdr_sz);
10445                 }
10446         }
10447
10448         return kr;
10449 }
10450
10451 /*
10452  *      Routine:        vm_map_copy_insert      [internal use only]
10453  *
10454  *      Description:
10455  *              Move every entry of the copy chain ("copy") into "map",
10456  *              keeping their order, starting right after "after_where".
10457  *      Side effects:
10458  *              The emptied copy header is freed; "copy" is gone on return.
10459  */
10460 static void
10461 vm_map_copy_insert(
10462         vm_map_t        map,
10463         vm_map_entry_t  after_where,
10464         vm_map_copy_t   copy)
10465 {
10466         vm_map_entry_t  head;
10467
10468         for (head = vm_map_copy_first_entry(copy);
10469             head != vm_map_copy_to_entry(copy);
10470             head = vm_map_copy_first_entry(copy)) {
10471                 vm_map_copy_entry_unlink(copy, head);   /* detach from the copy */
10472                 vm_map_store_entry_link(map, after_where, head, VM_MAP_KERNEL_FLAGS_NONE);
10473                 after_where = head;     /* next one goes behind this one */
10474         }
10475         zfree(vm_map_copy_zone, copy);
10476 }
10477
10478 void
10479 vm_map_copy_remap(
10480         vm_map_t        map,
10481         vm_map_entry_t  where,
10482         vm_map_copy_t   copy,
10483         vm_map_offset_t adjustment,
10484         vm_prot_t       cur_prot,
10485         vm_prot_t       max_prot,
10486         vm_inherit_t    inheritance)
10487 {
10488         vm_map_entry_t  src_entry, clone;
10489
10490         /*
10491          * Clone each entry of "copy" into "map" after "where", shifted by
10492          * "adjustment".  The entries of "copy" are not unlinked or freed.
10493          */
10494         src_entry = vm_map_copy_first_entry(copy);
10495         while (src_entry != vm_map_copy_to_entry(copy)) {
10496                 /* allocate a map entry and start from a copy of the source */
10497                 clone = vm_map_entry_create(map, !map->hdr.entries_pageable);
10498                 vm_map_entry_copy(clone, src_entry);
10499                 /* relocate it and apply the caller's attributes */
10500                 clone->vme_start += adjustment;
10501                 clone->vme_end += adjustment;
10502                 clone->inheritance = inheritance;
10503                 clone->protection = cur_prot;
10504                 clone->max_protection = max_prot;
10505                 clone->behavior = VM_BEHAVIOR_DEFAULT;
10506                 /* the clone takes its own reference on the object or submap */
10507                 if (!clone->is_sub_map) {
10508                         vm_object_reference(VME_OBJECT(clone));
10509                 } else {
10510                         assert(!clone->use_pmap); /* not nested */
10511                         vm_map_lock(VME_SUBMAP(clone));
10512                         vm_map_reference(VME_SUBMAP(clone));
10513                         vm_map_unlock(VME_SUBMAP(clone));
10514                 }
10515                 /* link it in; the next clone goes right behind this one */
10516                 vm_map_store_entry_link(map, where, clone,
10517                     VM_MAP_KERNEL_FLAGS_NONE);
10518                 where = clone;
10519                 src_entry = src_entry->vme_next;
10520         }
10521 }
10522
10523
10524 /*
10525  * Check the caller-supplied *size against the size recorded in "copy".
10526  * Object and kernel-buffer copies must match exactly.  Entry-list copies may
10527  * be larger by page rounding; on success *size is then set to copy->size.
10528  */
10529 boolean_t
10530 vm_map_copy_validate_size(
10531         vm_map_t                dst_map,
10532         vm_map_copy_t           copy,
10533         vm_map_size_t           *size)
10534 {
10535         vm_map_size_t   recorded, wanted;
10536         boolean_t       valid = FALSE;
10537
10538         if (copy == VM_MAP_COPY_NULL) {
10539                 return FALSE;
10540         }
10541         recorded = copy->size;
10542         wanted = *size;
10543
10544         if (copy->type == VM_MAP_COPY_OBJECT ||
10545             copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10546                 /* these flavors are compared for exact equality */
10547                 valid = (wanted == recorded) ? TRUE : FALSE;
10548         } else if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
10549                 /*
10550                  * Page-size rounding may make the recorded size larger,
10551                  * so accept anything from "wanted" up to "wanted"
10552                  * rounded to dst_map's page size.
10553                  */
10554                 if (recorded >= wanted &&
10555                     recorded <= vm_map_round_page(wanted,
10556                     VM_MAP_PAGE_MASK(dst_map))) {
10557                         *size = recorded;
10558                         valid = TRUE;
10559                 }
10560         }
10561         /* any other copy type never validates */
10562         return valid;
10563 }
10564
10565 /*
10566  *      Routine:        vm_map_copyout_size
10567  *
10568  *      Description:
10569  *              Copy out a copy chain ("copy") into newly-allocated
10570  *              space in the destination map.  The caller passes the
10571  *              size it expects (see vm_map_copy_validate_size).
10572  *
10573  *              If successful, consumes the copy object.
10574  *              Otherwise, the caller is responsible for it.
10575  */
10576 kern_return_t
10577 vm_map_copyout_size(
10578         vm_map_t                dst_map,
10579         vm_map_address_t        *dst_addr,      /* OUT */
10580         vm_map_copy_t           copy,
10581         vm_map_size_t           copy_size)
10582 {
10583         kern_return_t   kr;
10584
10585         kr = vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10586             TRUE /* consume_on_success */, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
10587         return kr;
10588 }
10589
10590 /*
10591  *      Routine:        vm_map_copyout
10592  *
10593  *      Description:
10594  *              Copy out a copy chain ("copy") into newly-allocated
10595  *              space in the destination map, using the size stored
10596  *              in the copy object itself.
10597  *              If successful, consumes the copy object.
10598  *              Otherwise, the caller is responsible for it.
10599  */
10600 kern_return_t
10601 vm_map_copyout(
10602         vm_map_t                dst_map,
10603         vm_map_address_t        *dst_addr,      /* OUT */
10604         vm_map_copy_t           copy)
10605 {
10606         /* a null copy has no size to read; pass 0 and let the callee handle it */
10607         vm_map_size_t   copy_size = copy ? copy->size : 0;
10608
10609         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10610                    TRUE /* consume_on_success */, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
10611 }
10612 /* vm_map_copyout_internal: common back end of vm_map_copyout() and vm_map_copyout_size(). */
10613 kern_return_t
10614 vm_map_copyout_internal(
10615         vm_map_t                dst_map,        /* map that receives the data */
10616         vm_map_address_t        *dst_addr,      /* OUT */
10617         vm_map_copy_t           copy,           /* VM_MAP_COPY_NULL is accepted */
10618         vm_map_size_t           copy_size,      /* must equal copy->size */
10619         boolean_t               consume_on_success, /* FALSE: "copy" is left intact */
10620         vm_prot_t               cur_protection, /* used on the vm_map_copy_remap() path only */
10621         vm_prot_t               max_protection, /* used on the vm_map_copy_remap() path only */
10622         vm_inherit_t            inheritance)    /* used on the vm_map_copy_remap() path only */
10623 {
10624         vm_map_size_t           size;           /* page-rounded size to map */
10625         vm_map_size_t           adjustment;     /* start - vm_copy_start */
10626         vm_map_offset_t         start;          /* candidate address in dst_map */
10627         vm_object_offset_t      vm_copy_start;  /* copy->offset, page-truncated */
10628         vm_map_entry_t          last;           /* new entries get linked after this */
10629         vm_map_entry_t          entry;
10630         vm_map_entry_t          hole_entry;
10631
10632         /*
10633          *      Check for null copy object: nothing to map, *dst_addr = 0.
10634          */
10635
10636         if (copy == VM_MAP_COPY_NULL) {
10637                 *dst_addr = 0;
10638                 return KERN_SUCCESS;
10639         }
10640
10641         if (copy->size != copy_size) {  /* caller-supplied size must match */
10642                 *dst_addr = 0;
10643                 return KERN_FAILURE;
10644         }
10645
10646         /*
10647          *      Check for special copy object, created
10648          *      by vm_map_copyin_object.
10649          */
10650
10651         if (copy->type == VM_MAP_COPY_OBJECT) {
10652                 vm_object_t             object = copy->cpy_object;
10653                 kern_return_t           kr;
10654                 vm_object_offset_t      offset;
10655
10656                 offset = vm_object_trunc_page(copy->offset);
10657                 size = vm_map_round_page((copy_size +
10658                     (vm_map_size_t)(copy->offset -
10659                     offset)),
10660                     VM_MAP_PAGE_MASK(dst_map));
10661                 *dst_addr = 0;
10662                 kr = vm_map_enter(dst_map, dst_addr, size,
10663                     (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10664                     VM_MAP_KERNEL_FLAGS_NONE,
10665                     VM_KERN_MEMORY_NONE,
10666                     object, offset, FALSE,
10667                     VM_PROT_DEFAULT, VM_PROT_ALL,
10668                     VM_INHERIT_DEFAULT); /* note: fixed defaults, not cur_protection/max_protection/inheritance */
10669                 if (kr != KERN_SUCCESS) {
10670                         return kr;
10671                 }
10672                 /* Account for non-pagealigned copy object */
10673                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10674                 if (consume_on_success) {
10675                         zfree(vm_map_copy_zone, copy);
10676                 }
10677                 return KERN_SUCCESS;
10678         }
10679
10680         /*
10681          *      Check for special kernel buffer allocated
10682          *      by new_ipc_kmsg_copyin.
10683          */
10684
10685         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10686                 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10687                            copy, copy_size, FALSE,
10688                            consume_on_success);
10689         }
10690
10691
10692         /*
10693          *      Find space for the data
10694          */
10695
10696         vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10697             VM_MAP_COPY_PAGE_MASK(copy));
10698         size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10699             VM_MAP_COPY_PAGE_MASK(copy))
10700             - vm_copy_start;
10701
10702
10703 StartAgain:;    /* re-entered after sleeping for space (see wait_for_space below) */
10704
10705         vm_map_lock(dst_map);
10706         if (dst_map->disable_vmentry_reuse == TRUE) {
10707                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10708                 last = entry;
10709         } else {
10710                 if (dst_map->holelistenabled) { /* walk the hole list instead of the entry list */
10711                         hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10712
10713                         if (hole_entry == NULL) {
10714                                 /*
10715                                  * No more space in the map?
10716                                  */
10717                                 vm_map_unlock(dst_map);
10718                                 return KERN_NO_SPACE;
10719                         }
10720
10721                         last = hole_entry;
10722                         start = last->vme_start;
10723                 } else {
10724                         assert(first_free_is_valid(dst_map));
10725                         start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10726                             vm_map_min(dst_map) : last->vme_end;
10727                 }
10728                 start = vm_map_round_page(start,
10729                     VM_MAP_PAGE_MASK(dst_map));
10730         }
10731
10732         while (TRUE) {  /* first fit: stop at the first gap that holds [start, end) */
10733                 vm_map_entry_t  next = last->vme_next;
10734                 vm_map_offset_t end = start + size;
10735
10736                 if ((end > dst_map->max_offset) || (end < start)) {     /* past the map's end, or wrapped */
10737                         if (dst_map->wait_for_space) {
10738                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {      /* fits in the map's range: sleep, retry */
10739                                         assert_wait((event_t) dst_map,
10740                                             THREAD_INTERRUPTIBLE);
10741                                         vm_map_unlock(dst_map);
10742                                         thread_block(THREAD_CONTINUE_NULL);
10743                                         goto StartAgain;
10744                                 }
10745                         }
10746                         vm_map_unlock(dst_map);
10747                         return KERN_NO_SPACE;
10748                 }
10749
10750                 if (dst_map->holelistenabled) {
10751                         if (last->vme_end >= end) {     /* this hole is large enough */
10752                                 break;
10753                         }
10754                 } else {
10755                         /*
10756                          *      If there are no more entries, we must win.
10757                          *
10758                          *      OR
10759                          *
10760                          *      If there is another entry, it must be
10761                          *      after the end of the potential new region.
10762                          */
10763
10764                         if (next == vm_map_to_entry(dst_map)) {
10765                                 break;
10766                         }
10767
10768                         if (next->vme_start >= end) {
10769                                 break;
10770                         }
10771                 }
10772
10773                 last = next;
10774
10775                 if (dst_map->holelistenabled) {
10776                         if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10777                                 /*
10778                                  * Wrapped around
10779                                  */
10780                                 vm_map_unlock(dst_map);
10781                                 return KERN_NO_SPACE;
10782                         }
10783                         start = last->vme_start;
10784                 } else {
10785                         start = last->vme_end;
10786                 }
10787                 start = vm_map_round_page(start,
10788                     VM_MAP_PAGE_MASK(dst_map));
10789         }
10790         /* "last" is a hole here; the lookup rewrites it -- presumably to the map entry preceding the hole (confirm) */
10791         if (dst_map->holelistenabled) {
10792                 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10793                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10794                 }
10795         }
10796
10797
10798         adjustment = start - vm_copy_start;     /* distance each copy entry must move */
10799         if (!consume_on_success) {
10800                 /*
10801                  * We're not allowed to consume "copy", so we'll have to
10802                  * copy its map entries into the destination map below.
10803                  * No need to re-allocate map entries from the correct
10804                  * (pageable or not) zone, since we'll get new map entries
10805                  * during the transfer.
10806                  * We'll also adjust the map entries's "start" and "end"
10807                  * during the transfer, to keep "copy"'s entries consistent
10808                  * with its "offset".
10809                  */
10810                 goto after_adjustments;
10811         }
10812
10813         /*
10814          *      Since we're going to just drop the map
10815          *      entries from the copy into the destination
10816          *      map, they must come from the same pool.
10817          */
10818
10819         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10820                 /*
10821                  * Mismatches occur when dealing with the default
10822                  * pager.
10823                  */
10824                 zone_t          old_zone;
10825                 vm_map_entry_t  next, new;
10826
10827                 /*
10828                  * Find the zone that the copies were allocated from
10829                  */
10830
10831                 entry = vm_map_copy_first_entry(copy);
10832
10833                 /*
10834                  * Reinitialize the copy so that vm_map_copy_entry_link
10835                  * will work.
10836                  */
10837                 vm_map_store_copy_reset(copy, entry);
10838                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10839
10840                 /*
10841                  * Copy each entry.
10842                  */
10843                 while (entry != vm_map_copy_to_entry(copy)) {
10844                         new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10845                         vm_map_entry_copy_full(new, entry);
10846                         new->vme_no_copy_on_read = FALSE;
10847                         assert(!new->iokit_acct);
10848                         if (new->is_sub_map) {
10849                                 /* clr address space specifics */
10850                                 new->use_pmap = FALSE;
10851                         }
10852                         vm_map_copy_entry_link(copy,
10853                             vm_map_copy_last_entry(copy),
10854                             new);
10855                         next = entry->vme_next; /* read before the old entry is freed */
10856                         old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10857                         zfree(old_zone, entry);
10858                         entry = next;
10859                 }
10860         }
10861
10862         /*
10863          *      Adjust the addresses in the copy chain, and
10864          *      reset the region attributes.
10865          */
10866
10867         for (entry = vm_map_copy_first_entry(copy);
10868             entry != vm_map_copy_to_entry(copy);
10869             entry = entry->vme_next) {
10870                 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10871                         /*
10872                          * We're injecting this copy entry into a map that
10873                          * has the standard page alignment, so clear
10874                          * "map_aligned" (which might have been inherited
10875                          * from the original map entry).
10876                          */
10877                         entry->map_aligned = FALSE;
10878                 }
10879
10880                 entry->vme_start += adjustment;
10881                 entry->vme_end += adjustment;
10882
10883                 if (entry->map_aligned) {
10884                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10885                             VM_MAP_PAGE_MASK(dst_map)));
10886                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10887                             VM_MAP_PAGE_MASK(dst_map)));
10888                 }
10889                 /* note: fixed defaults on this path; cur_protection/max_protection/inheritance are not applied */
10890                 entry->inheritance = VM_INHERIT_DEFAULT;
10891                 entry->protection = VM_PROT_DEFAULT;
10892                 entry->max_protection = VM_PROT_ALL;
10893                 entry->behavior = VM_BEHAVIOR_DEFAULT;
10894
10895                 /*
10896                  * If the entry is now wired,
10897                  * map the pages into the destination map.
10898                  */
10899                 if (entry->wired_count != 0) {
10900                         vm_map_offset_t va;
10901                         vm_object_offset_t       offset;
10902                         vm_object_t object;
10903                         vm_prot_t prot;
10904                         int     type_of_fault;
10905
10906                         object = VME_OBJECT(entry);
10907                         offset = VME_OFFSET(entry);
10908                         va = entry->vme_start;
10909
10910                         pmap_pageable(dst_map->pmap,
10911                             entry->vme_start,
10912                             entry->vme_end,
10913                             TRUE);
10914
10915                         while (va < entry->vme_end) {   /* one page per iteration */
10916                                 vm_page_t       m;
10917                                 struct vm_object_fault_info fault_info = {};
10918
10919                                 /*
10920                                  * Look up the page in the object.
10921                                  * Assert that the page will be found in the
10922                                  * top object:
10923                                  * either
10924                                  *      the object was newly created by
10925                                  *      vm_object_copy_slowly, and has
10926                                  *      copies of all of the pages from
10927                                  *      the source object
10928                                  * or
10929                                  *      the object was moved from the old
10930                                  *      map entry; because the old map
10931                                  *      entry was wired, all of the pages
10932                                  *      were in the top-level object.
10933                                  *      (XXX not true if we wire pages for
10934                                  *       reading)
10935                                  */
10936                                 vm_object_lock(object);
10937
10938                                 m = vm_page_lookup(object, offset);
10939                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10940                                     m->vmp_absent) {
10941                                         panic("vm_map_copyout: wiring %p", m);
10942                                 }
10943
10944                                 prot = entry->protection;
10945
10946                                 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10947                                     prot) {
10948                                         prot |= VM_PROT_EXECUTE;
10949                                 }
10950
10951                                 type_of_fault = DBG_CACHE_HIT_FAULT;
10952
10953                                 fault_info.user_tag = VME_ALIAS(entry);
10954                                 fault_info.pmap_options = 0;
10955                                 if (entry->iokit_acct ||
10956                                     (!entry->is_sub_map && !entry->use_pmap)) {
10957                                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10958                                 }
10959
10960                                 vm_fault_enter(m,
10961                                     dst_map->pmap,
10962                                     va,
10963                                     prot,
10964                                     prot,
10965                                     VM_PAGE_WIRED(m),
10966                                     FALSE,            /* change_wiring */
10967                                     VM_KERN_MEMORY_NONE,            /* tag - not wiring */
10968                                     &fault_info,
10969                                     NULL,             /* need_retry */
10970                                     &type_of_fault);
10971
10972                                 vm_object_unlock(object);
10973
10974                                 offset += PAGE_SIZE_64;
10975                                 va += PAGE_SIZE;
10976                         }
10977                 }
10978         }
10979
10980 after_adjustments:      /* the !consume_on_success case jumps here with "copy" unmodified */
10981
10982         /*
10983          *      Correct the page alignment for the result
10984          */
10985
10986         *dst_addr = start + (copy->offset - vm_copy_start);
10987
10988 #if KASAN
10989         kasan_notify_address(*dst_addr, size);
10990 #endif
10991
10992         /*
10993          *      Update the hints and the map size
10994          */
10995
10996         if (consume_on_success) {
10997                 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10998         } else {
10999                 SAVE_HINT_MAP_WRITE(dst_map, last);
11000         }
11001
11002         dst_map->size += size;
11003
11004         /*
11005          *      Link in the copy
11006          */
11007
11008         if (consume_on_success) {
11009                 vm_map_copy_insert(dst_map, last, copy);        /* moves the entries and frees "copy" */
11010         } else {
11011                 vm_map_copy_remap(dst_map, last, copy, adjustment,
11012                     cur_protection, max_protection,
11013                     inheritance);
11014         }
11015
11016         vm_map_unlock(dst_map);
11017
11018         /*
11019          * XXX  If wiring_required, call vm_map_pageable
11020          */
11021
11022         return KERN_SUCCESS;
11023 }
11024
11025 /*
11026  *      Routine:        vm_map_copyin
11027  *
11028  *      Description:
11029  *              Thin wrapper: calls vm_map_copyin_common with src_volatile
11030  *              and use_maxprot both FALSE.  Exported via Unsupported.exports.
11031  */
11032 /* remove any macro definition of this name before defining the function below */
11033 #undef vm_map_copyin
11034
11035 kern_return_t
11036 vm_map_copyin(
11037         vm_map_t                        src_map,
11038         vm_map_address_t        src_addr,
11039         vm_map_size_t           len,
11040         boolean_t                       src_destroy,
11041         vm_map_copy_t           *copy_result)   /* OUT */
11042 {
11043         return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11044                    FALSE, copy_result, FALSE); /* FALSE args: src_volatile, use_maxprot */
11045 }
11046
11047 /*
11048  *      Routine:        vm_map_copyin_common
11049  *
11050  *      Description:
11051  *              Copy the specified region (src_addr, len) from the
11052  *              source address space (src_map), possibly removing
11053  *              the region from the source address space (src_destroy).
11054  *
11055  *      Returns:
11056  *              A vm_map_copy_t object (copy_result), suitable for
11057  *              insertion into another address space (using vm_map_copyout),
11058  *              copying over another address space region (using
11059  *              vm_map_copy_overwrite).  If the copy is unused, it
11060  *              should be destroyed (using vm_map_copy_discard).
11061  *
11062  *      In/out conditions:
11063  *              The source map should not be locked on entry.
11064  */
11065 /* List node tracked through "parent_maps" in vm_map_copyin_internal(); presumably one per submap level -- confirm. */
11066 typedef struct submap_map {
11067         vm_map_t        parent_map;     /* deallocated by RETURN() unless it is base_map */
11068         vm_map_offset_t base_start;     /* NOTE(review): base_* uses are not visible in this chunk */
11069         vm_map_offset_t base_end;
11070         vm_map_size_t   base_len;
11071         struct submap_map *next;        /* next node; RETURN() walks and kfree()s the list */
11072 } submap_map_t;
11073
11074 kern_return_t
11075 vm_map_copyin_common(
11076         vm_map_t        src_map,
11077         vm_map_address_t src_addr,
11078         vm_map_size_t   len,
11079         boolean_t       src_destroy,
11080         __unused boolean_t      src_volatile,
11081         vm_map_copy_t   *copy_result,   /* OUT */
11082         boolean_t       use_maxprot)
11083 {
11084         int             copyin_flags = 0;
11085         kern_return_t   kr;
11086
11087         /*
11088          * Translate the boolean options into VM_MAP_COPYIN_* flag bits.
11089          * "src_volatile" is accepted but ignored (it is marked __unused).
11090          */
11091         copyin_flags |= src_destroy ? VM_MAP_COPYIN_SRC_DESTROY : 0;
11092         copyin_flags |= use_maxprot ? VM_MAP_COPYIN_USE_MAXPROT : 0;
11093
11094         /* the flag-based entry point does the actual work */
11095         kr = vm_map_copyin_internal(src_map, src_addr, len, copyin_flags,
11096             copy_result);
11097         return kr;
11098 }
11099 kern_return_t
11100 vm_map_copyin_internal(
11101         vm_map_t        src_map,
11102         vm_map_address_t src_addr,
11103         vm_map_size_t   len,
11104         int             flags,
11105         vm_map_copy_t   *copy_result)   /* OUT */
11106 {
11107         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
11108                                          * in multi-level lookup, this
11109                                          * entry contains the actual
11110                                          * vm_object/offset.
11111                                          */
11112         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
11113
11114         vm_map_offset_t src_start;      /* Start of current entry --
11115                                          * where copy is taking place now
11116                                          */
11117         vm_map_offset_t src_end;        /* End of entire region to be
11118                                          * copied */
11119         vm_map_offset_t src_base;
11120         vm_map_t        base_map = src_map;
11121         boolean_t       map_share = FALSE;
11122         submap_map_t    *parent_maps = NULL;
11123
11124         vm_map_copy_t   copy;           /* Resulting copy */
11125         vm_map_address_t copy_addr;
11126         vm_map_size_t   copy_size;
11127         boolean_t       src_destroy;
11128         boolean_t       use_maxprot;
11129         boolean_t       preserve_purgeable;
11130         boolean_t       entry_was_shared;
11131         vm_map_entry_t  saved_src_entry;
11132
11133         if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11134                 return KERN_INVALID_ARGUMENT;
11135         }
11136
11137         src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11138         use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11139         preserve_purgeable =
11140             (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11141
11142         /*
11143          *      Check for copies of zero bytes.
11144          */
11145
11146         if (len == 0) {
11147                 *copy_result = VM_MAP_COPY_NULL;
11148                 return KERN_SUCCESS;
11149         }
11150
11151         /*
11152          *      Check that the end address doesn't overflow
11153          */
11154         src_end = src_addr + len;
11155         if (src_end < src_addr) {
11156                 return KERN_INVALID_ADDRESS;
11157         }
11158
11159         /*
11160          *      Compute (page aligned) start and end of region
11161          */
11162         src_start = vm_map_trunc_page(src_addr,
11163             VM_MAP_PAGE_MASK(src_map));
11164         src_end = vm_map_round_page(src_end,
11165             VM_MAP_PAGE_MASK(src_map));
11166
11167         /*
11168          * If the copy is sufficiently small, use a kernel buffer instead
11169          * of making a virtual copy.  The theory being that the cost of
11170          * setting up VM (and taking C-O-W faults) dominates the copy costs
11171          * for small regions.
11172          */
11173         if ((len < msg_ool_size_small) &&
11174             !use_maxprot &&
11175             !preserve_purgeable &&
11176             !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11177             /*
11178              * Since the "msg_ool_size_small" threshold was increased and
11179              * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11180              * address space limits, we revert to doing a virtual copy if the
11181              * copied range goes beyond those limits.  Otherwise, mach_vm_read()
11182              * of the commpage would now fail when it used to work.
11183              */
11184             (src_start >= vm_map_min(src_map) &&
11185             src_start < vm_map_max(src_map) &&
11186             src_end >= vm_map_min(src_map) &&
11187             src_end < vm_map_max(src_map))) {
11188                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11189                            src_destroy, copy_result);
11190         }
11191
11192         /*
11193          *      Allocate a header element for the list.
11194          *
11195          *      Use the start and end in the header to
11196          *      remember the endpoints prior to rounding.
11197          */
11198
11199         copy = vm_map_copy_allocate();
11200         copy->type = VM_MAP_COPY_ENTRY_LIST;
11201         copy->cpy_hdr.entries_pageable = TRUE;
11202 #if 00
11203         copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11204 #else
11205         /*
11206          * The copy entries can be broken down for a variety of reasons,
11207          * so we can't guarantee that they will remain map-aligned...
11208          * Will need to adjust the first copy_entry's "vme_start" and
11209          * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11210          * rather than the original map's alignment.
11211          */
11212         copy->cpy_hdr.page_shift = PAGE_SHIFT;
11213 #endif
11214
11215         vm_map_store_init( &(copy->cpy_hdr));
11216
11217         copy->offset = src_addr;
11218         copy->size = len;
11219
11220         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11221
11222 #define RETURN(x)                                               \
11223         MACRO_BEGIN                                             \
11224         vm_map_unlock(src_map);                                 \
11225         if(src_map != base_map)                                 \
11226                 vm_map_deallocate(src_map);                     \
11227         if (new_entry != VM_MAP_ENTRY_NULL)                     \
11228                 vm_map_copy_entry_dispose(copy,new_entry);      \
11229         vm_map_copy_discard(copy);                              \
11230         {                                                       \
11231                 submap_map_t    *_ptr;                          \
11232                                                                 \
11233                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11234                         parent_maps=parent_maps->next;          \
11235                         if (_ptr->parent_map != base_map)       \
11236                                 vm_map_deallocate(_ptr->parent_map);    \
11237                         kfree(_ptr, sizeof(submap_map_t));      \
11238                 }                                               \
11239         }                                                       \
11240         MACRO_RETURN(x);                                        \
11241         MACRO_END
11242
11243         /*
11244          *      Find the beginning of the region.
11245          */
11246
11247         vm_map_lock(src_map);
11248
11249         /*
11250          * Lookup the original "src_addr" rather than the truncated
11251          * "src_start", in case "src_start" falls in a non-map-aligned
11252          * map entry *before* the map entry that contains "src_addr"...
11253          */
11254         if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11255                 RETURN(KERN_INVALID_ADDRESS);
11256         }
11257         if (!tmp_entry->is_sub_map) {
11258                 /*
11259                  * ... but clip to the map-rounded "src_start" rather than
11260                  * "src_addr" to preserve map-alignment.  We'll adjust the
11261                  * first copy entry at the end, if needed.
11262                  */
11263                 vm_map_clip_start(src_map, tmp_entry, src_start);
11264         }
11265         if (src_start < tmp_entry->vme_start) {
11266                 /*
11267                  * Move "src_start" up to the start of the
11268                  * first map entry to copy.
11269                  */
11270                 src_start = tmp_entry->vme_start;
11271         }
11272         /* set for later submap fix-up */
11273         copy_addr = src_start;
11274
11275         /*
11276          *      Go through entries until we get to the end.
11277          */
11278
11279         while (TRUE) {
11280                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
11281                 vm_map_size_t   src_size;               /* Size of source
11282                                                          * map entry (in both
11283                                                          * maps)
11284                                                          */
11285
11286                 vm_object_t             src_object;     /* Object to copy */
11287                 vm_object_offset_t      src_offset;
11288
11289                 boolean_t       src_needs_copy;         /* Should source map
11290                                                          * be made read-only
11291                                                          * for copy-on-write?
11292                                                          */
11293
11294                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
11295
11296                 boolean_t       was_wired;              /* Was source wired? */
11297                 vm_map_version_t version;               /* Version before locks
11298                                                          * dropped to make copy
11299                                                          */
11300                 kern_return_t   result;                 /* Return value from
11301                                                          * copy_strategically.
11302                                                          */
11303                 while (tmp_entry->is_sub_map) {
11304                         vm_map_size_t submap_len;
11305                         submap_map_t *ptr;
11306
11307                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11308                         ptr->next = parent_maps;
11309                         parent_maps = ptr;
11310                         ptr->parent_map = src_map;
11311                         ptr->base_start = src_start;
11312                         ptr->base_end = src_end;
11313                         submap_len = tmp_entry->vme_end - src_start;
11314                         if (submap_len > (src_end - src_start)) {
11315                                 submap_len = src_end - src_start;
11316                         }
11317                         ptr->base_len = submap_len;
11318
11319                         src_start -= tmp_entry->vme_start;
11320                         src_start += VME_OFFSET(tmp_entry);
11321                         src_end = src_start + submap_len;
11322                         src_map = VME_SUBMAP(tmp_entry);
11323                         vm_map_lock(src_map);
11324                         /* keep an outstanding reference for all maps in */
11325                         /* the parents tree except the base map */
11326                         vm_map_reference(src_map);
11327                         vm_map_unlock(ptr->parent_map);
11328                         if (!vm_map_lookup_entry(
11329                                     src_map, src_start, &tmp_entry)) {
11330                                 RETURN(KERN_INVALID_ADDRESS);
11331                         }
11332                         map_share = TRUE;
11333                         if (!tmp_entry->is_sub_map) {
11334                                 vm_map_clip_start(src_map, tmp_entry, src_start);
11335                         }
11336                         src_entry = tmp_entry;
11337                 }
11338                 /* we are now in the lowest level submap... */
11339
11340                 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11341                     (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11342                         /* This is not supported for now. In future */
11343                         /* we will need to detect the phys_contig   */
11344                         /* condition and then upgrade copy_slowly   */
11345                         /* to do physical copy from the device mem  */
11346                         /* based object. We can piggy-back off of   */
11347                         /* the was wired boolean to set-up the      */
11348                         /* proper handling */
11349                         RETURN(KERN_PROTECTION_FAILURE);
11350                 }
11351                 /*
11352                  *      Create a new address map entry to hold the result.
11353                  *      Fill in the fields from the appropriate source entries.
11354                  *      We must unlock the source map to do this if we need
11355                  *      to allocate a map entry.
11356                  */
11357                 if (new_entry == VM_MAP_ENTRY_NULL) {
11358                         version.main_timestamp = src_map->timestamp;
11359                         vm_map_unlock(src_map);
11360
11361                         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11362
11363                         vm_map_lock(src_map);
11364                         if ((version.main_timestamp + 1) != src_map->timestamp) {
11365                                 if (!vm_map_lookup_entry(src_map, src_start,
11366                                     &tmp_entry)) {
11367                                         RETURN(KERN_INVALID_ADDRESS);
11368                                 }
11369                                 if (!tmp_entry->is_sub_map) {
11370                                         vm_map_clip_start(src_map, tmp_entry, src_start);
11371                                 }
11372                                 continue; /* restart w/ new tmp_entry */
11373                         }
11374                 }
11375
11376                 /*
11377                  *      Verify that the region can be read.
11378                  */
11379                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11380                     !use_maxprot) ||
11381                     (src_entry->max_protection & VM_PROT_READ) == 0) {
11382                         RETURN(KERN_PROTECTION_FAILURE);
11383                 }
11384
11385                 /*
11386                  *      Clip against the endpoints of the entire region.
11387                  */
11388
11389                 vm_map_clip_end(src_map, src_entry, src_end);
11390
11391                 src_size = src_entry->vme_end - src_start;
11392                 src_object = VME_OBJECT(src_entry);
11393                 src_offset = VME_OFFSET(src_entry);
11394                 was_wired = (src_entry->wired_count != 0);
11395
11396                 vm_map_entry_copy(new_entry, src_entry);
11397                 if (new_entry->is_sub_map) {
11398                         /* clr address space specifics */
11399                         new_entry->use_pmap = FALSE;
11400                 } else {
11401                         /*
11402                          * We're dealing with a copy-on-write operation,
11403                          * so the resulting mapping should not inherit the
11404                          * original mapping's accounting settings.
11405                          * "iokit_acct" should have been cleared in
11406                          * vm_map_entry_copy().
11407                          * "use_pmap" should be reset to its default (TRUE)
11408                          * so that the new mapping gets accounted for in
11409                          * the task's memory footprint.
11410                          */
11411                         assert(!new_entry->iokit_acct);
11412                         new_entry->use_pmap = TRUE;
11413                 }
11414
11415                 /*
11416                  *      Attempt non-blocking copy-on-write optimizations.
11417                  */
11418
11419                 /*
11420                  * If we are destroying the source, and the object
11421                  * is internal, we could move the object reference
11422                  * from the source to the copy.  The copy is
11423                  * copy-on-write only if the source is.
11424                  * We make another reference to the object, because
11425                  * destroying the source entry will deallocate it.
11426                  *
11427                  * This memory transfer has to be atomic, (to prevent
11428                  * the VM object from being shared or copied while
11429                  * it's being moved here), so we could only do this
11430                  * if we won't have to unlock the VM map until the
11431                  * original mapping has been fully removed.
11432                  */
11433
11434 RestartCopy:
11435                 if ((src_object == VM_OBJECT_NULL ||
11436                     (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11437                     vm_object_copy_quickly(
11438                             VME_OBJECT_PTR(new_entry),
11439                             src_offset,
11440                             src_size,
11441                             &src_needs_copy,
11442                             &new_entry_needs_copy)) {
11443                         new_entry->needs_copy = new_entry_needs_copy;
11444
11445                         /*
11446                          *      Handle copy-on-write obligations
11447                          */
11448
11449                         if (src_needs_copy && !tmp_entry->needs_copy) {
11450                                 vm_prot_t prot;
11451
11452                                 prot = src_entry->protection & ~VM_PROT_WRITE;
11453
11454                                 if (override_nx(src_map, VME_ALIAS(src_entry))
11455                                     && prot) {
11456                                         prot |= VM_PROT_EXECUTE;
11457                                 }
11458
11459                                 vm_object_pmap_protect(
11460                                         src_object,
11461                                         src_offset,
11462                                         src_size,
11463                                         (src_entry->is_shared ?
11464                                         PMAP_NULL
11465                                         : src_map->pmap),
11466                                         src_entry->vme_start,
11467                                         prot);
11468
11469                                 assert(tmp_entry->wired_count == 0);
11470                                 tmp_entry->needs_copy = TRUE;
11471                         }
11472
11473                         /*
11474                          *      The map has never been unlocked, so it's safe
11475                          *      to move to the next entry rather than doing
11476                          *      another lookup.
11477                          */
11478
11479                         goto CopySuccessful;
11480                 }
11481
11482                 entry_was_shared = tmp_entry->is_shared;
11483
11484                 /*
11485                  *      Take an object reference, so that we may
11486                  *      release the map lock(s).
11487                  */
11488
11489                 assert(src_object != VM_OBJECT_NULL);
11490                 vm_object_reference(src_object);
11491
11492                 /*
11493                  *      Record the timestamp for later verification.
11494                  *      Unlock the map.
11495                  */
11496
11497                 version.main_timestamp = src_map->timestamp;
11498                 vm_map_unlock(src_map); /* Increments timestamp once! */
11499                 saved_src_entry = src_entry;
11500                 tmp_entry = VM_MAP_ENTRY_NULL;
11501                 src_entry = VM_MAP_ENTRY_NULL;
11502
11503                 /*
11504                  *      Perform the copy
11505                  */
11506
11507                 if (was_wired) {
11508 CopySlowly:
11509                         vm_object_lock(src_object);
11510                         result = vm_object_copy_slowly(
11511                                 src_object,
11512                                 src_offset,
11513                                 src_size,
11514                                 THREAD_UNINT,
11515                                 VME_OBJECT_PTR(new_entry));
11516                         VME_OFFSET_SET(new_entry, 0);
11517                         new_entry->needs_copy = FALSE;
11518                 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11519                     (entry_was_shared || map_share)) {
11520                         vm_object_t new_object;
11521
11522                         vm_object_lock_shared(src_object);
11523                         new_object = vm_object_copy_delayed(
11524                                 src_object,
11525                                 src_offset,
11526                                 src_size,
11527                                 TRUE);
11528                         if (new_object == VM_OBJECT_NULL) {
11529                                 goto CopySlowly;
11530                         }
11531
11532                         VME_OBJECT_SET(new_entry, new_object);
11533                         assert(new_entry->wired_count == 0);
11534                         new_entry->needs_copy = TRUE;
11535                         assert(!new_entry->iokit_acct);
11536                         assert(new_object->purgable == VM_PURGABLE_DENY);
11537                         assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11538                         result = KERN_SUCCESS;
11539                 } else {
11540                         vm_object_offset_t new_offset;
11541                         new_offset = VME_OFFSET(new_entry);
11542                         result = vm_object_copy_strategically(src_object,
11543                             src_offset,
11544                             src_size,
11545                             VME_OBJECT_PTR(new_entry),
11546                             &new_offset,
11547                             &new_entry_needs_copy);
11548                         if (new_offset != VME_OFFSET(new_entry)) {
11549                                 VME_OFFSET_SET(new_entry, new_offset);
11550                         }
11551
11552                         new_entry->needs_copy = new_entry_needs_copy;
11553                 }
11554
11555                 if (result == KERN_SUCCESS &&
11556                     preserve_purgeable &&
11557                     src_object->purgable != VM_PURGABLE_DENY) {
11558                         vm_object_t     new_object;
11559
11560                         new_object = VME_OBJECT(new_entry);
11561                         assert(new_object != src_object);
11562                         vm_object_lock(new_object);
11563                         assert(new_object->ref_count == 1);
11564                         assert(new_object->shadow == VM_OBJECT_NULL);
11565                         assert(new_object->copy == VM_OBJECT_NULL);
11566                         assert(new_object->vo_owner == NULL);
11567
11568                         new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11569                         new_object->true_share = TRUE;
11570                         /* start as non-volatile with no owner... */
11571                         new_object->purgable = VM_PURGABLE_NONVOLATILE;
11572                         vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11573                         /* ... and move to src_object's purgeable state */
11574                         if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11575                                 int state;
11576                                 state = src_object->purgable;
11577                                 vm_object_purgable_control(
11578                                         new_object,
11579                                         VM_PURGABLE_SET_STATE_FROM_KERNEL,
11580                                         &state);
11581                         }
11582                         vm_object_unlock(new_object);
11583                         new_object = VM_OBJECT_NULL;
11584                         /* no pmap accounting for purgeable objects */
11585                         new_entry->use_pmap = FALSE;
11586                 }
11587
11588                 if (result != KERN_SUCCESS &&
11589                     result != KERN_MEMORY_RESTART_COPY) {
11590                         vm_map_lock(src_map);
11591                         RETURN(result);
11592                 }
11593
11594                 /*
11595                  *      Throw away the extra reference
11596                  */
11597
11598                 vm_object_deallocate(src_object);
11599
11600                 /*
11601                  *      Verify that the map has not substantially
11602                  *      changed while the copy was being made.
11603                  */
11604
11605                 vm_map_lock(src_map);
11606
11607                 if ((version.main_timestamp + 1) == src_map->timestamp) {
11608                         /* src_map hasn't changed: src_entry is still valid */
11609                         src_entry = saved_src_entry;
11610                         goto VerificationSuccessful;
11611                 }
11612
11613                 /*
11614                  *      Simple version comparison failed.
11615                  *
11616                  *      Retry the lookup and verify that the
11617                  *      same object/offset are still present.
11618                  *
11619                  *      [Note: a memory manager that colludes with
11620                  *      the calling task can detect that we have
11621                  *      cheated.  While the map was unlocked, the
11622                  *      mapping could have been changed and restored.]
11623                  */
11624
11625                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11626                         if (result != KERN_MEMORY_RESTART_COPY) {
11627                                 vm_object_deallocate(VME_OBJECT(new_entry));
11628                                 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11629                                 /* reset accounting state */
11630                                 new_entry->iokit_acct = FALSE;
11631                                 new_entry->use_pmap = TRUE;
11632                         }
11633                         RETURN(KERN_INVALID_ADDRESS);
11634                 }
11635
11636                 src_entry = tmp_entry;
11637                 vm_map_clip_start(src_map, src_entry, src_start);
11638
11639                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11640                     !use_maxprot) ||
11641                     ((src_entry->max_protection & VM_PROT_READ) == 0)) {
11642                         goto VerificationFailed;
11643                 }
11644
11645                 if (src_entry->vme_end < new_entry->vme_end) {
11646                         /*
11647                          * This entry might have been shortened
11648                          * (vm_map_clip_end) or been replaced with
11649                          * an entry that ends closer to "src_start"
11650                          * than before.
11651                          * Adjust "new_entry" accordingly; copying
11652                          * less memory would be correct but we also
11653                          * redo the copy (see below) if the new entry
11654                          * no longer points at the same object/offset.
11655                          */
11656                         assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11657                             VM_MAP_COPY_PAGE_MASK(copy)));
11658                         new_entry->vme_end = src_entry->vme_end;
11659                         src_size = new_entry->vme_end - src_start;
11660                 } else if (src_entry->vme_end > new_entry->vme_end) {
11661                         /*
11662                          * This entry might have been extended
11663                          * (vm_map_entry_simplify() or coalesce)
11664                          * or been replaced with an entry that ends farther
11665                          * from "src_start" than before.
11666                          *
11667                          * We've called vm_object_copy_*() only on
11668                          * the previous <start:end> range, so we can't
11669                          * just extend new_entry.  We have to re-do
11670                          * the copy based on the new entry as if it was
11671                          * pointing at a different object/offset (see
11672                          * "Verification failed" below).
11673                          */
11674                 }
11675
11676                 if ((VME_OBJECT(src_entry) != src_object) ||
11677                     (VME_OFFSET(src_entry) != src_offset) ||
11678                     (src_entry->vme_end > new_entry->vme_end)) {
11679                         /*
11680                          *      Verification failed.
11681                          *
11682                          *      Start over with this top-level entry.
11683                          */
11684
11685 VerificationFailed:     ;
11686
11687                         vm_object_deallocate(VME_OBJECT(new_entry));
11688                         tmp_entry = src_entry;
11689                         continue;
11690                 }
11691
11692                 /*
11693                  *      Verification succeeded.
11694                  */
11695
11696 VerificationSuccessful:;
11697
11698                 if (result == KERN_MEMORY_RESTART_COPY) {
11699                         goto RestartCopy;
11700                 }
11701
11702                 /*
11703                  *      Copy succeeded.
11704                  */
11705
11706 CopySuccessful: ;
11707
11708                 /*
11709                  *      Link in the new copy entry.
11710                  */
11711
11712                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11713                     new_entry);
11714
11715                 /*
11716                  *      Determine whether the entire region
11717                  *      has been copied.
11718                  */
11719                 src_base = src_start;
11720                 src_start = new_entry->vme_end;
11721                 new_entry = VM_MAP_ENTRY_NULL;
11722                 while ((src_start >= src_end) && (src_end != 0)) {
11723                         submap_map_t    *ptr;
11724
11725                         if (src_map == base_map) {
11726                                 /* back to the top */
11727                                 break;
11728                         }
11729
11730                         ptr = parent_maps;
11731                         assert(ptr != NULL);
11732                         parent_maps = parent_maps->next;
11733
11734                         /* fix up the damage we did in that submap */
11735                         vm_map_simplify_range(src_map,
11736                             src_base,
11737                             src_end);
11738
11739                         vm_map_unlock(src_map);
11740                         vm_map_deallocate(src_map);
11741                         vm_map_lock(ptr->parent_map);
11742                         src_map = ptr->parent_map;
11743                         src_base = ptr->base_start;
11744                         src_start = ptr->base_start + ptr->base_len;
11745                         src_end = ptr->base_end;
11746                         if (!vm_map_lookup_entry(src_map,
11747                             src_start,
11748                             &tmp_entry) &&
11749                             (src_end > src_start)) {
11750                                 RETURN(KERN_INVALID_ADDRESS);
11751                         }
11752                         kfree(ptr, sizeof(submap_map_t));
11753                         if (parent_maps == NULL) {
11754                                 map_share = FALSE;
11755                         }
11756                         src_entry = tmp_entry->vme_prev;
11757                 }
11758
11759                 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11760                     (src_start >= src_addr + len) &&
11761                     (src_addr + len != 0)) {
11762                         /*
11763                          * Stop copying now, even though we haven't reached
11764                          * "src_end".  We'll adjust the end of the last copy
11765                          * entry at the end, if needed.
11766                          *
11767                          * If src_map's alignment is different from the
11768                          * system's page-alignment, there could be
11769                          * extra non-map-aligned map entries between
11770                          * the original (non-rounded) "src_addr + len"
11771                          * and the rounded "src_end".
11772                          * We do not want to copy those map entries since
11773                          * they're not part of the copied range.
11774                          */
11775                         break;
11776                 }
11777
11778                 if ((src_start >= src_end) && (src_end != 0)) {
11779                         break;
11780                 }
11781
11782                 /*
11783                  *      Verify that there are no gaps in the region
11784                  */
11785
11786                 tmp_entry = src_entry->vme_next;
11787                 if ((tmp_entry->vme_start != src_start) ||
11788                     (tmp_entry == vm_map_to_entry(src_map))) {
11789                         RETURN(KERN_INVALID_ADDRESS);
11790                 }
11791         }
11792
11793         /*
11794          * If the source should be destroyed, do it now, since the
11795          * copy was successful.
11796          */
11797         if (src_destroy) {
11798                 (void) vm_map_delete(
11799                         src_map,
11800                         vm_map_trunc_page(src_addr,
11801                         VM_MAP_PAGE_MASK(src_map)),
11802                         src_end,
11803                         ((src_map == kernel_map) ?
11804                         VM_MAP_REMOVE_KUNWIRE :
11805                         VM_MAP_REMOVE_NO_FLAGS),
11806                         VM_MAP_NULL);
11807         } else {
11808                 /* fix up the damage we did in the base map */
11809                 vm_map_simplify_range(
11810                         src_map,
11811                         vm_map_trunc_page(src_addr,
11812                         VM_MAP_PAGE_MASK(src_map)),
11813                         vm_map_round_page(src_end,
11814                         VM_MAP_PAGE_MASK(src_map)));
11815         }
11816
11817         vm_map_unlock(src_map);
11818         tmp_entry = VM_MAP_ENTRY_NULL;
11819
11820         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11821                 vm_map_offset_t original_start, original_offset, original_end;
11822
11823                 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11824
11825                 /* adjust alignment of first copy_entry's "vme_start" */
11826                 tmp_entry = vm_map_copy_first_entry(copy);
11827                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11828                         vm_map_offset_t adjustment;
11829
11830                         original_start = tmp_entry->vme_start;
11831                         original_offset = VME_OFFSET(tmp_entry);
11832
11833                         /* map-align the start of the first copy entry... */
11834                         adjustment = (tmp_entry->vme_start -
11835                             vm_map_trunc_page(
11836                                     tmp_entry->vme_start,
11837                                     VM_MAP_PAGE_MASK(src_map)));
11838                         tmp_entry->vme_start -= adjustment;
11839                         VME_OFFSET_SET(tmp_entry,
11840                             VME_OFFSET(tmp_entry) - adjustment);
11841                         copy_addr -= adjustment;
11842                         assert(tmp_entry->vme_start < tmp_entry->vme_end);
11843                         /* ... adjust for mis-aligned start of copy range */
11844                         adjustment =
11845                             (vm_map_trunc_page(copy->offset,
11846                             PAGE_MASK) -
11847                             vm_map_trunc_page(copy->offset,
11848                             VM_MAP_PAGE_MASK(src_map)));
11849                         if (adjustment) {
11850                                 assert(page_aligned(adjustment));
11851                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11852                                 tmp_entry->vme_start += adjustment;
11853                                 VME_OFFSET_SET(tmp_entry,
11854                                     (VME_OFFSET(tmp_entry) +
11855                                     adjustment));
11856                                 copy_addr += adjustment;
11857                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11858                         }
11859
11860                         /*
11861                          * Assert that the adjustments haven't exposed
11862                          * more than was originally copied...
11863                          */
11864                         assert(tmp_entry->vme_start >= original_start);
11865                         assert(VME_OFFSET(tmp_entry) >= original_offset);
11866                         /*
11867                          * ... and that it did not adjust outside of
11868                          * a single 16K page.
11869                          */
11870                         assert(vm_map_trunc_page(tmp_entry->vme_start,
11871                             VM_MAP_PAGE_MASK(src_map)) ==
11872                             vm_map_trunc_page(original_start,
11873                             VM_MAP_PAGE_MASK(src_map)));
11874                 }
11875
11876                 /* adjust alignment of last copy_entry's "vme_end" */
11877                 tmp_entry = vm_map_copy_last_entry(copy);
11878                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11879                         vm_map_offset_t adjustment;
11880
11881                         original_end = tmp_entry->vme_end;
11882
11883                         /* map-align the end of the last copy entry... */
11884                         tmp_entry->vme_end =
11885                             vm_map_round_page(tmp_entry->vme_end,
11886                             VM_MAP_PAGE_MASK(src_map));
11887                         /* ... adjust for mis-aligned end of copy range */
11888                         adjustment =
11889                             (vm_map_round_page((copy->offset +
11890                             copy->size),
11891                             VM_MAP_PAGE_MASK(src_map)) -
11892                             vm_map_round_page((copy->offset +
11893                             copy->size),
11894                             PAGE_MASK));
11895                         if (adjustment) {
11896                                 assert(page_aligned(adjustment));
11897                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11898                                 tmp_entry->vme_end -= adjustment;
11899                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11900                         }
11901
11902                         /*
11903                          * Assert that the adjustments haven't exposed
11904                          * more than was originally copied...
11905                          */
11906                         assert(tmp_entry->vme_end <= original_end);
11907                         /*
11908                          * ... and that it did not adjust outside of
11909                          * a single 16K page.
11910                          */
11911                         assert(vm_map_round_page(tmp_entry->vme_end,
11912                             VM_MAP_PAGE_MASK(src_map)) ==
11913                             vm_map_round_page(original_end,
11914                             VM_MAP_PAGE_MASK(src_map)));
11915                 }
11916         }
11917
11918         /* Fix-up start and end points in copy.  This is necessary */
11919         /* when the various entries in the copy object were picked */
11920         /* up from different sub-maps */
11921
11922         tmp_entry = vm_map_copy_first_entry(copy);
11923         copy_size = 0; /* compute actual size */
11924         while (tmp_entry != vm_map_copy_to_entry(copy)) {
11925                 assert(VM_MAP_PAGE_ALIGNED(
11926                             copy_addr + (tmp_entry->vme_end -
11927                             tmp_entry->vme_start),
11928                             VM_MAP_COPY_PAGE_MASK(copy)));
11929                 assert(VM_MAP_PAGE_ALIGNED(
11930                             copy_addr,
11931                             VM_MAP_COPY_PAGE_MASK(copy)));
11932
11933                 /*
11934                  * The copy_entries will be injected directly into the
11935                  * destination map and might not be "map aligned" there...
11936                  */
11937                 tmp_entry->map_aligned = FALSE;
11938
11939                 tmp_entry->vme_end = copy_addr +
11940                     (tmp_entry->vme_end - tmp_entry->vme_start);
11941                 tmp_entry->vme_start = copy_addr;
11942                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11943                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11944                 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11945                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11946         }
11947
11948         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11949             copy_size < copy->size) {
11950                 /*
11951                  * The actual size of the VM map copy is smaller than what
11952                  * was requested by the caller.  This must be because some
11953                  * PAGE_SIZE-sized pages are missing at the end of the last
11954                  * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11955                  * The caller might not have been aware of those missing
11956                  * pages and might not want to be aware of it, which is
11957                  * fine as long as they don't try to access (and crash on)
11958                  * those missing pages.
11959                  * Let's adjust the size of the "copy", to avoid failing
11960                  * in vm_map_copyout() or vm_map_copy_overwrite().
11961                  */
11962                 assert(vm_map_round_page(copy_size,
11963                     VM_MAP_PAGE_MASK(src_map)) ==
11964                     vm_map_round_page(copy->size,
11965                     VM_MAP_PAGE_MASK(src_map)));
11966                 copy->size = copy_size;
11967         }
11968
11969         *copy_result = copy;
11970         return KERN_SUCCESS;
11971
11972 #undef  RETURN
11973 }
11974
      /*
       *      vm_map_copy_extract:
       *
       *      Build an entry-list map copy for "len" bytes of "src_map"
       *      starting at "src_addr".  The entries are produced by
       *      vm_map_remap_extract(), called with copy == FALSE and
       *      VM_INHERIT_SHARE.
       *
       *      Returns KERN_SUCCESS with the copy in "*copy_result"
       *      (VM_MAP_COPY_NULL when "len" is 0), KERN_INVALID_ADDRESS
       *      if "src_addr + len" wraps around, or the error returned by
       *      vm_map_remap_extract().  On failure "*copy_result" is not
       *      written.  "cur_prot" and "max_prot" are handed unchanged to
       *      vm_map_remap_extract(); this function itself does not write
       *      them on the zero-length and overflow paths.
       */
11975 kern_return_t
11976 vm_map_copy_extract(
11977         vm_map_t                src_map,
11978         vm_map_address_t        src_addr,
11979         vm_map_size_t           len,
11980         vm_map_copy_t           *copy_result,   /* OUT */
11981         vm_prot_t               *cur_prot,      /* OUT */
11982         vm_prot_t               *max_prot)
11983 {
11984         vm_map_offset_t src_end;
11985         vm_map_copy_t   copy;
11986         kern_return_t   kr;
11987
11988         /*
11989          *      Check for copies of zero bytes.
11990          */
11991
11992         if (len == 0) {
11993                 *copy_result = VM_MAP_COPY_NULL;
11994                 return KERN_SUCCESS;
11995         }
11996
11997         /*
11998          *      Check that the end address doesn't overflow.
11999          *      "src_end" is only needed for this test: the range
              *      is handed to vm_map_remap_extract() below as
              *      ("src_addr", "len"), not as page-rounded bounds.
              */
12000         src_end = src_addr + len;
12001         if (src_end < src_addr) {
12002                 return KERN_INVALID_ADDRESS;
12003         }
12004
12011         /*
12012          *      Allocate a header element for the list.
12013          *
12014          *      The header records the caller's unrounded
12015          *      length, starting at offset 0.
12016          */
12017
12018         copy = vm_map_copy_allocate();
12019         copy->type = VM_MAP_COPY_ENTRY_LIST;
12020         copy->cpy_hdr.entries_pageable = TRUE;
12021
12022         vm_map_store_init(&copy->cpy_hdr);
12023
12024         copy->offset = 0;
12025         copy->size = len;
12026
12027         kr = vm_map_remap_extract(src_map,
12028             src_addr,
12029             len,
12030             FALSE,                       /* copy */
12031             &copy->cpy_hdr,
12032             cur_prot,
12033             max_prot,
12034             VM_INHERIT_SHARE,
12035             TRUE,                       /* pageable */
12036             FALSE,                       /* same_map */
12037             VM_MAP_KERNEL_FLAGS_NONE);
12038         if (kr != KERN_SUCCESS) {
                      /* the copy was never handed to the caller: release it */
12039                 vm_map_copy_discard(copy);
12040                 return kr;
12041         }
12042
12043         *copy_result = copy;
12044         return KERN_SUCCESS;
12045 }
12046
12047 /*
12048  *      vm_map_copyin_object:
12049  *
12050  *      Wrap a VM object in a VM_MAP_COPY_OBJECT map copy that
12051  *      refers to the object directly.  The object reference
       *      held by the caller is handed over to the new copy.
       *      Always returns KERN_SUCCESS.
12052  */
12053
12054 kern_return_t
12055 vm_map_copyin_object(
12056         vm_object_t             object,
12057         vm_object_offset_t      offset, /* where the region starts in "object" */
12058         vm_object_size_t        size,   /* how many bytes the region spans */
12059         vm_map_copy_t   *copy_result)   /* OUT */
12060 {
12061         vm_map_copy_t   obj_copy = vm_map_copy_allocate();
12062
12063         /* This kind of copy holds the object itself plus the region in it. */
12068         obj_copy->type = VM_MAP_COPY_OBJECT;
12069         obj_copy->cpy_object = object;
12070         obj_copy->size = size;
12071         obj_copy->offset = offset;
12072
12074         *copy_result = obj_copy;
12075         return KERN_SUCCESS;
12076 }
12077
      /*
       *      vm_map_fork_share:
       *
       *      Give "new_map" a mapping that shares memory with "old_entry"
       *      of "old_map".  A new entry is cloned from "old_entry", gets
       *      its own reference on the same VM object (or submap) and is
       *      linked after the current last entry of "new_map".  Both
       *      entries end up marked "is_shared".
       *
       *      "old_entry" itself may be modified on the way: a missing
       *      object is allocated, or a shadow object is interposed (see
       *      the three cases described below), in which case
       *      "needs_copy" is cleared and write access may be removed
       *      from the existing physical mappings.
       *
       *      vm_map_fork() calls this for entries whose effective
       *      inheritance is VM_INHERIT_SHARE.
       */
12078 static void
12079 vm_map_fork_share(
12080         vm_map_t        old_map,
12081         vm_map_entry_t  old_entry,
12082         vm_map_t        new_map)
12083 {
12084         vm_object_t     object;
12085         vm_map_entry_t  new_entry;
12086
12087         /*
12088          *      New sharing code.  New map entry
12089          *      references original object.  Internal
12090          *      objects use asynchronous copy algorithm for
12091          *      future copies.  First make sure we have
12092          *      the right object.  If we need a shadow,
12093          *      or someone else already has one, then
12094          *      make a new shadow and share it.
12095          */
12096
12097         object = VME_OBJECT(old_entry);
12098         if (old_entry->is_sub_map) {
                      /*
                       * Submap entry: no object work is done here.  When
                       * the entry uses a nested pmap, pmap_nest() is
                       * called for the new map's pmap too, with
                       * "vme_start" passed for both address arguments.
                       * A pmap_nest() failure is fatal.
                       */
12099                 assert(old_entry->wired_count == 0);
12100 #ifndef NO_NESTED_PMAP
12101                 if (old_entry->use_pmap) {
12102                         kern_return_t   result;
12103
12104                         result = pmap_nest(new_map->pmap,
12105                             (VME_SUBMAP(old_entry))->pmap,
12106                             (addr64_t)old_entry->vme_start,
12107                             (addr64_t)old_entry->vme_start,
12108                             (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12109                         if (result) {
12110                                 panic("vm_map_fork_share: pmap_nest failed!");
12111                         }
12112                 }
12113 #endif  /* NO_NESTED_PMAP */
12114         } else if (object == VM_OBJECT_NULL) {
                      /*
                       * No object behind the entry yet: allocate one
                       * the size of the entry, at offset 0, so that
                       * both maps have a common object to reference.
                       */
12115                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12116                     old_entry->vme_start));
12117                 VME_OFFSET_SET(old_entry, 0);
12118                 VME_OBJECT_SET(old_entry, object);
12119                 old_entry->use_pmap = TRUE;
12120 //              assert(!old_entry->needs_copy);
12121         } else if (object->copy_strategy !=
12122             MEMORY_OBJECT_COPY_SYMMETRIC) {
12123                 /*
12124                  *      We are already using an asymmetric
12125                  *      copy, and therefore we already have
12126                  *      the right object.
12127                  */
12128
12129                 assert(!old_entry->needs_copy);
12130         } else if (old_entry->needs_copy ||       /* case 1 */
12131             object->shadowed ||                 /* case 2 */
12132             (!object->true_share &&             /* case 3 */
12133             !old_entry->is_shared &&
12134             (object->vo_size >
12135             (vm_map_size_t)(old_entry->vme_end -
12136             old_entry->vme_start)))) {
12137                 /*
12138                  *      We need to create a shadow.
12139                  *      There are three cases here.
12140                  *      In the first case, we need to
12141                  *      complete a deferred symmetrical
12142                  *      copy that we participated in.
12143                  *      In the second and third cases,
12144                  *      we need to create the shadow so
12145                  *      that changes that we make to the
12146                  *      object do not interfere with
12147                  *      any symmetrical copies which
12148                  *      have occurred (case 2) or which
12149                  *      might occur (case 3).
12150                  *
12151                  *      The first case is when we had
12152                  *      deferred shadow object creation
12153                  *      via the entry->needs_copy mechanism.
12154                  *      This mechanism only works when
12155                  *      only one entry points to the source
12156                  *      object, and we are about to create
12157                  *      a second entry pointing to the
12158                  *      same object. The problem is that
12159                  *      there is no way of mapping from
12160                  *      an object to the entries pointing
12161                  *      to it. (Deferred shadow creation
12162                  *      works with one entry because it occurs
12163                  *      at fault time, and we walk from the
12164                  *      entry to the object when handling
12165                  *      the fault.)
12166                  *
12167                  *      The second case is when the object
12168                  *      to be shared has already been copied
12169                  *      with a symmetric copy, but we point
12170                  *      directly to the object without
12171                  *      needs_copy set in our entry. (This
12172                  *      can happen because different ranges
12173                  *      of an object can be pointed to by
12174                  *      different entries. In particular,
12175                  *      a single entry pointing to an object
12176                  *      can be split by a call to vm_inherit,
12177                  *      which, combined with task_create, can
12178                  *      result in the different entries
12179                  *      having different needs_copy values.)
12180                  *      The shadowed flag in the object allows
12181                  *      us to detect this case. The problem
12182                  *      with this case is that if this object
12183                  *      has or will have shadows, then we
12184                  *      must not perform an asymmetric copy
12185                  *      of this object, since such a copy
12186                  *      allows the object to be changed, which
12187                  *      will break the previous symmetrical
12188                  *      copies (which rely upon the object
12189                  *      not changing). In a sense, the shadowed
12190                  *      flag says "don't change this object".
12191                  *      We fix this by creating a shadow
12192                  *      object for this object, and sharing
12193                  *      that. This works because we are free
12194                  *      to change the shadow object (and thus
12195                  *      to use an asymmetric copy strategy);
12196                  *      this is also semantically correct,
12197                  *      since this object is temporary, and
12198                  *      therefore a copy of the object is
12199                  *      as good as the object itself. (This
12200                  *      is not true for permanent objects,
12201                  *      since the pager needs to see changes,
12202                  *      which won't happen if the changes
12203                  *      are made to a copy.)
12204                  *
12205                  *      The third case is when the object
12206                  *      to be shared has parts sticking
12207                  *      outside of the entry we're working
12208                  *      with, and thus may in the future
12209                  *      be subject to a symmetrical copy.
12210                  *      (This is a preemptive version of
12211                  *      case 2.)
12212                  */
12213                 VME_OBJECT_SHADOW(old_entry,
12214                     (vm_map_size_t) (old_entry->vme_end -
12215                     old_entry->vme_start));
12216
12217                 /*
12218                  *      If we're making a shadow for other than
12219                  *      copy on write reasons, then we have
12220                  *      to remove write permission.
12221                  */
12222
12223                 if (!old_entry->needs_copy &&
12224                     (old_entry->protection & VM_PROT_WRITE)) {
12225                         vm_prot_t prot;
12226
12227                         assert(!pmap_has_prot_policy(old_entry->protection));
12228
12229                         prot = old_entry->protection & ~VM_PROT_WRITE;
12230
12231                         assert(!pmap_has_prot_policy(prot));
12232
12233                         if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12234                                 prot |= VM_PROT_EXECUTE;
12235                         }
12236
12237
                              /*
                               * Apply the reduced protection either through
                               * the object (when the map is flagged
                               * "mapped_in_other_pmaps") or directly on the
                               * old map's own pmap.
                               */
12238                         if (old_map->mapped_in_other_pmaps) {
12239                                 vm_object_pmap_protect(
12240                                         VME_OBJECT(old_entry),
12241                                         VME_OFFSET(old_entry),
12242                                         (old_entry->vme_end -
12243                                         old_entry->vme_start),
12244                                         PMAP_NULL,
12245                                         old_entry->vme_start,
12246                                         prot);
12247                         } else {
12248                                 pmap_protect(old_map->pmap,
12249                                     old_entry->vme_start,
12250                                     old_entry->vme_end,
12251                                     prot);
12252                         }
12253                 }
12254
                      /*
                       * Reload "object" from the entry after
                       * VME_OBJECT_SHADOW(): from here on the object
                       * to share is whatever the entry points to now
                       * (presumably the new shadow).
                       */
12255                 old_entry->needs_copy = FALSE;
12256                 object = VME_OBJECT(old_entry);
12257         }
12258
12259
12260         /*
12261          *      If object was using a symmetric copy strategy,
12262          *      change its copy strategy to the default
12263          *      asymmetric copy strategy, which is copy_delay
12264          *      in the non-norma case and copy_call in the
12265          *      norma case. Bump the reference count for the
12266          *      new entry.
              *      (The code below always selects
              *      MEMORY_OBJECT_COPY_DELAY.)
12267          */
12268
12269         if (old_entry->is_sub_map) {
                      /* the submap reference is taken with the submap locked */
12270                 vm_map_lock(VME_SUBMAP(old_entry));
12271                 vm_map_reference(VME_SUBMAP(old_entry));
12272                 vm_map_unlock(VME_SUBMAP(old_entry));
12273         } else {
12274                 vm_object_lock(object);
12275                 vm_object_reference_locked(object);
12276                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12277                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12278                 }
12279                 vm_object_unlock(object);
12280         }
12281
12282         /*
12283          *      Clone the entry, using object ref from above.
12284          *      Mark both entries as shared.
12285          */
12286
12287         new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12288                                                           * map or descendants */
12289         vm_map_entry_copy(new_entry, old_entry);
12290         old_entry->is_shared = TRUE;
12291         new_entry->is_shared = TRUE;
12292
12293         /*
12294          * We're dealing with a shared mapping, so the resulting mapping
12295          * should inherit some of the original mapping's accounting settings.
12296          * "iokit_acct" should have been cleared in vm_map_entry_copy().
12297          * "use_pmap" should stay the same as before (if it hasn't been reset
12298          * to TRUE when we cleared "iokit_acct").
12299          */
12300         assert(!new_entry->iokit_acct);
12301
12302         /*
12303          *      If old entry's inheritance is VM_INHERIT_NONE,
12304          *      the new entry is for corpse fork, remove the
12305          *      write permission from the new entry.
12306          */
12307         if (old_entry->inheritance == VM_INHERIT_NONE) {
12308                 new_entry->protection &= ~VM_PROT_WRITE;
12309                 new_entry->max_protection &= ~VM_PROT_WRITE;
12310         }
12311
12312         /*
12313          *      Insert the entry into the new map -- we
12314          *      know we're inserting at the end of the new
12315          *      map.
12316          */
12317
12318         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12319             VM_MAP_KERNEL_FLAGS_NONE);
12320
12321         /*
12322          *      Update the physical map
12323          */
12324
12325         if (old_entry->is_sub_map) {
12326                 /* Bill Angell pmap support goes here */
12327         } else {
12328                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12329                     old_entry->vme_end - old_entry->vme_start,
12330                     old_entry->vme_start);
12331         }
12332 }
12333
      /*
       *      vm_map_fork_copy:
       *
       *      Fork helper for one entry of "old_map": copy the range
       *      covered by "*old_entry_p" into "new_map" by way of
       *      vm_map_copyin_internal().  "old_map" is unlocked for the
       *      duration of the copyin and locked again before returning.
       *
       *      Returns TRUE once the copy has been inserted into
       *      "new_map", FALSE if the copyin failed.  Either way,
       *      "*old_entry_p" is updated to the entry of "old_map" at
       *      which the caller should resume its traversal.
       */
12334 static boolean_t
12335 vm_map_fork_copy(
12336         vm_map_t        old_map,
12337         vm_map_entry_t  *old_entry_p,
12338         vm_map_t        new_map,
12339         int             vm_map_copyin_flags)
12340 {
12341         vm_map_offset_t start = (*old_entry_p)->vme_start;
12342         vm_map_size_t   entry_size = (*old_entry_p)->vme_end - start;
12343         vm_map_entry_t  cursor = vm_map_last_entry(new_map);
12344         vm_map_copy_t   copy;
12345         kern_return_t   kr;
12346
12347         vm_map_unlock(old_map);
12348         /*
12349          *      Ask for the maxprot flavor of copyin: what
12350          *      matters is whether this memory can ever be
12351          *      accessed, not merely whether it is accessible
12352          *      right now.
12353          */
12354         kr = vm_map_copyin_internal(old_map, start, entry_size,
12355             vm_map_copyin_flags | VM_MAP_COPYIN_USE_MAXPROT, &copy);
12357         if (kr != KERN_SUCCESS) {
12358                 /*
12359                  *      "old_map" was unlocked and may have
12360                  *      changed, so look "start" up again and
12361                  *      step over blank space or a region that
12362                  *      can never be read.
12363                  */
12364                 vm_map_lock(old_map);
12365                 if (!(vm_map_lookup_entry(old_map, start, &cursor) &&
12366                     (cursor->max_protection & VM_PROT_READ) != VM_PROT_NONE)) {
12367                         cursor = cursor->vme_next;
12368                 }
12369                 *old_entry_p = cursor;
12370
12371                 /*
12372                  * XXX  Some error returns ought to make us
12373                  * XXX  skip to the next element.  Note that
12374                  *      INVALID_ADDRESS and PROTECTION_FAILURE
12375                  *      are taken care of by the code above.
12376                  */
12377
12378                 return FALSE;
12379         }
12380
12381         /*
12382          *      The copyin worked: splice the copy into the new map.
12383          */
12385         vm_map_copy_insert(new_map, cursor, copy);
12386
12387         /*
12388          *      Resume the walk of the old map just past
12389          *      the range that was copied.
12390          */
12392         vm_map_lock(old_map);
12393         start += entry_size;
12394         if (!vm_map_lookup_entry(old_map, start, &cursor)) {
12395                 cursor = cursor->vme_next;
12396         } else if (cursor->vme_start != start) {
12397                 /*
12398                  * Clip only when "start" falls inside the
12399                  * entry: an entry that already begins there
12400                  * needs no clipping, and leaving it alone
12401                  * avoids any unnecessary unnesting...
12402                  */
12404                 vm_map_clip_start(old_map, cursor, start);
12406         }
12407         *old_entry_p = cursor;
12408
12409         return TRUE;
12410 }
12411
12412 /*
12413  *      vm_map_fork:
12414  *
12415  *      Create and return a new map based on the old
12416  *      map, according to the inheritance values on the
12417  *      regions in that map and the options.
12418  *
12419  *      The source map must not be locked.
12420  */
12421 vm_map_t
12422 vm_map_fork(
12423         ledger_t        ledger,
12424         vm_map_t        old_map,
12425         int             options)
12426 {
12427         pmap_t          new_pmap;
12428         vm_map_t        new_map;
12429         vm_map_entry_t  old_entry;
12430         vm_map_size_t   new_size = 0, entry_size;
12431         vm_map_entry_t  new_entry;
12432         boolean_t       src_needs_copy;
12433         boolean_t       new_entry_needs_copy;
12434         boolean_t       pmap_is64bit;
12435         int             vm_map_copyin_flags;
12436         vm_inherit_t    old_entry_inheritance;
12437         int             map_create_options;
12438         kern_return_t   footprint_collect_kr;
12439
12440         if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12441             VM_MAP_FORK_PRESERVE_PURGEABLE |
12442             VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12443                 /* unsupported option */
12444                 return VM_MAP_NULL;
12445         }
12446
12447         pmap_is64bit =
12448 #if defined(__i386__) || defined(__x86_64__)
12449             old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12450 #elif defined(__arm64__)
12451             old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12452 #elif defined(__arm__)
12453             FALSE;
12454 #else
12455 #error Unknown architecture.
12456 #endif
12457
12458         unsigned int pmap_flags = 0;
12459         pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12460 #if defined(HAS_APPLE_PAC)
12461         pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12462 #endif
12463         new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12464
12465         vm_map_reference_swap(old_map);
12466         vm_map_lock(old_map);
12467
12468         map_create_options = 0;
12469         if (old_map->hdr.entries_pageable) {
12470                 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12471         }
12472         if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12473                 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12474                 footprint_collect_kr = KERN_SUCCESS;
12475         }
12476         new_map = vm_map_create_options(new_pmap,
12477             old_map->min_offset,
12478             old_map->max_offset,
12479             map_create_options);
12480         vm_map_lock(new_map);
12481         vm_commit_pagezero_status(new_map);
12482         /* inherit the parent map's page size */
12483         vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12484         for (
12485                 old_entry = vm_map_first_entry(old_map);
12486                 old_entry != vm_map_to_entry(old_map);
12487                 ) {
12488                 entry_size = old_entry->vme_end - old_entry->vme_start;
12489
12490                 old_entry_inheritance = old_entry->inheritance;
12491                 /*
12492                  * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12493                  * share VM_INHERIT_NONE entries that are not backed by a
12494                  * device pager.
12495                  */
12496                 if (old_entry_inheritance == VM_INHERIT_NONE &&
12497                     (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12498                     !(!old_entry->is_sub_map &&
12499                     VME_OBJECT(old_entry) != NULL &&
12500                     VME_OBJECT(old_entry)->pager != NULL &&
12501                     is_device_pager_ops(
12502                             VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12503                         old_entry_inheritance = VM_INHERIT_SHARE;
12504                 }
12505
12506                 if (old_entry_inheritance != VM_INHERIT_NONE &&
12507                     (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12508                     footprint_collect_kr == KERN_SUCCESS) {
12509                         /*
12510                          * The corpse won't have old_map->pmap to query
12511                          * footprint information, so collect that data now
12512                          * and store it in new_map->vmmap_corpse_footprint
12513                          * for later autopsy.
12514                          */
12515                         footprint_collect_kr =
12516                             vm_map_corpse_footprint_collect(old_map,
12517                             old_entry,
12518                             new_map);
12519                 }
12520
12521                 switch (old_entry_inheritance) {
12522                 case VM_INHERIT_NONE:
12523                         break;
12524
12525                 case VM_INHERIT_SHARE:
12526                         vm_map_fork_share(old_map, old_entry, new_map);
12527                         new_size += entry_size;
12528                         break;
12529
12530                 case VM_INHERIT_COPY:
12531
12532                         /*
12533                          *      Inline the copy_quickly case;
12534                          *      upon failure, fall back on call
12535                          *      to vm_map_fork_copy.
12536                          */
12537
12538                         if (old_entry->is_sub_map) {
12539                                 break;
12540                         }
12541                         if ((old_entry->wired_count != 0) ||
12542                             ((VME_OBJECT(old_entry) != NULL) &&
12543                             (VME_OBJECT(old_entry)->true_share))) {
12544                                 goto slow_vm_map_fork_copy;
12545                         }
12546
12547                         new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12548                         vm_map_entry_copy(new_entry, old_entry);
12549                         if (new_entry->is_sub_map) {
12550                                 /* clear address space specifics */
12551                                 new_entry->use_pmap = FALSE;
12552                         } else {
12553                                 /*
12554                                  * We're dealing with a copy-on-write operation,
12555                                  * so the resulting mapping should not inherit
12556                                  * the original mapping's accounting settings.
12557                                  * "iokit_acct" should have been cleared in
12558                                  * vm_map_entry_copy().
12559                                  * "use_pmap" should be reset to its default
12560                                  * (TRUE) so that the new mapping gets
12561                                  * accounted for in the task's memory footprint.
12562                                  */
12563                                 assert(!new_entry->iokit_acct);
12564                                 new_entry->use_pmap = TRUE;
12565                         }
12566
12567                         if (!vm_object_copy_quickly(
12568                                     VME_OBJECT_PTR(new_entry),
12569                                     VME_OFFSET(old_entry),
12570                                     (old_entry->vme_end -
12571                                     old_entry->vme_start),
12572                                     &src_needs_copy,
12573                                     &new_entry_needs_copy)) {
12574                                 vm_map_entry_dispose(new_map, new_entry);
12575                                 goto slow_vm_map_fork_copy;
12576                         }
12577
12578                         /*
12579                          *      Handle copy-on-write obligations
12580                          */
12581
12582                         if (src_needs_copy && !old_entry->needs_copy) {
12583                                 vm_prot_t prot;
12584
12585                                 assert(!pmap_has_prot_policy(old_entry->protection));
12586
12587                                 prot = old_entry->protection & ~VM_PROT_WRITE;
12588
12589                                 if (override_nx(old_map, VME_ALIAS(old_entry))
12590                                     && prot) {
12591                                         prot |= VM_PROT_EXECUTE;
12592                                 }
12593
12594                                 assert(!pmap_has_prot_policy(prot));
12595
12596                                 vm_object_pmap_protect(
12597                                         VME_OBJECT(old_entry),
12598                                         VME_OFFSET(old_entry),
12599                                         (old_entry->vme_end -
12600                                         old_entry->vme_start),
12601                                         ((old_entry->is_shared
12602                                         || old_map->mapped_in_other_pmaps)
12603                                         ? PMAP_NULL :
12604                                         old_map->pmap),
12605                                         old_entry->vme_start,
12606                                         prot);
12607
12608                                 assert(old_entry->wired_count == 0);
12609                                 old_entry->needs_copy = TRUE;
12610                         }
12611                         new_entry->needs_copy = new_entry_needs_copy;
12612
12613                         /*
12614                          *      Insert the entry at the end
12615                          *      of the map.
12616                          */
12617
12618                         vm_map_store_entry_link(new_map,
12619                             vm_map_last_entry(new_map),
12620                             new_entry,
12621                             VM_MAP_KERNEL_FLAGS_NONE);
12622                         new_size += entry_size;
12623                         break;
12624
12625 slow_vm_map_fork_copy:
12626                         vm_map_copyin_flags = 0;
12627                         if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12628                                 vm_map_copyin_flags |=
12629                                     VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12630                         }
12631                         if (vm_map_fork_copy(old_map,
12632                             &old_entry,
12633                             new_map,
12634                             vm_map_copyin_flags)) {
12635                                 new_size += entry_size;
12636                         }
12637                         continue;
12638                 }
12639                 old_entry = old_entry->vme_next;
12640         }
12641
12642 #if defined(__arm64__)
12643         pmap_insert_sharedpage(new_map->pmap);
12644 #endif
12645
12646         new_map->size = new_size;
12647
12648         if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12649                 vm_map_corpse_footprint_collect_done(new_map);
12650         }
12651
12652         vm_map_unlock(new_map);
12653         vm_map_unlock(old_map);
12654         vm_map_deallocate(old_map);
12655
12656         return new_map;
12657 }
12658
12659 /*
12660  * vm_map_exec:
12661  *
12662  *      Set up the "new_map" with the proper execution environment according
12663  *      to the type of executable (platform, 64bit, chroot environment).
12664  *      Map the comm page and shared region, etc...
       *
       *      Parameters:
       *        new_map      map being set up; passed to both helpers
       *        task         passed to both helpers along with new_map
       *        is64bit      passed to both helpers; not part of the trace
       *                     messages
       *        fsroot       opaque pointer forwarded to
       *                     vm_shared_region_enter() (presumably the root
       *                     directory of a chroot environment -- TODO confirm
       *                     against the callee)
       *        cpu,
       *        cpu_subtype  forwarded to vm_shared_region_enter() only
       *
       *      Returns KERN_SUCCESS unconditionally: the results of
       *      vm_commpage_enter() and vm_shared_region_enter() are explicitly
       *      discarded, so a failure in either is not reported to the caller.
12665  */
12666 kern_return_t
12667 vm_map_exec(
12668         vm_map_t        new_map,
12669         task_t          task,
12670         boolean_t       is64bit,
12671         void            *fsroot,
12672         cpu_type_t      cpu,
12673         cpu_subtype_t   cpu_subtype)
12674 {
              /* trace entry ("->"); pointers go through VM_KERNEL_ADDRPERM() first */
12675         SHARED_REGION_TRACE_DEBUG(
12676                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12677                 (void *)VM_KERNEL_ADDRPERM(current_task()),
12678                 (void *)VM_KERNEL_ADDRPERM(new_map),
12679                 (void *)VM_KERNEL_ADDRPERM(task),
12680                 (void *)VM_KERNEL_ADDRPERM(fsroot),
12681                 cpu,
12682                 cpu_subtype));
              /*
               * Comm page first, then the shared region.  Both return values
               * are explicitly cast to void, so neither call can make this
               * function fail.
               */
12683         (void) vm_commpage_enter(new_map, task, is64bit);
12684         (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
              /* trace exit ("<-") with the same arguments as the entry trace */
12685         SHARED_REGION_TRACE_DEBUG(
12686                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12687                 (void *)VM_KERNEL_ADDRPERM(current_task()),
12688                 (void *)VM_KERNEL_ADDRPERM(new_map),
12689                 (void *)VM_KERNEL_ADDRPERM(task),
12690                 (void *)VM_KERNEL_ADDRPERM(fsroot),
12691                 cpu,
12692                 cpu_subtype));
12693         return KERN_SUCCESS;
12694 }
12695
12696 /*
12697  *      vm_map_lookup_locked:
12698  *
12699  *      Finds the VM object, offset, and
12700  *      protection for a given virtual address in the
12701  *      specified map, assuming a page fault of the
12702  *      type specified.
12703  *
12704  *      Returns the (object, offset, protection) for
12705  *      this address, whether it is wired down, and whether
12706  *      this map has the only reference to the data in question.
12707  *      In order to later verify this lookup, a "version"
12708  *      is returned.
12709  *
12710  *      The map MUST be locked by the caller and WILL be
12711  *      locked on exit.  In order to guarantee the
12712  *      existence of the returned object, it is returned
12713  *      locked.
12714  *
12715  *      If a lookup is requested with "write protection"
12716  *      specified, the map may be changed to perform virtual
12717  *      copying operations, although the data referenced will
12718  *      remain the same.
12719  */
12720 kern_return_t
12721 vm_map_lookup_locked(
12722         vm_map_t                *var_map,       /* IN/OUT */
12723         vm_map_offset_t         vaddr,
12724         vm_prot_t               fault_type,
12725         int                     object_lock_type,
12726         vm_map_version_t        *out_version,   /* OUT */
12727         vm_object_t             *object,        /* OUT */
12728         vm_object_offset_t      *offset,        /* OUT */
12729         vm_prot_t               *out_prot,      /* OUT */
12730         boolean_t               *wired,         /* OUT */
12731         vm_object_fault_info_t  fault_info,     /* OUT */
12732         vm_map_t                *real_map)
12733 {
12734         vm_map_entry_t                  entry;
12735         vm_map_t                        map = *var_map;
12736         vm_map_t                        old_map = *var_map;
12737         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
12738         vm_map_offset_t                 cow_parent_vaddr = 0;
12739         vm_map_offset_t                 old_start = 0;
12740         vm_map_offset_t                 old_end = 0;
12741         vm_prot_t                       prot;
12742         boolean_t                       mask_protections;
12743         boolean_t                       force_copy;
12744         vm_prot_t                       original_fault_type;
12745
12746         /*
12747          * VM_PROT_MASK means that the caller wants us to use "fault_type"
12748          * as a mask against the mapping's actual protections, not as an
12749          * absolute value.
12750          */
12751         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12752         force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12753         fault_type &= VM_PROT_ALL;
12754         original_fault_type = fault_type;
12755
12756         *real_map = map;
12757
12758 RetryLookup:
12759         fault_type = original_fault_type;
12760
12761         /*
12762          *      If the map has an interesting hint, try it before calling
12763          *      full blown lookup routine.
12764          */
12765         entry = map->hint;
12766
12767         if ((entry == vm_map_to_entry(map)) ||
12768             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12769                 vm_map_entry_t  tmp_entry;
12770
12771                 /*
12772                  *      Entry was either not a valid hint, or the vaddr
12773                  *      was not contained in the entry, so do a full lookup.
12774                  */
12775                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12776                         if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12777                                 vm_map_unlock(cow_sub_map_parent);
12778                         }
12779                         if ((*real_map != map)
12780                             && (*real_map != cow_sub_map_parent)) {
12781                                 vm_map_unlock(*real_map);
12782                         }
12783                         return KERN_INVALID_ADDRESS;
12784                 }
12785
12786                 entry = tmp_entry;
12787         }
12788         if (map == old_map) {
12789                 old_start = entry->vme_start;
12790                 old_end = entry->vme_end;
12791         }
12792
12793         /*
12794          *      Handle submaps.  Drop lock on upper map, submap is
12795          *      returned locked.
12796          */
12797
12798 submap_recurse:
12799         if (entry->is_sub_map) {
12800                 vm_map_offset_t         local_vaddr;
12801                 vm_map_offset_t         end_delta;
12802                 vm_map_offset_t         start_delta;
12803                 vm_map_entry_t          submap_entry;
12804                 vm_prot_t               subentry_protection;
12805                 vm_prot_t               subentry_max_protection;
12806                 boolean_t               subentry_no_copy_on_read;
12807                 boolean_t               mapped_needs_copy = FALSE;
12808
12809                 local_vaddr = vaddr;
12810
12811                 if ((entry->use_pmap &&
12812                     !((fault_type & VM_PROT_WRITE) ||
12813                     force_copy))) {
12814                         /* if real_map equals map we unlock below */
12815                         if ((*real_map != map) &&
12816                             (*real_map != cow_sub_map_parent)) {
12817                                 vm_map_unlock(*real_map);
12818                         }
12819                         *real_map = VME_SUBMAP(entry);
12820                 }
12821
12822                 if (entry->needs_copy &&
12823                     ((fault_type & VM_PROT_WRITE) ||
12824                     force_copy)) {
12825                         if (!mapped_needs_copy) {
12826                                 if (vm_map_lock_read_to_write(map)) {
12827                                         vm_map_lock_read(map);
12828                                         *real_map = map;
12829                                         goto RetryLookup;
12830                                 }
12831                                 vm_map_lock_read(VME_SUBMAP(entry));
12832                                 *var_map = VME_SUBMAP(entry);
12833                                 cow_sub_map_parent = map;
12834                                 /* reset base to map before cow object */
12835                                 /* this is the map which will accept   */
12836                                 /* the new cow object */
12837                                 old_start = entry->vme_start;
12838                                 old_end = entry->vme_end;
12839                                 cow_parent_vaddr = vaddr;
12840                                 mapped_needs_copy = TRUE;
12841                         } else {
12842                                 vm_map_lock_read(VME_SUBMAP(entry));
12843                                 *var_map = VME_SUBMAP(entry);
12844                                 if ((cow_sub_map_parent != map) &&
12845                                     (*real_map != map)) {
12846                                         vm_map_unlock(map);
12847                                 }
12848                         }
12849                 } else {
12850                         vm_map_lock_read(VME_SUBMAP(entry));
12851                         *var_map = VME_SUBMAP(entry);
12852                         /* leave map locked if it is a target */
12853                         /* cow sub_map above otherwise, just  */
12854                         /* follow the maps down to the object */
12855                         /* here we unlock knowing we are not  */
12856                         /* revisiting the map.  */
12857                         if ((*real_map != map) && (map != cow_sub_map_parent)) {
12858                                 vm_map_unlock_read(map);
12859                         }
12860                 }
12861
12862                 map = *var_map;
12863
12864                 /* calculate the offset in the submap for vaddr */
12865                 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12866
12867 RetrySubMap:
12868                 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12869                         if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12870                                 vm_map_unlock(cow_sub_map_parent);
12871                         }
12872                         if ((*real_map != map)
12873                             && (*real_map != cow_sub_map_parent)) {
12874                                 vm_map_unlock(*real_map);
12875                         }
12876                         *real_map = map;
12877                         return KERN_INVALID_ADDRESS;
12878                 }
12879
12880                 /* find the attenuated shadow of the underlying object */
12881                 /* on our target map */
12882
12883                 /* in english the submap object may extend beyond the     */
12884                 /* region mapped by the entry or, may only fill a portion */
12885                 /* of it.  For our purposes, we only care if the object   */
12886                 /* doesn't fill.  In this case the area which will        */
12887                 /* ultimately be clipped in the top map will only need    */
12888                 /* to be as big as the portion of the underlying entry    */
12889                 /* which is mapped */
12890                 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12891                     submap_entry->vme_start - VME_OFFSET(entry) : 0;
12892
12893                 end_delta =
12894                     (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12895                     submap_entry->vme_end ?
12896                     0 : (VME_OFFSET(entry) +
12897                     (old_end - old_start))
12898                     - submap_entry->vme_end;
12899
12900                 old_start += start_delta;
12901                 old_end -= end_delta;
12902
12903                 if (submap_entry->is_sub_map) {
12904                         entry = submap_entry;
12905                         vaddr = local_vaddr;
12906                         goto submap_recurse;
12907                 }
12908
12909                 if (((fault_type & VM_PROT_WRITE) ||
12910                     force_copy)
12911                     && cow_sub_map_parent) {
12912                         vm_object_t     sub_object, copy_object;
12913                         vm_object_offset_t copy_offset;
12914                         vm_map_offset_t local_start;
12915                         vm_map_offset_t local_end;
12916                         boolean_t               copied_slowly = FALSE;
12917
12918                         if (vm_map_lock_read_to_write(map)) {
12919                                 vm_map_lock_read(map);
12920                                 old_start -= start_delta;
12921                                 old_end += end_delta;
12922                                 goto RetrySubMap;
12923                         }
12924
12925
12926                         sub_object = VME_OBJECT(submap_entry);
12927                         if (sub_object == VM_OBJECT_NULL) {
12928                                 sub_object =
12929                                     vm_object_allocate(
12930                                         (vm_map_size_t)
12931                                         (submap_entry->vme_end -
12932                                         submap_entry->vme_start));
12933                                 VME_OBJECT_SET(submap_entry, sub_object);
12934                                 VME_OFFSET_SET(submap_entry, 0);
12935                                 assert(!submap_entry->is_sub_map);
12936                                 assert(submap_entry->use_pmap);
12937                         }
12938                         local_start =  local_vaddr -
12939                             (cow_parent_vaddr - old_start);
12940                         local_end = local_vaddr +
12941                             (old_end - cow_parent_vaddr);
12942                         vm_map_clip_start(map, submap_entry, local_start);
12943                         vm_map_clip_end(map, submap_entry, local_end);
12944                         if (submap_entry->is_sub_map) {
12945                                 /* unnesting was done when clipping */
12946                                 assert(!submap_entry->use_pmap);
12947                         }
12948
12949                         /* This is the COW case, let's connect */
12950                         /* an entry in our space to the underlying */
12951                         /* object in the submap, bypassing the  */
12952                         /* submap. */
12953
12954
12955                         if (submap_entry->wired_count != 0 ||
12956                             (sub_object->copy_strategy ==
12957                             MEMORY_OBJECT_COPY_NONE)) {
12958                                 vm_object_lock(sub_object);
12959                                 vm_object_copy_slowly(sub_object,
12960                                     VME_OFFSET(submap_entry),
12961                                     (submap_entry->vme_end -
12962                                     submap_entry->vme_start),
12963                                     FALSE,
12964                                     &copy_object);
12965                                 copied_slowly = TRUE;
12966                         } else {
12967                                 /* set up shadow object */
12968                                 copy_object = sub_object;
12969                                 vm_object_lock(sub_object);
12970                                 vm_object_reference_locked(sub_object);
12971                                 sub_object->shadowed = TRUE;
12972                                 vm_object_unlock(sub_object);
12973
12974                                 assert(submap_entry->wired_count == 0);
12975                                 submap_entry->needs_copy = TRUE;
12976
12977                                 prot = submap_entry->protection;
12978                                 assert(!pmap_has_prot_policy(prot));
12979                                 prot = prot & ~VM_PROT_WRITE;
12980                                 assert(!pmap_has_prot_policy(prot));
12981
12982                                 if (override_nx(old_map,
12983                                     VME_ALIAS(submap_entry))
12984                                     && prot) {
12985                                         prot |= VM_PROT_EXECUTE;
12986                                 }
12987
12988                                 vm_object_pmap_protect(
12989                                         sub_object,
12990                                         VME_OFFSET(submap_entry),
12991                                         submap_entry->vme_end -
12992                                         submap_entry->vme_start,
12993                                         (submap_entry->is_shared
12994                                         || map->mapped_in_other_pmaps) ?
12995                                         PMAP_NULL : map->pmap,
12996                                         submap_entry->vme_start,
12997                                         prot);
12998                         }
12999
13000                         /*
13001                          * Adjust the fault offset to the submap entry.
13002                          */
13003                         copy_offset = (local_vaddr -
13004                             submap_entry->vme_start +
13005                             VME_OFFSET(submap_entry));
13006
13007                         /* This works differently than the */
13008                         /* normal submap case. We go back  */
13009                         /* to the parent of the cow map and*/
13010                         /* clip out the target portion of  */
13011                         /* the sub_map, substituting the   */
13012                         /* new copy object,                */
13013
13014                         subentry_protection = submap_entry->protection;
13015                         subentry_max_protection = submap_entry->max_protection;
13016                         subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13017                         vm_map_unlock(map);
13018                         submap_entry = NULL; /* not valid after map unlock */
13019
13020                         local_start = old_start;
13021                         local_end = old_end;
13022                         map = cow_sub_map_parent;
13023                         *var_map = cow_sub_map_parent;
13024                         vaddr = cow_parent_vaddr;
13025                         cow_sub_map_parent = NULL;
13026
13027                         if (!vm_map_lookup_entry(map,
13028                             vaddr, &entry)) {
13029                                 vm_object_deallocate(
13030                                         copy_object);
13031                                 vm_map_lock_write_to_read(map);
13032                                 return KERN_INVALID_ADDRESS;
13033                         }
13034
13035                         /* clip out the portion of space */
13036                         /* mapped by the sub map which   */
13037                         /* corresponds to the underlying */
13038                         /* object */
13039
13040                         /*
13041                          * Clip (and unnest) the smallest nested chunk
13042                          * possible around the faulting address...
13043                          */
13044                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
13045                         local_end = local_start + pmap_nesting_size_min;
13046                         /*
13047                          * ... but don't go beyond the "old_start" to "old_end"
13048                          * range, to avoid spanning over another VM region
13049                          * with a possibly different VM object and/or offset.
13050                          */
13051                         if (local_start < old_start) {
13052                                 local_start = old_start;
13053                         }
13054                         if (local_end > old_end) {
13055                                 local_end = old_end;
13056                         }
13057                         /*
13058                          * Adjust copy_offset to the start of the range.
13059                          */
13060                         copy_offset -= (vaddr - local_start);
13061
13062                         vm_map_clip_start(map, entry, local_start);
13063                         vm_map_clip_end(map, entry, local_end);
13064                         if (entry->is_sub_map) {
13065                                 /* unnesting was done when clipping */
13066                                 assert(!entry->use_pmap);
13067                         }
13068
13069                         /* substitute copy object for */
13070                         /* shared map entry           */
13071                         vm_map_deallocate(VME_SUBMAP(entry));
13072                         assert(!entry->iokit_acct);
13073                         entry->is_sub_map = FALSE;
13074                         entry->use_pmap = TRUE;
13075                         VME_OBJECT_SET(entry, copy_object);
13076
13077                         /* propagate the submap entry's protections */
13078                         if (entry->protection != VM_PROT_READ) {
13079                                 /*
13080                                  * Someone has already altered the top entry's
13081                                  * protections via vm_protect(VM_PROT_COPY).
13082                                  * Respect these new values and ignore the
13083                                  * submap entry's protections.
13084                                  */
13085                         } else {
13086                                 /*
13087                                  * Regular copy-on-write: propagate the submap
13088                                  * entry's protections to the top map entry.
13089                                  */
13090                                 entry->protection |= subentry_protection;
13091                         }
13092                         entry->max_protection |= subentry_max_protection;
13093                         /* propagate no_copy_on_read */
13094                         entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13095
13096                         if ((entry->protection & VM_PROT_WRITE) &&
13097                             (entry->protection & VM_PROT_EXECUTE) &&
13098 #if !CONFIG_EMBEDDED
13099                             map != kernel_map &&
13100                             cs_process_enforcement(NULL) &&
13101 #endif /* !CONFIG_EMBEDDED */
13102                             !(entry->used_for_jit)) {
13103                                 DTRACE_VM3(cs_wx,
13104                                     uint64_t, (uint64_t)entry->vme_start,
13105                                     uint64_t, (uint64_t)entry->vme_end,
13106                                     vm_prot_t, entry->protection);
13107                                 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13108                                     proc_selfpid(),
13109                                     (current_task()->bsd_info
13110                                     ? proc_name_address(current_task()->bsd_info)
13111                                     : "?"),
13112                                     __FUNCTION__);
13113                                 entry->protection &= ~VM_PROT_EXECUTE;
13114                         }
13115
13116                         if (copied_slowly) {
13117                                 VME_OFFSET_SET(entry, local_start - old_start);
13118                                 entry->needs_copy = FALSE;
13119                                 entry->is_shared = FALSE;
13120                         } else {
13121                                 VME_OFFSET_SET(entry, copy_offset);
13122                                 assert(entry->wired_count == 0);
13123                                 entry->needs_copy = TRUE;
13124                                 if (entry->inheritance == VM_INHERIT_SHARE) {
13125                                         entry->inheritance = VM_INHERIT_COPY;
13126                                 }
13127                                 if (map != old_map) {
13128                                         entry->is_shared = TRUE;
13129                                 }
13130                         }
13131                         if (entry->inheritance == VM_INHERIT_SHARE) {
13132                                 entry->inheritance = VM_INHERIT_COPY;
13133                         }
13134
13135                         vm_map_lock_write_to_read(map);
13136                 } else {
13137                         if ((cow_sub_map_parent)
13138                             && (cow_sub_map_parent != *real_map)
13139                             && (cow_sub_map_parent != map)) {
13140                                 vm_map_unlock(cow_sub_map_parent);
13141                         }
13142                         entry = submap_entry;
13143                         vaddr = local_vaddr;
13144                 }
13145         }
13146
13147         /*
13148          *      Check whether this task is allowed to have
13149          *      this page.
13150          */
13151
13152         prot = entry->protection;
13153
13154         if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13155                 /*
13156                  * HACK -- if not a stack, then allow execution
13157                  */
13158                 prot |= VM_PROT_EXECUTE;
13159         }
13160
13161         if (mask_protections) {
13162                 fault_type &= prot;
13163                 if (fault_type == VM_PROT_NONE) {
13164                         goto protection_failure;
13165                 }
13166         }
13167         if (((fault_type & prot) != fault_type)
13168 #if __arm64__
13169             /* prefetch abort in execute-only page */
13170             && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13171 #endif
13172             ) {
13173 protection_failure:
13174                 if (*real_map != map) {
13175                         vm_map_unlock(*real_map);
13176                 }
13177                 *real_map = map;
13178
13179                 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13180                         log_stack_execution_failure((addr64_t)vaddr, prot);
13181                 }
13182
13183                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13184                 return KERN_PROTECTION_FAILURE;
13185         }
13186
13187         /*
13188          *      If this page is not pageable, we have to get
13189          *      it for all possible accesses.
13190          */
13191
13192         *wired = (entry->wired_count != 0);
13193         if (*wired) {
13194                 fault_type = prot;
13195         }
13196
13197         /*
13198          *      If the entry was copy-on-write, we either ...
13199          */
13200
13201         if (entry->needs_copy) {
13202                 /*
13203                  *      If we want to write the page, we may as well
13204                  *      handle that now since we've got the map locked.
13205                  *
13206                  *      If we don't need to write the page, we just
13207                  *      demote the permissions allowed.
13208                  */
13209
13210                 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13211                         /*
13212                          *      Make a new object, and place it in the
13213                          *      object chain.  Note that no new references
13214                          *      have appeared -- one just moved from the
13215                          *      map to the new object.
13216                          */
13217
13218                         if (vm_map_lock_read_to_write(map)) {
13219                                 vm_map_lock_read(map);
13220                                 goto RetryLookup;
13221                         }
13222
13223                         if (VME_OBJECT(entry)->shadowed == FALSE) {
13224                                 vm_object_lock(VME_OBJECT(entry));
13225                                 VME_OBJECT(entry)->shadowed = TRUE;
13226                                 vm_object_unlock(VME_OBJECT(entry));
13227                         }
13228                         VME_OBJECT_SHADOW(entry,
13229                             (vm_map_size_t) (entry->vme_end -
13230                             entry->vme_start));
13231                         entry->needs_copy = FALSE;
13232
13233                         vm_map_lock_write_to_read(map);
13234                 }
13235                 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13236                         /*
13237                          *      We're attempting to read a copy-on-write
13238                          *      page -- don't allow writes.
13239                          */
13240
13241                         prot &= (~VM_PROT_WRITE);
13242                 }
13243         }
13244
13245         /*
13246          *      Create an object if necessary.
13247          */
13248         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13249                 if (vm_map_lock_read_to_write(map)) {
13250                         vm_map_lock_read(map);
13251                         goto RetryLookup;
13252                 }
13253
13254                 VME_OBJECT_SET(entry,
13255                     vm_object_allocate(
13256                             (vm_map_size_t)(entry->vme_end -
13257                             entry->vme_start)));
13258                 VME_OFFSET_SET(entry, 0);
13259                 assert(entry->use_pmap);
13260                 vm_map_lock_write_to_read(map);
13261         }
13262
13263         /*
13264          *      Return the object/offset from this entry.  If the entry
13265          *      was copy-on-write or empty, it has been fixed up.  Also
13266          *      return the protection.
13267          */
13268
13269         *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13270         *object = VME_OBJECT(entry);
13271         *out_prot = prot;
13272         KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
13273
13274         if (fault_info) {
13275                 fault_info->interruptible = THREAD_UNINT; /* for now... */
13276                 /* ... the caller will change "interruptible" if needed */
13277                 fault_info->cluster_size = 0;
13278                 fault_info->user_tag = VME_ALIAS(entry);
13279                 fault_info->pmap_options = 0;
13280                 if (entry->iokit_acct ||
13281                     (!entry->is_sub_map && !entry->use_pmap)) {
13282                         fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13283                 }
13284                 fault_info->behavior = entry->behavior;
13285                 fault_info->lo_offset = VME_OFFSET(entry);
13286                 fault_info->hi_offset =
13287                     (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13288                 fault_info->no_cache  = entry->no_cache;
13289                 fault_info->stealth = FALSE;
13290                 fault_info->io_sync = FALSE;
13291                 if (entry->used_for_jit ||
13292                     entry->vme_resilient_codesign) {
13293                         fault_info->cs_bypass = TRUE;
13294                 } else {
13295                         fault_info->cs_bypass = FALSE;
13296                 }
13297                 fault_info->pmap_cs_associated = FALSE;
13298 #if CONFIG_PMAP_CS
13299                 if (entry->pmap_cs_associated) {
13300                         /*
13301                          * The pmap layer will validate this page
13302                          * before allowing it to be executed from.
13303                          */
13304                         fault_info->pmap_cs_associated = TRUE;
13305                 }
13306 #endif /* CONFIG_PMAP_CS */
13307                 fault_info->mark_zf_absent = FALSE;
13308                 fault_info->batch_pmap_op = FALSE;
13309                 fault_info->resilient_media = entry->vme_resilient_media;
13310                 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13311         }
13312
13313         /*
13314          *      Lock the object to prevent it from disappearing
13315          */
13316         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13317                 vm_object_lock(*object);
13318         } else {
13319                 vm_object_lock_shared(*object);
13320         }
13321
13322         /*
13323          *      Save the version number
13324          */
13325
13326         out_version->main_timestamp = map->timestamp;
13327
13328         return KERN_SUCCESS;
13329 }
13330
13331
13332 /*
13333  *      vm_map_verify:
13334  *
13335  *      Checks that "map" still carries the timestamp recorded in
13336  *      "version", i.e. that it has not changed since then.  The
13337  *      caller must hold the map lock ("shared" mode is enough);
13338  *      the lock is neither dropped nor upgraded in here.
13339  */
13340 boolean_t
13341 vm_map_verify(
13342         vm_map_t                map,
13343         vm_map_version_t        *version)       /* REF */
13344 {
13345         /* catch callers that did not take the map lock first */
13346         vm_map_lock_assert_held(map);
13347
13348         /* same timestamp: the saved version is still current */
13349         return (boolean_t)(map->timestamp ==
13350                version->main_timestamp);
13351 }
13352
13353 /*
13354  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13355  *      Goes away after regular vm_region_recurse function migrates to
13356  *      64 bits
13357  *      vm_region_recurse: A form of vm_region which follows the
13358  *      submaps in a target map
13359  *
       *      Looks up the region containing "*address" in "map", descending
       *      through at most "*nesting_depth" levels of submaps.  If nothing
       *      is mapped at that address, the next region after it is
       *      reported instead.
       *
       *      map             top-level map to search; VM_MAP_NULL is rejected.
       *      address         IN:  address to look up.
       *                      OUT: start of the range being reported.
       *      size            OUT: size of the range being reported.
       *      nesting_depth   IN:  deepest submap level the caller accepts.
       *                      OUT: level at which the reported entry was found.
       *      submap_info     caller's info buffer.  It is filled using the
       *                      short layout when "*count" is below
       *                      VM_REGION_SUBMAP_INFO_V0_COUNT_64, and using the
       *                      full layout otherwise.
       *      count           IN:  capacity of the info buffer, as one of the
       *                           VM_REGION_SUBMAP_*_COUNT_64 values.
       *                      OUT: the SHORT, V0, V1 or V2 count matching the
       *                           fields that were filled in.
       *
       *      Returns KERN_INVALID_ARGUMENT for a null map or a "*count" too
       *      small for the short layout, KERN_INVALID_ADDRESS when there is
       *      nothing to report at or after "*address", and KERN_SUCCESS
       *      otherwise.
       *
       *      The maps are read-locked in here; no locking at all is done
       *      when "not_in_kdp" is false.
13360  */
13361
13362 kern_return_t
13363 vm_map_region_recurse_64(
13364         vm_map_t                 map,
13365         vm_map_offset_t *address,               /* IN/OUT */
13366         vm_map_size_t           *size,                  /* OUT */
13367         natural_t               *nesting_depth, /* IN/OUT */
13368         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
13369         mach_msg_type_number_t  *count) /* IN/OUT */
13370 {
13371         mach_msg_type_number_t  original_count;
13372         vm_region_extended_info_data_t  extended;
13373         vm_map_entry_t                  tmp_entry;
13374         vm_map_offset_t                 user_address;
13375         unsigned int                    user_max_depth;
13376
13377         /*
13378          * "curr_entry" is the VM map entry preceding or including the
13379          * address we're looking for.
13380          * "curr_map" is the map or sub-map containing "curr_entry".
13381          * "curr_address" is the equivalent of the top map's "user_address"
13382          * in the current map.
13383          * "curr_offset" is the cumulated offset of "curr_map" in the
13384          * target task's address space.
13385          * "curr_depth" is the depth of "curr_map" in the chain of
13386          * sub-maps.
13387          *
13388          * "curr_max_below" and "curr_max_above" limit the range (around
13389          * "curr_address") we should take into account in the current (sub)map.
13390          * They limit the range to what's visible through the map entries
13391          * we've traversed from the top map to the current map.
13392          *
13393          */
13394         vm_map_entry_t                  curr_entry;
13395         vm_map_address_t                curr_address;
13396         vm_map_offset_t                 curr_offset;
13397         vm_map_t                        curr_map;
13398         unsigned int                    curr_depth;
13399         vm_map_offset_t                 curr_max_below, curr_max_above;
13400         vm_map_offset_t                 curr_skip;
13401
13402         /*
13403          * "next_" is the same as "curr_" but for the VM region immediately
13404          * after the address we're looking for.  We need to keep track of this
13405          * too because we want to return info about that region if the
13406          * address we're looking for is not mapped.
13407          */
13408         vm_map_entry_t                  next_entry;
13409         vm_map_offset_t                 next_offset;
13410         vm_map_offset_t                 next_address;
13411         vm_map_t                        next_map;
13412         unsigned int                    next_depth;
13413         vm_map_offset_t                 next_max_below, next_max_above;
13414         vm_map_offset_t                 next_skip;
13415
13416         boolean_t                       look_for_pages;
13417         vm_region_submap_short_info_64_t short_info;
13418         boolean_t                       do_region_footprint;
13419
13420         if (map == VM_MAP_NULL) {
13421                 /* no address space to work on */
13422                 return KERN_INVALID_ARGUMENT;
13423         }
13424
13425
13426         if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13427                 /*
13428                  * "info" structure is not big enough and
13429                  * would overflow
13430                  */
13431                 return KERN_INVALID_ARGUMENT;
13432         }
13433
13434         do_region_footprint = task_self_region_footprint();
13435         original_count = *count;
13436
      /*
       * Pick the layout from the caller's buffer capacity: exactly one
       * of "submap_info" / "short_info" stays non-NULL from here on, and
       * "*count" is set to the largest version that fits.
       */
13437         if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13438                 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13439                 look_for_pages = FALSE;
13440                 short_info = (vm_region_submap_short_info_64_t) submap_info;
13441                 submap_info = NULL;
13442         } else {
13443                 look_for_pages = TRUE;
13444                 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13445                 short_info = NULL;
13446
13447                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13448                         *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13449                 }
13450                 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13451                         *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13452                 }
13453         }
13454
13455         user_address = *address;
13456         user_max_depth = *nesting_depth;
13457
13458         if (not_in_kdp) {
13459                 vm_map_lock_read(map);
13460         }
13461
13462 recurse_again:
13463         curr_entry = NULL;
13464         curr_map = map;
13465         curr_address = user_address;
13466         curr_offset = 0;
13467         curr_skip = 0;
13468         curr_depth = 0;
13469         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13470         curr_max_below = curr_address;
13471
13472         next_entry = NULL;
13473         next_map = NULL;
13474         next_address = 0;
13475         next_offset = 0;
13476         next_skip = 0;
13477         next_depth = 0;
13478         next_max_above = (vm_map_offset_t) -1;
13479         next_max_below = (vm_map_offset_t) -1;
13480
13481         for (;;) {
13482                 if (vm_map_lookup_entry(curr_map,
13483                     curr_address,
13484                     &tmp_entry)) {
13485                         /* tmp_entry contains the address we're looking for */
13486                         curr_entry = tmp_entry;
13487                 } else {
13488                         vm_map_offset_t skip;
13489                         /*
13490                          * The address is not mapped.  "tmp_entry" is the
13491                          * map entry preceding the address.  We want the next
13492                          * one, if it exists.
13493                          */
13494                         curr_entry = tmp_entry->vme_next;
13495
13496                         if (curr_entry == vm_map_to_entry(curr_map) ||
13497                             (curr_entry->vme_start >=
13498                             curr_address + curr_max_above)) {
13499                                 /* no next entry at this level: stop looking */
13500                                 if (not_in_kdp) {
13501                                         vm_map_unlock_read(curr_map);
13502                                 }
13503                                 curr_entry = NULL;
13504                                 curr_map = NULL;
13505                                 curr_skip = 0;
13506                                 curr_offset = 0;
13507                                 curr_depth = 0;
13508                                 curr_max_above = 0;
13509                                 curr_max_below = 0;
13510                                 break;
13511                         }
13512
13513                         /* adjust current address and offset */
13514                         skip = curr_entry->vme_start - curr_address;
13515                         curr_address = curr_entry->vme_start;
13516                         curr_skip += skip;
13517                         curr_offset += skip;
13518                         curr_max_above -= skip;
13519                         curr_max_below = 0;
13520                 }
13521
13522                 /*
13523                  * Is the next entry at this level closer to the address (or
13524                  * deeper in the submap chain) than the one we had
13525                  * so far ?
13526                  */
13527                 tmp_entry = curr_entry->vme_next;
13528                 if (tmp_entry == vm_map_to_entry(curr_map)) {
13529                         /* no next entry at this level */
13530                 } else if (tmp_entry->vme_start >=
13531                     curr_address + curr_max_above) {
13532                         /*
13533                          * tmp_entry is beyond the scope of what we mapped of
13534                          * this submap in the upper level: ignore it.
13535                          */
13536                 } else if ((next_entry == NULL) ||
13537                     (tmp_entry->vme_start + curr_offset <=
13538                     next_entry->vme_start + next_offset)) {
13539                         /*
13540                          * We didn't have a "next_entry" or this one is
13541                          * closer to the address we're looking for:
13542                          * use this "tmp_entry" as the new "next_entry".
13543                          */
13544                         if (next_entry != NULL) {
13545                                 /* unlock the last "next_map" */
13546                                 if (next_map != curr_map && not_in_kdp) {
13547                                         vm_map_unlock_read(next_map);
13548                                 }
13549                         }
13550                         next_entry = tmp_entry;
13551                         next_map = curr_map;
13552                         next_depth = curr_depth;
13553                         next_address = next_entry->vme_start;
13554                         next_skip = curr_skip;
13555                         next_skip += (next_address - curr_address);
13556                         next_offset = curr_offset;
13557                         next_offset += (next_address - curr_address);
13558                         next_max_above = MIN(next_max_above, curr_max_above);
13559                         next_max_above = MIN(next_max_above,
13560                             next_entry->vme_end - next_address);
13561                         next_max_below = MIN(next_max_below, curr_max_below);
13562                         next_max_below = MIN(next_max_below,
13563                             next_address - next_entry->vme_start);
13564                 }
13565
13566                 /*
13567                  * "curr_max_{above,below}" allow us to keep track of the
13568                  * portion of the submap that is actually mapped at this level:
13569                  * the rest of that submap is irrelevant to us, since it's not
13570                  * mapped here.
13571                  * The relevant portion of the map starts at
13572                  * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13573                  */
13574                 curr_max_above = MIN(curr_max_above,
13575                     curr_entry->vme_end - curr_address);
13576                 curr_max_below = MIN(curr_max_below,
13577                     curr_address - curr_entry->vme_start);
13578
13579                 if (!curr_entry->is_sub_map ||
13580                     curr_depth >= user_max_depth) {
13581                         /*
13582                          * We hit a leaf map or we reached the maximum depth
13583                          * we could, so stop looking.  Keep the current map
13584                          * locked.
13585                          */
13586                         break;
13587                 }
13588
13589                 /*
13590                  * Get down to the next submap level.
13591                  */
13592
13593                 /*
13594                  * Lock the next level and unlock the current level,
13595                  * unless we need to keep it locked to access the "next_entry"
13596                  * later.
13597                  */
13598                 if (not_in_kdp) {
13599                         vm_map_lock_read(VME_SUBMAP(curr_entry));
13600                 }
13601                 if (curr_map == next_map) {
13602                         /* keep "next_map" locked in case we need it */
13603                 } else {
13604                         /* release this map */
13605                         if (not_in_kdp) {
13606                                 vm_map_unlock_read(curr_map);
13607                         }
13608                 }
13609
13610                 /*
13611                  * Adjust the offset.  "curr_entry" maps the submap
13612                  * at relative address "curr_entry->vme_start" in the
13613                  * curr_map but skips the first "VME_OFFSET(curr_entry)"
13614                  * bytes of the submap.
13615                  * "curr_offset" always represents the offset of a virtual
13616                  * address in the curr_map relative to the absolute address
13617                  * space (i.e. the top-level VM map).
13618                  */
13619                 curr_offset +=
13620                     (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13621                 curr_address = user_address + curr_offset;
13622                 /* switch to the submap */
13623                 curr_map = VME_SUBMAP(curr_entry);
13624                 curr_depth++;
13625                 curr_entry = NULL;
13626         }
13627
13628 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13629 // so probably should be a real 32b ID vs. ptr.
13630 // Current users just check for equality
13631
13632         if (curr_entry == NULL) {
13633                 /* no VM region contains the address... */
13634
                      /*
                       * NOTE(review): when both "curr_entry" and "next_entry"
                       * are NULL, every map lock taken above has already been
                       * dropped, so vm_map_last_entry(map) is read unlocked
                       * below -- confirm that this is acceptable.
                       */
13635                 if (do_region_footprint && /* we want footprint numbers */
13636                     next_entry == NULL && /* & there are no more regions */
13637                     /* & we haven't already provided our fake region: */
13638                     user_address <= vm_map_last_entry(map)->vme_end) {
13639                         ledger_amount_t ledger_resident, ledger_compressed;
13640
13641                         /*
13642                          * Add a fake memory region to account for
13643                          * purgeable and/or ledger-tagged memory that
13644                          * counts towards this task's memory footprint,
13645                          * i.e. the resident/compressed pages of non-volatile
13646                          * objects owned by that task.
13647                          */
13648                         task_ledgers_footprint(map->pmap->ledger,
13649                             &ledger_resident,
13650                             &ledger_compressed);
13651                         if (ledger_resident + ledger_compressed == 0) {
13652                                 /* no purgeable memory usage to report */
13653                                 return KERN_INVALID_ADDRESS;
13654                         }
13655                         /* fake region to show nonvolatile footprint */
13656                         if (look_for_pages) {
13657                                 submap_info->protection = VM_PROT_DEFAULT;
13658                                 submap_info->max_protection = VM_PROT_DEFAULT;
13659                                 submap_info->inheritance = VM_INHERIT_DEFAULT;
13660                                 submap_info->offset = 0;
13661                                 submap_info->user_tag = -1;
13662                                 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
13663                                 submap_info->pages_shared_now_private = 0;
13664                                 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
13665                                 submap_info->pages_dirtied = submap_info->pages_resident;
13666                                 submap_info->ref_count = 1;
13667                                 submap_info->shadow_depth = 0;
13668                                 submap_info->external_pager = 0;
13669                                 submap_info->share_mode = SM_PRIVATE;
13670                                 submap_info->is_submap = 0;
13671                                 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13672                                 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13673                                 submap_info->user_wired_count = 0;
                                      /*
                                       * The V1 and V2 fields only exist in the
                                       * caller's buffer if it was sized for them:
                                       * apply the same "original_count" checks as
                                       * for real regions at the end of this
                                       * function, so that a V0 buffer is not
                                       * written past its end and a V2 buffer does
                                       * not come back with "object_id_full" unset.
                                       */
                                      if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13674                                         submap_info->pages_reusable = 0;
                                      }
                                      if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
                                              /* widen the fake ID reported in "object_id" */
                                              submap_info->object_id_full = (vm_object_id_t) submap_info->object_id;
                                      }
13675                         } else {
13676                                 short_info->user_tag = -1;
13677                                 short_info->offset = 0;
13678                                 short_info->protection = VM_PROT_DEFAULT;
13679                                 short_info->inheritance = VM_INHERIT_DEFAULT;
13680                                 short_info->max_protection = VM_PROT_DEFAULT;
13681                                 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13682                                 short_info->user_wired_count = 0;
13683                                 short_info->is_submap = 0;
13684                                 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13685                                 short_info->external_pager = 0;
13686                                 short_info->shadow_depth = 0;
13687                                 short_info->share_mode = SM_PRIVATE;
13688                                 short_info->ref_count = 1;
13689                         }
13690                         *nesting_depth = 0;
13691                         *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
13692 //                      *address = user_address;
13693                         *address = vm_map_last_entry(map)->vme_end;
13694                         return KERN_SUCCESS;
13695                 }
13696
13697                 if (next_entry == NULL) {
13698                         /* ... and no VM region follows it either */
13699                         return KERN_INVALID_ADDRESS;
13700                 }
13701                 /* ... gather info about the next VM region */
13702                 curr_entry = next_entry;
13703                 curr_map = next_map;    /* still locked ... */
13704                 curr_address = next_address;
13705                 curr_skip = next_skip;
13706                 curr_offset = next_offset;
13707                 curr_depth = next_depth;
13708                 curr_max_above = next_max_above;
13709                 curr_max_below = next_max_below;
13710         } else {
13711                 /* we won't need "next_entry" after all */
13712                 if (next_entry != NULL) {
13713                         /* release "next_map" */
13714                         if (next_map != curr_map && not_in_kdp) {
13715                                 vm_map_unlock_read(next_map);
13716                         }
13717                 }
13718         }
13719         next_entry = NULL;
13720         next_map = NULL;
13721         next_offset = 0;
13722         next_skip = 0;
13723         next_depth = 0;
13724         next_max_below = -1;
13725         next_max_above = -1;
13726
13727         if (curr_entry->is_sub_map &&
13728             curr_depth < user_max_depth) {
13729                 /*
13730                  * We're not as deep as we could be:  we must have
13731                  * gone back up after not finding anything mapped
13732                  * below the original top-level map entry's.
13733                  * Let's move "curr_address" forward and recurse again.
13734                  */
13735                 user_address = curr_address;
13736                 goto recurse_again;
13737         }
13738
13739         *nesting_depth = curr_depth;
13740         *size = curr_max_above + curr_max_below;
13741         *address = user_address + curr_skip - curr_max_below;
13742
13743 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13744 // so probably should be a real 32b ID vs. ptr.
13745 // Current users just check for equality
13746 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13747
13748         if (look_for_pages) {
13749                 submap_info->user_tag = VME_ALIAS(curr_entry);
13750                 submap_info->offset = VME_OFFSET(curr_entry);
13751                 submap_info->protection = curr_entry->protection;
13752                 submap_info->inheritance = curr_entry->inheritance;
13753                 submap_info->max_protection = curr_entry->max_protection;
13754                 submap_info->behavior = curr_entry->behavior;
13755                 submap_info->user_wired_count = curr_entry->user_wired_count;
13756                 submap_info->is_submap = curr_entry->is_sub_map;
13757                 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13758         } else {
13759                 short_info->user_tag = VME_ALIAS(curr_entry);
13760                 short_info->offset = VME_OFFSET(curr_entry);
13761                 short_info->protection = curr_entry->protection;
13762                 short_info->inheritance = curr_entry->inheritance;
13763                 short_info->max_protection = curr_entry->max_protection;
13764                 short_info->behavior = curr_entry->behavior;
13765                 short_info->user_wired_count = curr_entry->user_wired_count;
13766                 short_info->is_submap = curr_entry->is_sub_map;
13767                 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13768         }
13769
13770         extended.pages_resident = 0;
13771         extended.pages_swapped_out = 0;
13772         extended.pages_shared_now_private = 0;
13773         extended.pages_dirtied = 0;
13774         extended.pages_reusable = 0;
13775         extended.external_pager = 0;
13776         extended.shadow_depth = 0;
13777         extended.share_mode = SM_EMPTY;
13778         extended.ref_count = 0;
13779
      /* In kdp, "extended" keeps the zero/SM_EMPTY defaults set above. */
13780         if (not_in_kdp) {
13781                 if (!curr_entry->is_sub_map) {
13782                         vm_map_offset_t range_start, range_end;
13783                         range_start = MAX((curr_address - curr_max_below),
13784                             curr_entry->vme_start);
13785                         range_end = MIN((curr_address + curr_max_above),
13786                             curr_entry->vme_end);
13787                         vm_map_region_walk(curr_map,
13788                             range_start,
13789                             curr_entry,
13790                             (VME_OFFSET(curr_entry) +
13791                             (range_start -
13792                             curr_entry->vme_start)),
13793                             range_end - range_start,
13794                             &extended,
13795                             look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13796                         if (extended.external_pager &&
13797                             extended.ref_count == 2 &&
13798                             extended.share_mode == SM_SHARED) {
13799                                 extended.share_mode = SM_PRIVATE;
13800                         }
13801                 } else {
13802                         if (curr_entry->use_pmap) {
13803                                 extended.share_mode = SM_TRUESHARED;
13804                         } else {
13805                                 extended.share_mode = SM_PRIVATE;
13806                         }
13807                         extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
13808                 }
13809         }
13810
13811         if (look_for_pages) {
13812                 submap_info->pages_resident = extended.pages_resident;
13813                 submap_info->pages_swapped_out = extended.pages_swapped_out;
13814                 submap_info->pages_shared_now_private =
13815                     extended.pages_shared_now_private;
13816                 submap_info->pages_dirtied = extended.pages_dirtied;
13817                 submap_info->external_pager = extended.external_pager;
13818                 submap_info->shadow_depth = extended.shadow_depth;
13819                 submap_info->share_mode = extended.share_mode;
13820                 submap_info->ref_count = extended.ref_count;
13821
13822                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13823                         submap_info->pages_reusable = extended.pages_reusable;
13824                 }
13825                 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13826                         submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13827                 }
13828         } else {
13829                 short_info->external_pager = extended.external_pager;
13830                 short_info->shadow_depth = extended.shadow_depth;
13831                 short_info->share_mode = extended.share_mode;
13832                 short_info->ref_count = extended.ref_count;
13833         }
13834
13835         if (not_in_kdp) {
13836                 vm_map_unlock_read(curr_map);
13837         }
13838
13839         return KERN_SUCCESS;
13840 }
13841
13842 /*
13843  *      vm_region:
13844  *
13845  *      User call to obtain information about a region in
13846  *      a task's address map. Currently, only one flavor is
13847  *      supported.
13848  *
13849  *      XXX The reserved and behavior fields cannot be filled
13850  *          in until the vm merge from the IK is completed, and
13851  *          vm_reserve is implemented.
13852  */
13853
13854 kern_return_t
13855 vm_map_region(
13856         vm_map_t                 map,
13857         vm_map_offset_t *address,               /* IN/OUT */
13858         vm_map_size_t           *size,                  /* OUT */
13859         vm_region_flavor_t       flavor,                /* IN */
13860         vm_region_info_t         info,                  /* OUT */
13861         mach_msg_type_number_t  *count, /* IN/OUT */
13862         mach_port_t             *object_name)           /* OUT */
13863 {
13864         vm_map_entry_t          tmp_entry;
13865         vm_map_entry_t          entry;
13866         vm_map_offset_t         start;
13867
13868         if (map == VM_MAP_NULL) {
13869                 return KERN_INVALID_ARGUMENT;
13870         }
13871
13872         switch (flavor) {
13873         case VM_REGION_BASIC_INFO:
13874                 /* legacy for old 32-bit objects info */
13875         {
13876                 vm_region_basic_info_t  basic;
13877
13878                 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13879                         return KERN_INVALID_ARGUMENT;
13880                 }
13881
13882                 basic = (vm_region_basic_info_t) info;
13883                 *count = VM_REGION_BASIC_INFO_COUNT;
13884
13885                 vm_map_lock_read(map);
13886
13887                 start = *address;
13888                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13889                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13890                                 vm_map_unlock_read(map);
13891                                 return KERN_INVALID_ADDRESS;
13892                         }
13893                 } else {
13894                         entry = tmp_entry;
13895                 }
13896
13897                 start = entry->vme_start;
13898
13899                 basic->offset = (uint32_t)VME_OFFSET(entry);
13900                 basic->protection = entry->protection;
13901                 basic->inheritance = entry->inheritance;
13902                 basic->max_protection = entry->max_protection;
13903                 basic->behavior = entry->behavior;
13904                 basic->user_wired_count = entry->user_wired_count;
13905                 basic->reserved = entry->is_sub_map;
13906                 *address = start;
13907                 *size = (entry->vme_end - start);
13908
13909                 if (object_name) {
13910                         *object_name = IP_NULL;
13911                 }
13912                 if (entry->is_sub_map) {
13913                         basic->shared = FALSE;
13914                 } else {
13915                         basic->shared = entry->is_shared;
13916                 }
13917
13918                 vm_map_unlock_read(map);
13919                 return KERN_SUCCESS;
13920         }
13921
13922         case VM_REGION_BASIC_INFO_64:
13923         {
13924                 vm_region_basic_info_64_t       basic;
13925
13926                 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13927                         return KERN_INVALID_ARGUMENT;
13928                 }
13929
13930                 basic = (vm_region_basic_info_64_t) info;
13931                 *count = VM_REGION_BASIC_INFO_COUNT_64;
13932
13933                 vm_map_lock_read(map);
13934
13935                 start = *address;
13936                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13937                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13938                                 vm_map_unlock_read(map);
13939                                 return KERN_INVALID_ADDRESS;
13940                         }
13941                 } else {
13942                         entry = tmp_entry;
13943                 }
13944
13945                 start = entry->vme_start;
13946
13947                 basic->offset = VME_OFFSET(entry);
13948                 basic->protection = entry->protection;
13949                 basic->inheritance = entry->inheritance;
13950                 basic->max_protection = entry->max_protection;
13951                 basic->behavior = entry->behavior;
13952                 basic->user_wired_count = entry->user_wired_count;
13953                 basic->reserved = entry->is_sub_map;
13954                 *address = start;
13955                 *size = (entry->vme_end - start);
13956
13957                 if (object_name) {
13958                         *object_name = IP_NULL;
13959                 }
13960                 if (entry->is_sub_map) {
13961                         basic->shared = FALSE;
13962                 } else {
13963                         basic->shared = entry->is_shared;
13964                 }
13965
13966                 vm_map_unlock_read(map);
13967                 return KERN_SUCCESS;
13968         }
13969         case VM_REGION_EXTENDED_INFO:
13970                 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13971                         return KERN_INVALID_ARGUMENT;
13972                 }
13973         /*fallthru*/
13974         case VM_REGION_EXTENDED_INFO__legacy:
13975                 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
13976                         return KERN_INVALID_ARGUMENT;
13977                 }
13978
13979                 {
13980                         vm_region_extended_info_t       extended;
13981                         mach_msg_type_number_t original_count;
13982
13983                         extended = (vm_region_extended_info_t) info;
13984
13985                         vm_map_lock_read(map);
13986
13987                         start = *address;
13988                         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13989                                 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13990                                         vm_map_unlock_read(map);
13991                                         return KERN_INVALID_ADDRESS;
13992                                 }
13993                         } else {
13994                                 entry = tmp_entry;
13995                         }
13996                         start = entry->vme_start;
13997
13998                         extended->protection = entry->protection;
13999                         extended->user_tag = VME_ALIAS(entry);
14000                         extended->pages_resident = 0;
14001                         extended->pages_swapped_out = 0;
14002                         extended->pages_shared_now_private = 0;
14003                         extended->pages_dirtied = 0;
14004                         extended->external_pager = 0;
14005                         extended->shadow_depth = 0;
14006
14007                         original_count = *count;
14008                         if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14009                                 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14010                         } else {
14011                                 extended->pages_reusable = 0;
14012                                 *count = VM_REGION_EXTENDED_INFO_COUNT;
14013                         }
14014
14015                         vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14016
14017                         if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14018                                 extended->share_mode = SM_PRIVATE;
14019                         }
14020
14021                         if (object_name) {
14022                                 *object_name = IP_NULL;
14023                         }
14024                         *address = start;
14025                         *size = (entry->vme_end - start);
14026
14027                         vm_map_unlock_read(map);
14028                         return KERN_SUCCESS;
14029                 }
14030         case VM_REGION_TOP_INFO:
14031         {
14032                 vm_region_top_info_t    top;
14033
14034                 if (*count < VM_REGION_TOP_INFO_COUNT) {
14035                         return KERN_INVALID_ARGUMENT;
14036                 }
14037
14038                 top = (vm_region_top_info_t) info;
14039                 *count = VM_REGION_TOP_INFO_COUNT;
14040
14041                 vm_map_lock_read(map);
14042
14043                 start = *address;
14044                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14045                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14046                                 vm_map_unlock_read(map);
14047                                 return KERN_INVALID_ADDRESS;
14048                         }
14049                 } else {
14050                         entry = tmp_entry;
14051                 }
14052                 start = entry->vme_start;
14053
14054                 top->private_pages_resident = 0;
14055                 top->shared_pages_resident = 0;
14056
14057                 vm_map_region_top_walk(entry, top);
14058
14059                 if (object_name) {
14060                         *object_name = IP_NULL;
14061                 }
14062                 *address = start;
14063                 *size = (entry->vme_end - start);
14064
14065                 vm_map_unlock_read(map);
14066                 return KERN_SUCCESS;
14067         }
14068         default:
14069                 return KERN_INVALID_ARGUMENT;
14070         }
14071 }
14072
14073 #define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
14074         MIN((entry_size),                                               \
14075             ((obj)->all_reusable ?                                      \
14076              (obj)->wired_page_count :                                  \
14077              (obj)->resident_page_count - (obj)->reusable_page_count))
14078
14079 void
14080 vm_map_region_top_walk(
14081         vm_map_entry_t             entry,
14082         vm_region_top_info_t       top)
14083 {
14084         if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14085                 top->share_mode = SM_EMPTY;
14086                 top->ref_count = 0;
14087                 top->obj_id = 0;
14088                 return;
14089         }
14090
14091         {
14092                 struct  vm_object *obj, *tmp_obj;
14093                 int             ref_count;
14094                 uint32_t        entry_size;
14095
14096                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14097
14098                 obj = VME_OBJECT(entry);
14099
14100                 vm_object_lock(obj);
14101
14102                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14103                         ref_count--;
14104                 }
14105
14106                 assert(obj->reusable_page_count <= obj->resident_page_count);
14107                 if (obj->shadow) {
14108                         if (ref_count == 1) {
14109                                 top->private_pages_resident =
14110                                     OBJ_RESIDENT_COUNT(obj, entry_size);
14111                         } else {
14112                                 top->shared_pages_resident =
14113                                     OBJ_RESIDENT_COUNT(obj, entry_size);
14114                         }
14115                         top->ref_count  = ref_count;
14116                         top->share_mode = SM_COW;
14117
14118                         while ((tmp_obj = obj->shadow)) {
14119                                 vm_object_lock(tmp_obj);
14120                                 vm_object_unlock(obj);
14121                                 obj = tmp_obj;
14122
14123                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14124                                         ref_count--;
14125                                 }
14126
14127                                 assert(obj->reusable_page_count <= obj->resident_page_count);
14128                                 top->shared_pages_resident +=
14129                                     OBJ_RESIDENT_COUNT(obj, entry_size);
14130                                 top->ref_count += ref_count - 1;
14131                         }
14132                 } else {
14133                         if (entry->superpage_size) {
14134                                 top->share_mode = SM_LARGE_PAGE;
14135                                 top->shared_pages_resident = 0;
14136                                 top->private_pages_resident = entry_size;
14137                         } else if (entry->needs_copy) {
14138                                 top->share_mode = SM_COW;
14139                                 top->shared_pages_resident =
14140                                     OBJ_RESIDENT_COUNT(obj, entry_size);
14141                         } else {
14142                                 if (ref_count == 1 ||
14143                                     (ref_count == 2 && obj->named)) {
14144                                         top->share_mode = SM_PRIVATE;
14145                                         top->private_pages_resident =
14146                                             OBJ_RESIDENT_COUNT(obj,
14147                                             entry_size);
14148                                 } else {
14149                                         top->share_mode = SM_SHARED;
14150                                         top->shared_pages_resident =
14151                                             OBJ_RESIDENT_COUNT(obj,
14152                                             entry_size);
14153                                 }
14154                         }
14155                         top->ref_count = ref_count;
14156                 }
14157                 /* XXX K64: obj_id will be truncated */
14158                 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14159
14160                 vm_object_unlock(obj);
14161         }
14162 }
14163
14164 void
14165 vm_map_region_walk(
14166         vm_map_t                        map,
14167         vm_map_offset_t                 va,
14168         vm_map_entry_t                  entry,
14169         vm_object_offset_t              offset,
14170         vm_object_size_t                range,
14171         vm_region_extended_info_t       extended,
14172         boolean_t                       look_for_pages,
14173         mach_msg_type_number_t count)
14174 {
14175         struct vm_object *obj, *tmp_obj;
14176         vm_map_offset_t       last_offset;
14177         int               i;
14178         int               ref_count;
14179         struct vm_object        *shadow_object;
14180         int                     shadow_depth;
14181         boolean_t         do_region_footprint;
14182
14183         do_region_footprint = task_self_region_footprint();
14184
14185         if ((VME_OBJECT(entry) == 0) ||
14186             (entry->is_sub_map) ||
14187             (VME_OBJECT(entry)->phys_contiguous &&
14188             !entry->superpage_size)) {
14189                 extended->share_mode = SM_EMPTY;
14190                 extended->ref_count = 0;
14191                 return;
14192         }
14193
14194         if (entry->superpage_size) {
14195                 extended->shadow_depth = 0;
14196                 extended->share_mode = SM_LARGE_PAGE;
14197                 extended->ref_count = 1;
14198                 extended->external_pager = 0;
14199                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14200                 extended->shadow_depth = 0;
14201                 return;
14202         }
14203
14204         obj = VME_OBJECT(entry);
14205
14206         vm_object_lock(obj);
14207
14208         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14209                 ref_count--;
14210         }
14211
14212         if (look_for_pages) {
14213                 for (last_offset = offset + range;
14214                     offset < last_offset;
14215                     offset += PAGE_SIZE_64, va += PAGE_SIZE) {
14216                         if (do_region_footprint) {
14217                                 int disp;
14218
14219                                 disp = 0;
14220                                 if (map->has_corpse_footprint) {
14221                                         /*
14222                                          * Query the page info data we saved
14223                                          * while forking the corpse.
14224                                          */
14225                                         vm_map_corpse_footprint_query_page_info(
14226                                                 map,
14227                                                 va,
14228                                                 &disp);
14229                                 } else {
14230                                         /*
14231                                          * Query the pmap.
14232                                          */
14233                                         pmap_query_page_info(map->pmap,
14234                                             va,
14235                                             &disp);
14236                                 }
14237                                 if (disp & PMAP_QUERY_PAGE_PRESENT) {
14238                                         if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14239                                                 extended->pages_resident++;
14240                                         }
14241                                         if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14242                                                 extended->pages_reusable++;
14243                                         } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
14244                                             (disp & PMAP_QUERY_PAGE_ALTACCT)) {
14245                                                 /* alternate accounting */
14246                                         } else {
14247                                                 extended->pages_dirtied++;
14248                                         }
14249                                 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14250                                         if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14251                                                 /* alternate accounting */
14252                                         } else {
14253                                                 extended->pages_swapped_out++;
14254                                         }
14255                                 }
14256                                 /* deal with alternate accounting */
14257                                 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14258                                     /* && not tagged as no-footprint? */
14259                                     VM_OBJECT_OWNER(obj) != NULL &&
14260                                     VM_OBJECT_OWNER(obj)->map == map) {
14261                                         if ((((va
14262                                             - entry->vme_start
14263                                             + VME_OFFSET(entry))
14264                                             / PAGE_SIZE) <
14265                                             (obj->resident_page_count +
14266                                             vm_compressor_pager_get_count(obj->pager)))) {
14267                                                 /*
14268                                                  * Non-volatile purgeable object owned
14269                                                  * by this task: report the first
14270                                                  * "#resident + #compressed" pages as
14271                                                  * "resident" (to show that they
14272                                                  * contribute to the footprint) but not
14273                                                  * "dirty" (to avoid double-counting
14274                                                  * with the fake "non-volatile" region
14275                                                  * we'll report at the end of the
14276                                                  * address space to account for all
14277                                                  * (mapped or not) non-volatile memory
14278                                                  * owned by this task.
14279                                                  */
14280                                                 extended->pages_resident++;
14281                                         }
14282                                 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14283                                     obj->purgable == VM_PURGABLE_EMPTY) &&
14284                                     /* && not tagged as no-footprint? */
14285                                     VM_OBJECT_OWNER(obj) != NULL &&
14286                                     VM_OBJECT_OWNER(obj)->map == map) {
14287                                         if ((((va
14288                                             - entry->vme_start
14289                                             + VME_OFFSET(entry))
14290                                             / PAGE_SIZE) <
14291                                             obj->wired_page_count)) {
14292                                                 /*
14293                                                  * Volatile|empty purgeable object owned
14294                                                  * by this task: report the first
14295                                                  * "#wired" pages as "resident" (to
14296                                                  * show that they contribute to the
14297                                                  * footprint) but not "dirty" (to avoid
14298                                                  * double-counting with the fake
14299                                                  * "non-volatile" region we'll report
14300                                                  * at the end of the address space to
14301                                                  * account for all (mapped or not)
14302                                                  * non-volatile memory owned by this
14303                                                  * task.
14304                                                  */
14305                                                 extended->pages_resident++;
14306                                         }
14307                                 } else if (obj->purgable != VM_PURGABLE_DENY) {
14308                                         /*
14309                                          * Pages from purgeable objects
14310                                          * will be reported as dirty
14311                                          * appropriately in an extra
14312                                          * fake memory region at the end of
14313                                          * the address space.
14314                                          */
14315                                 } else if (entry->iokit_acct) {
14316                                         /*
14317                                          * IOKit mappings are considered
14318                                          * as fully dirty for footprint's
14319                                          * sake.
14320                                          */
14321                                         extended->pages_dirtied++;
14322                                 }
14323                                 continue;
14324                         }
14325
14326                         vm_map_region_look_for_page(map, va, obj,
14327                             offset, ref_count,
14328                             0, extended, count);
14329                 }
14330
14331                 if (do_region_footprint) {
14332                         goto collect_object_info;
14333                 }
14334         } else {
14335 collect_object_info:
14336                 shadow_object = obj->shadow;
14337                 shadow_depth = 0;
14338
14339                 if (!(obj->internal)) {
14340                         extended->external_pager = 1;
14341                 }
14342
14343                 if (shadow_object != VM_OBJECT_NULL) {
14344                         vm_object_lock(shadow_object);
14345                         for (;
14346                             shadow_object != VM_OBJECT_NULL;
14347                             shadow_depth++) {
14348                                 vm_object_t     next_shadow;
14349
14350                                 if (!(shadow_object->internal)) {
14351                                         extended->external_pager = 1;
14352                                 }
14353
14354                                 next_shadow = shadow_object->shadow;
14355                                 if (next_shadow) {
14356                                         vm_object_lock(next_shadow);
14357                                 }
14358                                 vm_object_unlock(shadow_object);
14359                                 shadow_object = next_shadow;
14360                         }
14361                 }
14362                 extended->shadow_depth = shadow_depth;
14363         }
14364
14365         if (extended->shadow_depth || entry->needs_copy) {
14366                 extended->share_mode = SM_COW;
14367         } else {
14368                 if (ref_count == 1) {
14369                         extended->share_mode = SM_PRIVATE;
14370                 } else {
14371                         if (obj->true_share) {
14372                                 extended->share_mode = SM_TRUESHARED;
14373                         } else {
14374                                 extended->share_mode = SM_SHARED;
14375                         }
14376                 }
14377         }
14378         extended->ref_count = ref_count - extended->shadow_depth;
14379
14380         for (i = 0; i < extended->shadow_depth; i++) {
14381                 if ((tmp_obj = obj->shadow) == 0) {
14382                         break;
14383                 }
14384                 vm_object_lock(tmp_obj);
14385                 vm_object_unlock(obj);
14386
14387                 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14388                         ref_count--;
14389                 }
14390
14391                 extended->ref_count += ref_count;
14392                 obj = tmp_obj;
14393         }
14394         vm_object_unlock(obj);
14395
14396         if (extended->share_mode == SM_SHARED) {
14397                 vm_map_entry_t       cur;
14398                 vm_map_entry_t       last;
14399                 int      my_refs;
14400
14401                 obj = VME_OBJECT(entry);
14402                 last = vm_map_to_entry(map);
14403                 my_refs = 0;
14404
14405                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14406                         ref_count--;
14407                 }
14408                 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14409                         my_refs += vm_map_region_count_obj_refs(cur, obj);
14410                 }
14411
14412                 if (my_refs == ref_count) {
14413                         extended->share_mode = SM_PRIVATE_ALIASED;
14414                 } else if (my_refs > 1) {
14415                         extended->share_mode = SM_SHARED_ALIASED;
14416                 }
14417         }
14418 }
14419
14420
14421 /* object is locked on entry and locked on return */
14422
14423
14424 static void
14425 vm_map_region_look_for_page(
14426         __unused vm_map_t               map,
14427         __unused vm_map_offset_t        va,
14428         vm_object_t                     object,
14429         vm_object_offset_t              offset,
14430         int                             max_refcnt,
14431         int                             depth,
14432         vm_region_extended_info_t       extended,
14433         mach_msg_type_number_t count)
14434 {
14435         vm_page_t       p;
14436         vm_object_t     shadow;
14437         int             ref_count;
14438         vm_object_t     caller_object;
14439
14440         shadow = object->shadow;
14441         caller_object = object;
14442
14443
14444         while (TRUE) {
14445                 if (!(object->internal)) {
14446                         extended->external_pager = 1;
14447                 }
14448
14449                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14450                         if (shadow && (max_refcnt == 1)) {
14451                                 extended->pages_shared_now_private++;
14452                         }
14453
14454                         if (!p->vmp_fictitious &&
14455                             (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14456                                 extended->pages_dirtied++;
14457                         } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14458                                 if (p->vmp_reusable || object->all_reusable) {
14459                                         extended->pages_reusable++;
14460                                 }
14461                         }
14462
14463                         extended->pages_resident++;
14464
14465                         if (object != caller_object) {
14466                                 vm_object_unlock(object);
14467                         }
14468
14469                         return;
14470                 }
14471                 if (object->internal &&
14472                     object->alive &&
14473                     !object->terminating &&
14474                     object->pager_ready) {
14475                         if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14476                             == VM_EXTERNAL_STATE_EXISTS) {
14477                                 /* the pager has that page */
14478                                 extended->pages_swapped_out++;
14479                                 if (object != caller_object) {
14480                                         vm_object_unlock(object);
14481                                 }
14482                                 return;
14483                         }
14484                 }
14485
14486                 if (shadow) {
14487                         vm_object_lock(shadow);
14488
14489                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14490                                 ref_count--;
14491                         }
14492
14493                         if (++depth > extended->shadow_depth) {
14494                                 extended->shadow_depth = depth;
14495                         }
14496
14497                         if (ref_count > max_refcnt) {
14498                                 max_refcnt = ref_count;
14499                         }
14500
14501                         if (object != caller_object) {
14502                                 vm_object_unlock(object);
14503                         }
14504
14505                         offset = offset + object->vo_shadow_offset;
14506                         object = shadow;
14507                         shadow = object->shadow;
14508                         continue;
14509                 }
14510                 if (object != caller_object) {
14511                         vm_object_unlock(object);
14512                 }
14513                 break;
14514         }
14515 }
14516
14517 static int
14518 vm_map_region_count_obj_refs(
14519         vm_map_entry_t    entry,
14520         vm_object_t       object)
14521 {
14522         int ref_count;
14523         vm_object_t chk_obj;
14524         vm_object_t tmp_obj;
14525
14526         if (VME_OBJECT(entry) == 0) {
14527                 return 0;
14528         }
14529
14530         if (entry->is_sub_map) {
14531                 return 0;
14532         } else {
14533                 ref_count = 0;
14534
14535                 chk_obj = VME_OBJECT(entry);
14536                 vm_object_lock(chk_obj);
14537
14538                 while (chk_obj) {
14539                         if (chk_obj == object) {
14540                                 ref_count++;
14541                         }
14542                         tmp_obj = chk_obj->shadow;
14543                         if (tmp_obj) {
14544                                 vm_object_lock(tmp_obj);
14545                         }
14546                         vm_object_unlock(chk_obj);
14547
14548                         chk_obj = tmp_obj;
14549                 }
14550         }
14551         return ref_count;
14552 }
14553
14554
14555 /*
14556  *      Routine:        vm_map_simplify
14557  *
14558  *      Description:
14559  *              Attempt to simplify the map representation in
14560  *              the vicinity of the given starting address.
14561  *      Note:
14562  *              This routine is intended primarily to keep the
14563  *              kernel maps more compact -- they generally don't
14564  *              benefit from the "expand a map entry" technology
14565  *              at allocation time because the adjacent entry
14566  *              is often wired down.
14567  */
14568 void
14569 vm_map_simplify_entry(
14570         vm_map_t        map,
14571         vm_map_entry_t  this_entry)
14572 {
14573         vm_map_entry_t  prev_entry;
14574
14575         counter(c_vm_map_simplify_entry_called++);
14576
14577         prev_entry = this_entry->vme_prev;
14578
14579         if ((this_entry != vm_map_to_entry(map)) &&
14580             (prev_entry != vm_map_to_entry(map)) &&
14581
14582             (prev_entry->vme_end == this_entry->vme_start) &&
14583
14584             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14585             (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14586             ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14587             prev_entry->vme_start))
14588             == VME_OFFSET(this_entry)) &&
14589
14590             (prev_entry->behavior == this_entry->behavior) &&
14591             (prev_entry->needs_copy == this_entry->needs_copy) &&
14592             (prev_entry->protection == this_entry->protection) &&
14593             (prev_entry->max_protection == this_entry->max_protection) &&
14594             (prev_entry->inheritance == this_entry->inheritance) &&
14595             (prev_entry->use_pmap == this_entry->use_pmap) &&
14596             (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14597             (prev_entry->no_cache == this_entry->no_cache) &&
14598             (prev_entry->permanent == this_entry->permanent) &&
14599             (prev_entry->map_aligned == this_entry->map_aligned) &&
14600             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14601             (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14602             (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14603             /* from_reserved_zone: OK if that field doesn't match */
14604             (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14605             (prev_entry->vme_resilient_codesign ==
14606             this_entry->vme_resilient_codesign) &&
14607             (prev_entry->vme_resilient_media ==
14608             this_entry->vme_resilient_media) &&
14609             (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
14610
14611             (prev_entry->wired_count == this_entry->wired_count) &&
14612             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14613
14614             ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14615             (prev_entry->in_transition == FALSE) &&
14616             (this_entry->in_transition == FALSE) &&
14617             (prev_entry->needs_wakeup == FALSE) &&
14618             (this_entry->needs_wakeup == FALSE) &&
14619             (prev_entry->is_shared == FALSE) &&
14620             (this_entry->is_shared == FALSE) &&
14621             (prev_entry->superpage_size == FALSE) &&
14622             (this_entry->superpage_size == FALSE)
14623             ) {
14624                 vm_map_store_entry_unlink(map, prev_entry);
14625                 assert(prev_entry->vme_start < this_entry->vme_end);
14626                 if (prev_entry->map_aligned) {
14627                         assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14628                             VM_MAP_PAGE_MASK(map)));
14629                 }
14630                 this_entry->vme_start = prev_entry->vme_start;
14631                 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14632
14633                 if (map->holelistenabled) {
14634                         vm_map_store_update_first_free(map, this_entry, TRUE);
14635                 }
14636
14637                 if (prev_entry->is_sub_map) {
14638                         vm_map_deallocate(VME_SUBMAP(prev_entry));
14639                 } else {
14640                         vm_object_deallocate(VME_OBJECT(prev_entry));
14641                 }
14642                 vm_map_entry_dispose(map, prev_entry);
14643                 SAVE_HINT_MAP_WRITE(map, this_entry);
14644                 counter(c_vm_map_simplified++);
14645         }
14646 }
14647
14648 void
14649 vm_map_simplify(
14650         vm_map_t        map,
14651         vm_map_offset_t start)
14652 {
14653         vm_map_entry_t  this_entry;
14654
14655         vm_map_lock(map);
14656         if (vm_map_lookup_entry(map, start, &this_entry)) {
14657                 vm_map_simplify_entry(map, this_entry);
14658                 vm_map_simplify_entry(map, this_entry->vme_next);
14659         }
14660         counter(c_vm_map_simplify_called++);
14661         vm_map_unlock(map);
14662 }
14663
14664 static void
14665 vm_map_simplify_range(
14666         vm_map_t        map,
14667         vm_map_offset_t start,
14668         vm_map_offset_t end)
14669 {
14670         vm_map_entry_t  entry;
14671
14672         /*
14673          * The map should be locked (for "write") by the caller.
14674          */
14675
14676         if (start >= end) {
14677                 /* invalid address range */
14678                 return;
14679         }
14680
14681         start = vm_map_trunc_page(start,
14682             VM_MAP_PAGE_MASK(map));
14683         end = vm_map_round_page(end,
14684             VM_MAP_PAGE_MASK(map));
14685
14686         if (!vm_map_lookup_entry(map, start, &entry)) {
14687                 /* "start" is not mapped and "entry" ends before "start" */
14688                 if (entry == vm_map_to_entry(map)) {
14689                         /* start with first entry in the map */
14690                         entry = vm_map_first_entry(map);
14691                 } else {
14692                         /* start with next entry */
14693                         entry = entry->vme_next;
14694                 }
14695         }
14696
14697         while (entry != vm_map_to_entry(map) &&
14698             entry->vme_start <= end) {
14699                 /* try and coalesce "entry" with its previous entry */
14700                 vm_map_simplify_entry(map, entry);
14701                 entry = entry->vme_next;
14702         }
14703 }
14704
14705
14706 /*
14707  *      Routine:        vm_map_machine_attribute
14708  *      Purpose:
14709  *              Provide machine-specific attributes to mappings,
14710  *              such as cachability etc. for machines that provide
14711  *              them.  NUMA architectures and machines with big/strange
14712  *              caches will use this.
14713  *      Note:
14714  *              Responsibilities for locking and checking are handled here,
14715  *              everything else in the pmap module. If any non-volatile
14716  *              information must be kept, the pmap module should handle
14717  *              it itself. [This assumes that attributes do not
14718  *              need to be inherited, which seems ok to me]
14719  */
14720 kern_return_t
14721 vm_map_machine_attribute(
14722         vm_map_t                        map,
14723         vm_map_offset_t         start,
14724         vm_map_offset_t         end,
14725         vm_machine_attribute_t  attribute,
14726         vm_machine_attribute_val_t* value)              /* IN/OUT */
14727 {
14728         kern_return_t   ret;
14729         vm_map_size_t sync_size;
14730         vm_map_entry_t entry;
14731
14732         if (start < vm_map_min(map) || end > vm_map_max(map)) {
14733                 return KERN_INVALID_ADDRESS;
14734         }
14735
14736         /* Figure how much memory we need to flush (in page increments) */
14737         sync_size = end - start;
14738
14739         vm_map_lock(map);
14740
14741         if (attribute != MATTR_CACHE) {
14742                 /* If we don't have to find physical addresses, we */
14743                 /* don't have to do an explicit traversal here.    */
14744                 ret = pmap_attribute(map->pmap, start, end - start,
14745                     attribute, value);
14746                 vm_map_unlock(map);
14747                 return ret;
14748         }
14749
14750         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
14751
14752         while (sync_size) {
14753                 if (vm_map_lookup_entry(map, start, &entry)) {
14754                         vm_map_size_t   sub_size;
14755                         if ((entry->vme_end - start) > sync_size) {
14756                                 sub_size = sync_size;
14757                                 sync_size = 0;
14758                         } else {
14759                                 sub_size = entry->vme_end - start;
14760                                 sync_size -= sub_size;
14761                         }
14762                         if (entry->is_sub_map) {
14763                                 vm_map_offset_t sub_start;
14764                                 vm_map_offset_t sub_end;
14765
14766                                 sub_start = (start - entry->vme_start)
14767                                     + VME_OFFSET(entry);
14768                                 sub_end = sub_start + sub_size;
14769                                 vm_map_machine_attribute(
14770                                         VME_SUBMAP(entry),
14771                                         sub_start,
14772                                         sub_end,
14773                                         attribute, value);
14774                         } else {
14775                                 if (VME_OBJECT(entry)) {
14776                                         vm_page_t               m;
14777                                         vm_object_t             object;
14778                                         vm_object_t             base_object;
14779                                         vm_object_t             last_object;
14780                                         vm_object_offset_t      offset;
14781                                         vm_object_offset_t      base_offset;
14782                                         vm_map_size_t           range;
14783                                         range = sub_size;
14784                                         offset = (start - entry->vme_start)
14785                                             + VME_OFFSET(entry);
14786                                         base_offset = offset;
14787                                         object = VME_OBJECT(entry);
14788                                         base_object = object;
14789                                         last_object = NULL;
14790
14791                                         vm_object_lock(object);
14792
14793                                         while (range) {
14794                                                 m = vm_page_lookup(
14795                                                         object, offset);
14796
14797                                                 if (m && !m->vmp_fictitious) {
14798                                                         ret =
14799                                                             pmap_attribute_cache_sync(
14800                                                                 VM_PAGE_GET_PHYS_PAGE(m),
14801                                                                 PAGE_SIZE,
14802                                                                 attribute, value);
14803                                                 } else if (object->shadow) {
14804                                                         offset = offset + object->vo_shadow_offset;
14805                                                         last_object = object;
14806                                                         object = object->shadow;
14807                                                         vm_object_lock(last_object->shadow);
14808                                                         vm_object_unlock(last_object);
14809                                                         continue;
14810                                                 }
14811                                                 range -= PAGE_SIZE;
14812
14813                                                 if (base_object != object) {
14814                                                         vm_object_unlock(object);
14815                                                         vm_object_lock(base_object);
14816                                                         object = base_object;
14817                                                 }
14818                                                 /* Bump to the next page */
14819                                                 base_offset += PAGE_SIZE;
14820                                                 offset = base_offset;
14821                                         }
14822                                         vm_object_unlock(object);
14823                                 }
14824                         }
14825                         start += sub_size;
14826                 } else {
14827                         vm_map_unlock(map);
14828                         return KERN_FAILURE;
14829                 }
14830         }
14831
14832         vm_map_unlock(map);
14833
14834         return ret;
14835 }
14836
14837 /*
14838  *      vm_map_behavior_set:
14839  *
14840  *      Sets the paging reference behavior of the specified address
14841  *      range in the target map.  Paging reference behavior affects
14842  *      how pagein operations resulting from faults on the map will be
14843  *      clustered.
14844  */
14845 kern_return_t
14846 vm_map_behavior_set(
14847         vm_map_t        map,
14848         vm_map_offset_t start,
14849         vm_map_offset_t end,
14850         vm_behavior_t   new_behavior)
14851 {
14852         vm_map_entry_t  entry;
14853         vm_map_entry_t  temp_entry;
14854
14855         if (start > end ||
14856             start < vm_map_min(map) ||
14857             end > vm_map_max(map)) {
14858                 return KERN_NO_SPACE;
14859         }
14860
14861         switch (new_behavior) {
14862         /*
14863          * This first block of behaviors all set a persistent state on the specified
14864          * memory range.  All we have to do here is to record the desired behavior
14865          * in the vm_map_entry_t's.
14866          */
14867
14868         case VM_BEHAVIOR_DEFAULT:
14869         case VM_BEHAVIOR_RANDOM:
14870         case VM_BEHAVIOR_SEQUENTIAL:
14871         case VM_BEHAVIOR_RSEQNTL:
14872         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14873                 vm_map_lock(map);
14874
14875                 /*
14876                  *      The entire address range must be valid for the map.
14877                  *      Note that vm_map_range_check() does a
14878                  *      vm_map_lookup_entry() internally and returns the
14879                  *      entry containing the start of the address range if
14880                  *      the entire range is valid.
14881                  */
14882                 if (vm_map_range_check(map, start, end, &temp_entry)) {
14883                         entry = temp_entry;
14884                         vm_map_clip_start(map, entry, start);
14885                 } else {
14886                         vm_map_unlock(map);
14887                         return KERN_INVALID_ADDRESS;
14888                 }
14889
14890                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14891                         vm_map_clip_end(map, entry, end);
14892                         if (entry->is_sub_map) {
14893                                 assert(!entry->use_pmap);
14894                         }
14895
14896                         if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
14897                                 entry->zero_wired_pages = TRUE;
14898                         } else {
14899                                 entry->behavior = new_behavior;
14900                         }
14901                         entry = entry->vme_next;
14902                 }
14903
14904                 vm_map_unlock(map);
14905                 break;
14906
14907         /*
14908          * The rest of these are different from the above in that they cause
14909          * an immediate action to take place as opposed to setting a behavior that
14910          * affects future actions.
14911          */
14912
14913         case VM_BEHAVIOR_WILLNEED:
14914                 return vm_map_willneed(map, start, end);
14915
14916         case VM_BEHAVIOR_DONTNEED:
14917                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14918
14919         case VM_BEHAVIOR_FREE:
14920                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14921
14922         case VM_BEHAVIOR_REUSABLE:
14923                 return vm_map_reusable_pages(map, start, end);
14924
14925         case VM_BEHAVIOR_REUSE:
14926                 return vm_map_reuse_pages(map, start, end);
14927
14928         case VM_BEHAVIOR_CAN_REUSE:
14929                 return vm_map_can_reuse(map, start, end);
14930
14931 #if MACH_ASSERT
14932         case VM_BEHAVIOR_PAGEOUT:
14933                 return vm_map_pageout(map, start, end);
14934 #endif /* MACH_ASSERT */
14935
14936         default:
14937                 return KERN_INVALID_ARGUMENT;
14938         }
14939
14940         return KERN_SUCCESS;
14941 }
14942
14943
14944 /*
14945  * Internals for madvise(MADV_WILLNEED) system call.
14946  *
14947  * The implementation is to do:-
14948  * a) read-ahead if the mapping corresponds to a mapped regular file
14949  * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
14950  */
14951
14952
14953 static kern_return_t
14954 vm_map_willneed(
14955         vm_map_t        map,
14956         vm_map_offset_t start,
14957         vm_map_offset_t end
14958         )
14959 {
14960         vm_map_entry_t                  entry;
14961         vm_object_t                     object;
14962         memory_object_t                 pager;
14963         struct vm_object_fault_info     fault_info = {};
14964         kern_return_t                   kr;
14965         vm_object_size_t                len;
14966         vm_object_offset_t              offset;
14967
14968         fault_info.interruptible = THREAD_UNINT;        /* ignored value */
14969         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
14970         fault_info.stealth       = TRUE;
14971
14972         /*
14973          * The MADV_WILLNEED operation doesn't require any changes to the
14974          * vm_map_entry_t's, so the read lock is sufficient.
14975          */
14976
14977         vm_map_lock_read(map);
14978
14979         /*
14980          * The madvise semantics require that the address range be fully
14981          * allocated with no holes.  Otherwise, we're required to return
14982          * an error.
14983          */
14984
14985         if (!vm_map_range_check(map, start, end, &entry)) {
14986                 vm_map_unlock_read(map);
14987                 return KERN_INVALID_ADDRESS;
14988         }
14989
14990         /*
14991          * Examine each vm_map_entry_t in the range.
14992          */
14993         for (; entry != vm_map_to_entry(map) && start < end;) {
14994                 /*
14995                  * The first time through, the start address could be anywhere
14996                  * within the vm_map_entry we found.  So adjust the offset to
14997                  * correspond.  After that, the offset will always be zero to
14998                  * correspond to the beginning of the current vm_map_entry.
14999                  */
15000                 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15001
15002                 /*
15003                  * Set the length so we don't go beyond the end of the
15004                  * map_entry or beyond the end of the range we were given.
15005                  * This range could span also multiple map entries all of which
15006                  * map different files, so make sure we only do the right amount
15007                  * of I/O for each object.  Note that it's possible for there
15008                  * to be multiple map entries all referring to the same object
15009                  * but with different page permissions, but it's not worth
15010                  * trying to optimize that case.
15011                  */
15012                 len = MIN(entry->vme_end - start, end - start);
15013
15014                 if ((vm_size_t) len != len) {
15015                         /* 32-bit overflow */
15016                         len = (vm_size_t) (0 - PAGE_SIZE);
15017                 }
15018                 fault_info.cluster_size = (vm_size_t) len;
15019                 fault_info.lo_offset    = offset;
15020                 fault_info.hi_offset    = offset + len;
15021                 fault_info.user_tag     = VME_ALIAS(entry);
15022                 fault_info.pmap_options = 0;
15023                 if (entry->iokit_acct ||
15024                     (!entry->is_sub_map && !entry->use_pmap)) {
15025                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15026                 }
15027
15028                 /*
15029                  * If the entry is a submap OR there's no read permission
15030                  * to this mapping, then just skip it.
15031                  */
15032                 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15033                         entry = entry->vme_next;
15034                         start = entry->vme_start;
15035                         continue;
15036                 }
15037
15038                 object = VME_OBJECT(entry);
15039
15040                 if (object == NULL ||
15041                     (object && object->internal)) {
15042                         /*
15043                          * Memory range backed by anonymous memory.
15044                          */
15045                         vm_size_t region_size = 0, effective_page_size = 0;
15046                         vm_map_offset_t addr = 0, effective_page_mask = 0;
15047
15048                         region_size = len;
15049                         addr = start;
15050
15051                         effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15052                         effective_page_size = effective_page_mask + 1;
15053
15054                         vm_map_unlock_read(map);
15055
15056                         while (region_size) {
15057                                 vm_pre_fault(
15058                                         vm_map_trunc_page(addr, effective_page_mask),
15059                                         VM_PROT_READ | VM_PROT_WRITE);
15060
15061                                 region_size -= effective_page_size;
15062                                 addr += effective_page_size;
15063                         }
15064                 } else {
15065                         /*
15066                          * Find the file object backing this map entry.  If there is
15067                          * none, then we simply ignore the "will need" advice for this
15068                          * entry and go on to the next one.
15069                          */
15070                         if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15071                                 entry = entry->vme_next;
15072                                 start = entry->vme_start;
15073                                 continue;
15074                         }
15075
15076                         vm_object_paging_begin(object);
15077                         pager = object->pager;
15078                         vm_object_unlock(object);
15079
15080                         /*
15081                          * The data_request() could take a long time, so let's
15082                          * release the map lock to avoid blocking other threads.
15083                          */
15084                         vm_map_unlock_read(map);
15085
15086                         /*
15087                          * Get the data from the object asynchronously.
15088                          *
15089                          * Note that memory_object_data_request() places limits on the
15090                          * amount of I/O it will do.  Regardless of the len we
15091                          * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15092                          * silently truncates the len to that size.  This isn't
15093                          * necessarily bad since madvise shouldn't really be used to
15094                          * page in unlimited amounts of data.  Other Unix variants
15095                          * limit the willneed case as well.  If this turns out to be an
15096                          * issue for developers, then we can always adjust the policy
15097                          * here and still be backwards compatible since this is all
15098                          * just "advice".
15099                          */
15100                         kr = memory_object_data_request(
15101                                 pager,
15102                                 offset + object->paging_offset,
15103                                 0,      /* ignored */
15104                                 VM_PROT_READ,
15105                                 (memory_object_fault_info_t)&fault_info);
15106
15107                         vm_object_lock(object);
15108                         vm_object_paging_end(object);
15109                         vm_object_unlock(object);
15110
15111                         /*
15112                          * If we couldn't do the I/O for some reason, just give up on
15113                          * the madvise.  We still return success to the user since
15114                          * madvise isn't supposed to fail when the advice can't be
15115                          * taken.
15116                          */
15117
15118                         if (kr != KERN_SUCCESS) {
15119                                 return KERN_SUCCESS;
15120                         }
15121                 }
15122
15123                 start += len;
15124                 if (start >= end) {
15125                         /* done */
15126                         return KERN_SUCCESS;
15127                 }
15128
15129                 /* look up next entry */
15130                 vm_map_lock_read(map);
15131                 if (!vm_map_lookup_entry(map, start, &entry)) {
15132                         /*
15133                          * There's a new hole in the address range.
15134                          */
15135                         vm_map_unlock_read(map);
15136                         return KERN_INVALID_ADDRESS;
15137                 }
15138         }
15139
15140         vm_map_unlock_read(map);
15141         return KERN_SUCCESS;
15142 }
15143
15144 static boolean_t
15145 vm_map_entry_is_reusable(
15146         vm_map_entry_t entry)
15147 {
15148         /* Only user map entries */
15149
15150         vm_object_t object;
15151
15152         if (entry->is_sub_map) {
15153                 return FALSE;
15154         }
15155
15156         switch (VME_ALIAS(entry)) {
15157         case VM_MEMORY_MALLOC:
15158         case VM_MEMORY_MALLOC_SMALL:
15159         case VM_MEMORY_MALLOC_LARGE:
15160         case VM_MEMORY_REALLOC:
15161         case VM_MEMORY_MALLOC_TINY:
15162         case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15163         case VM_MEMORY_MALLOC_LARGE_REUSED:
15164                 /*
15165                  * This is a malloc() memory region: check if it's still
15166                  * in its original state and can be re-used for more
15167                  * malloc() allocations.
15168                  */
15169                 break;
15170         default:
15171                 /*
15172                  * Not a malloc() memory region: let the caller decide if
15173                  * it's re-usable.
15174                  */
15175                 return TRUE;
15176         }
15177
15178         if (/*entry->is_shared ||*/
15179                 entry->is_sub_map ||
15180                 entry->in_transition ||
15181                 entry->protection != VM_PROT_DEFAULT ||
15182                 entry->max_protection != VM_PROT_ALL ||
15183                 entry->inheritance != VM_INHERIT_DEFAULT ||
15184                 entry->no_cache ||
15185                 entry->permanent ||
15186                 entry->superpage_size != FALSE ||
15187                 entry->zero_wired_pages ||
15188                 entry->wired_count != 0 ||
15189                 entry->user_wired_count != 0) {
15190                 return FALSE;
15191         }
15192
15193         object = VME_OBJECT(entry);
15194         if (object == VM_OBJECT_NULL) {
15195                 return TRUE;
15196         }
15197         if (
15198 #if 0
15199                 /*
15200                  * Let's proceed even if the VM object is potentially
15201                  * shared.
15202                  * We check for this later when processing the actual
15203                  * VM pages, so the contents will be safe if shared.
15204                  *
15205                  * But we can still mark this memory region as "reusable" to
15206                  * acknowledge that the caller did let us know that the memory
15207                  * could be re-used and should not be penalized for holding
15208                  * on to it.  This allows its "resident size" to not include
15209                  * the reusable range.
15210                  */
15211                 object->ref_count == 1 &&
15212 #endif
15213                 object->wired_page_count == 0 &&
15214                 object->copy == VM_OBJECT_NULL &&
15215                 object->shadow == VM_OBJECT_NULL &&
15216                 object->internal &&
15217                 object->purgable == VM_PURGABLE_DENY &&
15218                 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15219                 !object->true_share &&
15220                 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15221                 !object->code_signed) {
15222                 return TRUE;
15223         }
15224         return FALSE;
15225 }
15226
15227 static kern_return_t
15228 vm_map_reuse_pages(
15229         vm_map_t        map,
15230         vm_map_offset_t start,
15231         vm_map_offset_t end)
15232 {
15233         vm_map_entry_t                  entry;
15234         vm_object_t                     object;
15235         vm_object_offset_t              start_offset, end_offset;
15236
15237         /*
15238          * The MADV_REUSE operation doesn't require any changes to the
15239          * vm_map_entry_t's, so the read lock is sufficient.
15240          */
15241
15242         vm_map_lock_read(map);
15243         assert(map->pmap != kernel_pmap);       /* protect alias access */
15244
15245         /*
15246          * The madvise semantics require that the address range be fully
15247          * allocated with no holes.  Otherwise, we're required to return
15248          * an error.
15249          */
15250
15251         if (!vm_map_range_check(map, start, end, &entry)) {
15252                 vm_map_unlock_read(map);
15253                 vm_page_stats_reusable.reuse_pages_failure++;
15254                 return KERN_INVALID_ADDRESS;
15255         }
15256
15257         /*
15258          * Examine each vm_map_entry_t in the range.
15259          */
15260         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15261             entry = entry->vme_next) {
15262                 /*
15263                  * Sanity check on the VM map entry.
15264                  */
15265                 if (!vm_map_entry_is_reusable(entry)) {
15266                         vm_map_unlock_read(map);
15267                         vm_page_stats_reusable.reuse_pages_failure++;
15268                         return KERN_INVALID_ADDRESS;
15269                 }
15270
15271                 /*
15272                  * The first time through, the start address could be anywhere
15273                  * within the vm_map_entry we found.  So adjust the offset to
15274                  * correspond.
15275                  */
15276                 if (entry->vme_start < start) {
15277                         start_offset = start - entry->vme_start;
15278                 } else {
15279                         start_offset = 0;
15280                 }
15281                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15282                 start_offset += VME_OFFSET(entry);
15283                 end_offset += VME_OFFSET(entry);
15284
15285                 assert(!entry->is_sub_map);
15286                 object = VME_OBJECT(entry);
15287                 if (object != VM_OBJECT_NULL) {
15288                         vm_object_lock(object);
15289                         vm_object_reuse_pages(object, start_offset, end_offset,
15290                             TRUE);
15291                         vm_object_unlock(object);
15292                 }
15293
15294                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15295                         /*
15296                          * XXX
15297                          * We do not hold the VM map exclusively here.
15298                          * The "alias" field is not that critical, so it's
15299                          * safe to update it here, as long as it is the only
15300                          * one that can be modified while holding the VM map
15301                          * "shared".
15302                          */
15303                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15304                 }
15305         }
15306
15307         vm_map_unlock_read(map);
15308         vm_page_stats_reusable.reuse_pages_success++;
15309         return KERN_SUCCESS;
15310 }
15311
15312
15313 static kern_return_t
15314 vm_map_reusable_pages(
15315         vm_map_t        map,
15316         vm_map_offset_t start,
15317         vm_map_offset_t end)
15318 {
15319         vm_map_entry_t                  entry;
15320         vm_object_t                     object;
15321         vm_object_offset_t              start_offset, end_offset;
15322         vm_map_offset_t                 pmap_offset;
15323
15324         /*
15325          * The MADV_REUSABLE operation doesn't require any changes to the
15326          * vm_map_entry_t's, so the read lock is sufficient.
15327          */
15328
15329         vm_map_lock_read(map);
15330         assert(map->pmap != kernel_pmap);       /* protect alias access */
15331
15332         /*
15333          * The madvise semantics require that the address range be fully
15334          * allocated with no holes.  Otherwise, we're required to return
15335          * an error.
15336          */
15337
15338         if (!vm_map_range_check(map, start, end, &entry)) {
15339                 vm_map_unlock_read(map);
15340                 vm_page_stats_reusable.reusable_pages_failure++;
15341                 return KERN_INVALID_ADDRESS;
15342         }
15343
15344         /*
15345          * Examine each vm_map_entry_t in the range.
15346          */
15347         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15348             entry = entry->vme_next) {
15349                 int kill_pages = 0;
15350
15351                 /*
15352                  * Sanity check on the VM map entry.
15353                  */
15354                 if (!vm_map_entry_is_reusable(entry)) {
15355                         vm_map_unlock_read(map);
15356                         vm_page_stats_reusable.reusable_pages_failure++;
15357                         return KERN_INVALID_ADDRESS;
15358                 }
15359
15360                 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15361                         /* not writable: can't discard contents */
15362                         vm_map_unlock_read(map);
15363                         vm_page_stats_reusable.reusable_nonwritable++;
15364                         vm_page_stats_reusable.reusable_pages_failure++;
15365                         return KERN_PROTECTION_FAILURE;
15366                 }
15367
15368                 /*
15369                  * The first time through, the start address could be anywhere
15370                  * within the vm_map_entry we found.  So adjust the offset to
15371                  * correspond.
15372                  */
15373                 if (entry->vme_start < start) {
15374                         start_offset = start - entry->vme_start;
15375                         pmap_offset = start;
15376                 } else {
15377                         start_offset = 0;
15378                         pmap_offset = entry->vme_start;
15379                 }
15380                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15381                 start_offset += VME_OFFSET(entry);
15382                 end_offset += VME_OFFSET(entry);
15383
15384                 assert(!entry->is_sub_map);
15385                 object = VME_OBJECT(entry);
15386                 if (object == VM_OBJECT_NULL) {
15387                         continue;
15388                 }
15389
15390
15391                 vm_object_lock(object);
15392                 if (((object->ref_count == 1) ||
15393                     (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15394                     object->copy == VM_OBJECT_NULL)) &&
15395                     object->shadow == VM_OBJECT_NULL &&
15396                     /*
15397                      * "iokit_acct" entries are billed for their virtual size
15398                      * (rather than for their resident pages only), so they
15399                      * wouldn't benefit from making pages reusable, and it
15400                      * would be hard to keep track of pages that are both
15401                      * "iokit_acct" and "reusable" in the pmap stats and
15402                      * ledgers.
15403                      */
15404                     !(entry->iokit_acct ||
15405                     (!entry->is_sub_map && !entry->use_pmap))) {
15406                         if (object->ref_count != 1) {
15407                                 vm_page_stats_reusable.reusable_shared++;
15408                         }
15409                         kill_pages = 1;
15410                 } else {
15411                         kill_pages = -1;
15412                 }
15413                 if (kill_pages != -1) {
15414                         vm_object_deactivate_pages(object,
15415                             start_offset,
15416                             end_offset - start_offset,
15417                             kill_pages,
15418                             TRUE /*reusable_pages*/,
15419                             map->pmap,
15420                             pmap_offset);
15421                 } else {
15422                         vm_page_stats_reusable.reusable_pages_shared++;
15423                 }
15424                 vm_object_unlock(object);
15425
15426                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15427                     VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15428                         /*
15429                          * XXX
15430                          * We do not hold the VM map exclusively here.
15431                          * The "alias" field is not that critical, so it's
15432                          * safe to update it here, as long as it is the only
15433                          * one that can be modified while holding the VM map
15434                          * "shared".
15435                          */
15436                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15437                 }
15438         }
15439
15440         vm_map_unlock_read(map);
15441         vm_page_stats_reusable.reusable_pages_success++;
15442         return KERN_SUCCESS;
15443 }
15444
15445
15446 static kern_return_t
15447 vm_map_can_reuse(
15448         vm_map_t        map,
15449         vm_map_offset_t start,
15450         vm_map_offset_t end)
15451 {
15452         vm_map_entry_t                  entry;
15453
15454         /*
15455          * The MADV_REUSABLE operation doesn't require any changes to the
15456          * vm_map_entry_t's, so the read lock is sufficient.
15457          */
15458
15459         vm_map_lock_read(map);
15460         assert(map->pmap != kernel_pmap);       /* protect alias access */
15461
15462         /*
15463          * The madvise semantics require that the address range be fully
15464          * allocated with no holes.  Otherwise, we're required to return
15465          * an error.
15466          */
15467
15468         if (!vm_map_range_check(map, start, end, &entry)) {
15469                 vm_map_unlock_read(map);
15470                 vm_page_stats_reusable.can_reuse_failure++;
15471                 return KERN_INVALID_ADDRESS;
15472         }
15473
15474         /*
15475          * Examine each vm_map_entry_t in the range.
15476          */
15477         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15478             entry = entry->vme_next) {
15479                 /*
15480                  * Sanity check on the VM map entry.
15481                  */
15482                 if (!vm_map_entry_is_reusable(entry)) {
15483                         vm_map_unlock_read(map);
15484                         vm_page_stats_reusable.can_reuse_failure++;
15485                         return KERN_INVALID_ADDRESS;
15486                 }
15487         }
15488
15489         vm_map_unlock_read(map);
15490         vm_page_stats_reusable.can_reuse_success++;
15491         return KERN_SUCCESS;
15492 }
15493
15494
15495 #if MACH_ASSERT
15496 static kern_return_t
15497 vm_map_pageout(
15498         vm_map_t        map,
15499         vm_map_offset_t start,
15500         vm_map_offset_t end)
15501 {
15502         vm_map_entry_t                  entry;
15503
15504         /*
15505          * The MADV_PAGEOUT operation doesn't require any changes to the
15506          * vm_map_entry_t's, so the read lock is sufficient.
15507          */
15508
15509         vm_map_lock_read(map);
15510
15511         /*
15512          * The madvise semantics require that the address range be fully
15513          * allocated with no holes.  Otherwise, we're required to return
15514          * an error.
15515          */
15516
15517         if (!vm_map_range_check(map, start, end, &entry)) {
15518                 vm_map_unlock_read(map);
15519                 return KERN_INVALID_ADDRESS;
15520         }
15521
15522         /*
15523          * Examine each vm_map_entry_t in the range.
15524          */
15525         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15526             entry = entry->vme_next) {
15527                 vm_object_t     object;
15528
15529                 /*
15530                  * Sanity check on the VM map entry.
15531                  */
15532                 if (entry->is_sub_map) {
15533                         vm_map_t submap;
15534                         vm_map_offset_t submap_start;
15535                         vm_map_offset_t submap_end;
15536                         vm_map_entry_t submap_entry;
15537
15538                         submap = VME_SUBMAP(entry);
15539                         submap_start = VME_OFFSET(entry);
15540                         submap_end = submap_start + (entry->vme_end -
15541                             entry->vme_start);
15542
15543                         vm_map_lock_read(submap);
15544
15545                         if (!vm_map_range_check(submap,
15546                             submap_start,
15547                             submap_end,
15548                             &submap_entry)) {
15549                                 vm_map_unlock_read(submap);
15550                                 vm_map_unlock_read(map);
15551                                 return KERN_INVALID_ADDRESS;
15552                         }
15553
15554                         object = VME_OBJECT(submap_entry);
15555                         if (submap_entry->is_sub_map ||
15556                             object == VM_OBJECT_NULL ||
15557                             !object->internal) {
15558                                 vm_map_unlock_read(submap);
15559                                 continue;
15560                         }
15561
15562                         vm_object_pageout(object);
15563
15564                         vm_map_unlock_read(submap);
15565                         submap = VM_MAP_NULL;
15566                         submap_entry = VM_MAP_ENTRY_NULL;
15567                         continue;
15568                 }
15569
15570                 object = VME_OBJECT(entry);
15571                 if (entry->is_sub_map ||
15572                     object == VM_OBJECT_NULL ||
15573                     !object->internal) {
15574                         continue;
15575                 }
15576
15577                 vm_object_pageout(object);
15578         }
15579
15580         vm_map_unlock_read(map);
15581         return KERN_SUCCESS;
15582 }
15583 #endif /* MACH_ASSERT */
15584
15585
15586 /*
15587  *      Routine:        vm_map_entry_insert
15588  *
15589  *      Description:    This routine inserts a new vm_entry in a locked map.
15590  */
15591 vm_map_entry_t
15592 vm_map_entry_insert(
15593         vm_map_t                map,
15594         vm_map_entry_t          insp_entry,
15595         vm_map_offset_t         start,
15596         vm_map_offset_t         end,
15597         vm_object_t             object,
15598         vm_object_offset_t      offset,
15599         boolean_t               needs_copy,
15600         boolean_t               is_shared,
15601         boolean_t               in_transition,
15602         vm_prot_t               cur_protection,
15603         vm_prot_t               max_protection,
15604         vm_behavior_t           behavior,
15605         vm_inherit_t            inheritance,
15606         unsigned                wired_count,
15607         boolean_t               no_cache,
15608         boolean_t               permanent,
15609         boolean_t               no_copy_on_read,
15610         unsigned int            superpage_size,
15611         boolean_t               clear_map_aligned,
15612         boolean_t               is_submap,
15613         boolean_t               used_for_jit,
15614         int                     alias)
15615 {
15616         vm_map_entry_t  new_entry;
15617
15618         assert(insp_entry != (vm_map_entry_t)0);
15619         vm_map_lock_assert_exclusive(map);
15620
15621 #if DEVELOPMENT || DEBUG
15622         vm_object_offset_t      end_offset = 0;
15623         assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15624 #endif /* DEVELOPMENT || DEBUG */
15625
15626         new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15627
15628         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15629                 new_entry->map_aligned = TRUE;
15630         } else {
15631                 new_entry->map_aligned = FALSE;
15632         }
15633         if (clear_map_aligned &&
15634             (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15635             !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15636                 new_entry->map_aligned = FALSE;
15637         }
15638
15639         new_entry->vme_start = start;
15640         new_entry->vme_end = end;
15641         assert(page_aligned(new_entry->vme_start));
15642         assert(page_aligned(new_entry->vme_end));
15643         if (new_entry->map_aligned) {
15644                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15645                     VM_MAP_PAGE_MASK(map)));
15646                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15647                     VM_MAP_PAGE_MASK(map)));
15648         }
15649         assert(new_entry->vme_start < new_entry->vme_end);
15650
15651         VME_OBJECT_SET(new_entry, object);
15652         VME_OFFSET_SET(new_entry, offset);
15653         new_entry->is_shared = is_shared;
15654         new_entry->is_sub_map = is_submap;
15655         new_entry->needs_copy = needs_copy;
15656         new_entry->in_transition = in_transition;
15657         new_entry->needs_wakeup = FALSE;
15658         new_entry->inheritance = inheritance;
15659         new_entry->protection = cur_protection;
15660         new_entry->max_protection = max_protection;
15661         new_entry->behavior = behavior;
15662         new_entry->wired_count = wired_count;
15663         new_entry->user_wired_count = 0;
15664         if (is_submap) {
15665                 /*
15666                  * submap: "use_pmap" means "nested".
15667                  * default: false.
15668                  */
15669                 new_entry->use_pmap = FALSE;
15670         } else {
15671                 /*
15672                  * object: "use_pmap" means "use pmap accounting" for footprint.
15673                  * default: true.
15674                  */
15675                 new_entry->use_pmap = TRUE;
15676         }
15677         VME_ALIAS_SET(new_entry, alias);
15678         new_entry->zero_wired_pages = FALSE;
15679         new_entry->no_cache = no_cache;
15680         new_entry->permanent = permanent;
15681         if (superpage_size) {
15682                 new_entry->superpage_size = TRUE;
15683         } else {
15684                 new_entry->superpage_size = FALSE;
15685         }
15686         if (used_for_jit) {
15687 #if CONFIG_EMBEDDED
15688                 if (!(map->jit_entry_exists))
15689 #endif /* CONFIG_EMBEDDED */
15690                 {
15691                         new_entry->used_for_jit = TRUE;
15692                         map->jit_entry_exists = TRUE;
15693                 }
15694         } else {
15695                 new_entry->used_for_jit = FALSE;
15696         }
15697         new_entry->pmap_cs_associated = FALSE;
15698         new_entry->iokit_acct = FALSE;
15699         new_entry->vme_resilient_codesign = FALSE;
15700         new_entry->vme_resilient_media = FALSE;
15701         new_entry->vme_atomic = FALSE;
15702         new_entry->vme_no_copy_on_read = no_copy_on_read;
15703
15704         /*
15705          *      Insert the new entry into the list.
15706          */
15707
15708         vm_map_store_entry_link(map, insp_entry, new_entry,
15709             VM_MAP_KERNEL_FLAGS_NONE);
15710         map->size += end - start;
15711
15712         /*
15713          *      Update the free space hint and the lookup hint.
15714          */
15715
15716         SAVE_HINT_MAP_WRITE(map, new_entry);
15717         return new_entry;
15718 }
15719
15720 /*
15721  *      Routine:        vm_map_remap_extract
15722  *
 *      Description:    This routine returns a vm_entry list from a map.
15724  */
15725 static kern_return_t
15726 vm_map_remap_extract(
15727         vm_map_t                map,
15728         vm_map_offset_t         addr,
15729         vm_map_size_t           size,
15730         boolean_t               copy,
15731         struct vm_map_header    *map_header,
15732         vm_prot_t               *cur_protection,
15733         vm_prot_t               *max_protection,
15734         /* What, no behavior? */
15735         vm_inherit_t            inheritance,
15736         boolean_t               pageable,
15737         boolean_t               same_map,
15738         vm_map_kernel_flags_t   vmk_flags)
15739 {
15740         kern_return_t           result;
15741         vm_map_size_t           mapped_size;
15742         vm_map_size_t           tmp_size;
15743         vm_map_entry_t          src_entry;     /* result of last map lookup */
15744         vm_map_entry_t          new_entry;
15745         vm_object_offset_t      offset;
15746         vm_map_offset_t         map_address;
15747         vm_map_offset_t         src_start;     /* start of entry to map */
15748         vm_map_offset_t         src_end;       /* end of region to be mapped */
15749         vm_object_t             object;
15750         vm_map_version_t        version;
15751         boolean_t               src_needs_copy;
15752         boolean_t               new_entry_needs_copy;
15753         vm_map_entry_t          saved_src_entry;
15754         boolean_t               src_entry_was_wired;
15755         vm_prot_t               max_prot_for_prot_copy;
15756
15757         assert(map != VM_MAP_NULL);
15758         assert(size != 0);
15759         assert(size == vm_map_round_page(size, PAGE_MASK));
15760         assert(inheritance == VM_INHERIT_NONE ||
15761             inheritance == VM_INHERIT_COPY ||
15762             inheritance == VM_INHERIT_SHARE);
15763
15764         /*
15765          *      Compute start and end of region.
15766          */
15767         src_start = vm_map_trunc_page(addr, PAGE_MASK);
15768         src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15769
15770
15771         /*
15772          *      Initialize map_header.
15773          */
15774         map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15775         map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15776         map_header->nentries = 0;
15777         map_header->entries_pageable = pageable;
15778         map_header->page_shift = PAGE_SHIFT;
15779
15780         vm_map_store_init( map_header );
15781
15782         if (copy && vmk_flags.vmkf_remap_prot_copy) {
15783                 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15784         } else {
15785                 max_prot_for_prot_copy = VM_PROT_NONE;
15786         }
15787         *cur_protection = VM_PROT_ALL;
15788         *max_protection = VM_PROT_ALL;
15789
15790         map_address = 0;
15791         mapped_size = 0;
15792         result = KERN_SUCCESS;
15793
15794         /*
15795          *      The specified source virtual space might correspond to
15796          *      multiple map entries, need to loop on them.
15797          */
15798         vm_map_lock(map);
15799         while (mapped_size != size) {
15800                 vm_map_size_t   entry_size;
15801
15802                 /*
15803                  *      Find the beginning of the region.
15804                  */
15805                 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
15806                         result = KERN_INVALID_ADDRESS;
15807                         break;
15808                 }
15809
15810                 if (src_start < src_entry->vme_start ||
15811                     (mapped_size && src_start != src_entry->vme_start)) {
15812                         result = KERN_INVALID_ADDRESS;
15813                         break;
15814                 }
15815
15816                 tmp_size = size - mapped_size;
15817                 if (src_end > src_entry->vme_end) {
15818                         tmp_size -= (src_end - src_entry->vme_end);
15819                 }
15820
15821                 entry_size = (vm_map_size_t)(src_entry->vme_end -
15822                     src_entry->vme_start);
15823
15824                 if (src_entry->is_sub_map) {
15825                         vm_map_reference(VME_SUBMAP(src_entry));
15826                         object = VM_OBJECT_NULL;
15827                 } else {
15828                         object = VME_OBJECT(src_entry);
15829                         if (src_entry->iokit_acct) {
15830                                 /*
15831                                  * This entry uses "IOKit accounting".
15832                                  */
15833                         } else if (object != VM_OBJECT_NULL &&
15834                             (object->purgable != VM_PURGABLE_DENY ||
15835                             object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
15836                                 /*
15837                                  * Purgeable objects have their own accounting:
15838                                  * no pmap accounting for them.
15839                                  */
15840                                 assertf(!src_entry->use_pmap,
15841                                     "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15842                                     map,
15843                                     src_entry,
15844                                     (uint64_t)src_entry->vme_start,
15845                                     (uint64_t)src_entry->vme_end,
15846                                     src_entry->protection,
15847                                     src_entry->max_protection,
15848                                     VME_ALIAS(src_entry));
15849                         } else {
15850                                 /*
15851                                  * Not IOKit or purgeable:
15852                                  * must be accounted by pmap stats.
15853                                  */
15854                                 assertf(src_entry->use_pmap,
15855                                     "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15856                                     map,
15857                                     src_entry,
15858                                     (uint64_t)src_entry->vme_start,
15859                                     (uint64_t)src_entry->vme_end,
15860                                     src_entry->protection,
15861                                     src_entry->max_protection,
15862                                     VME_ALIAS(src_entry));
15863                         }
15864
15865                         if (object == VM_OBJECT_NULL) {
15866                                 object = vm_object_allocate(entry_size);
15867                                 VME_OFFSET_SET(src_entry, 0);
15868                                 VME_OBJECT_SET(src_entry, object);
15869                                 assert(src_entry->use_pmap);
15870                         } else if (object->copy_strategy !=
15871                             MEMORY_OBJECT_COPY_SYMMETRIC) {
15872                                 /*
15873                                  *      We are already using an asymmetric
15874                                  *      copy, and therefore we already have
15875                                  *      the right object.
15876                                  */
15877                                 assert(!src_entry->needs_copy);
15878                         } else if (src_entry->needs_copy || object->shadowed ||
15879                             (object->internal && !object->true_share &&
15880                             !src_entry->is_shared &&
15881                             object->vo_size > entry_size)) {
15882                                 VME_OBJECT_SHADOW(src_entry, entry_size);
15883                                 assert(src_entry->use_pmap);
15884
15885                                 if (!src_entry->needs_copy &&
15886                                     (src_entry->protection & VM_PROT_WRITE)) {
15887                                         vm_prot_t prot;
15888
15889                                         assert(!pmap_has_prot_policy(src_entry->protection));
15890
15891                                         prot = src_entry->protection & ~VM_PROT_WRITE;
15892
15893                                         if (override_nx(map,
15894                                             VME_ALIAS(src_entry))
15895                                             && prot) {
15896                                                 prot |= VM_PROT_EXECUTE;
15897                                         }
15898
15899                                         assert(!pmap_has_prot_policy(prot));
15900
15901                                         if (map->mapped_in_other_pmaps) {
15902                                                 vm_object_pmap_protect(
15903                                                         VME_OBJECT(src_entry),
15904                                                         VME_OFFSET(src_entry),
15905                                                         entry_size,
15906                                                         PMAP_NULL,
15907                                                         src_entry->vme_start,
15908                                                         prot);
15909                                         } else {
15910                                                 pmap_protect(vm_map_pmap(map),
15911                                                     src_entry->vme_start,
15912                                                     src_entry->vme_end,
15913                                                     prot);
15914                                         }
15915                                 }
15916
15917                                 object = VME_OBJECT(src_entry);
15918                                 src_entry->needs_copy = FALSE;
15919                         }
15920
15921
15922                         vm_object_lock(object);
15923                         vm_object_reference_locked(object); /* object ref. for new entry */
15924                         if (object->copy_strategy ==
15925                             MEMORY_OBJECT_COPY_SYMMETRIC) {
15926                                 object->copy_strategy =
15927                                     MEMORY_OBJECT_COPY_DELAY;
15928                         }
15929                         vm_object_unlock(object);
15930                 }
15931
15932                 offset = (VME_OFFSET(src_entry) +
15933                     (src_start - src_entry->vme_start));
15934
15935                 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15936                 vm_map_entry_copy(new_entry, src_entry);
15937                 if (new_entry->is_sub_map) {
15938                         /* clr address space specifics */
15939                         new_entry->use_pmap = FALSE;
15940                 } else if (copy) {
15941                         /*
15942                          * We're dealing with a copy-on-write operation,
15943                          * so the resulting mapping should not inherit the
15944                          * original mapping's accounting settings.
15945                          * "use_pmap" should be reset to its default (TRUE)
15946                          * so that the new mapping gets accounted for in
15947                          * the task's memory footprint.
15948                          */
15949                         new_entry->use_pmap = TRUE;
15950                 }
15951                 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15952                 assert(!new_entry->iokit_acct);
15953
15954                 new_entry->map_aligned = FALSE;
15955
15956                 new_entry->vme_start = map_address;
15957                 new_entry->vme_end = map_address + tmp_size;
15958                 assert(new_entry->vme_start < new_entry->vme_end);
15959                 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15960                         /*
15961                          * Remapping for vm_map_protect(VM_PROT_COPY)
15962                          * to convert a read-only mapping into a
15963                          * copy-on-write version of itself but
15964                          * with write access:
15965                          * keep the original inheritance and add
15966                          * VM_PROT_WRITE to the max protection.
15967                          */
15968                         new_entry->inheritance = src_entry->inheritance;
15969                         new_entry->protection &= max_prot_for_prot_copy;
15970                         new_entry->max_protection |= VM_PROT_WRITE;
15971                 } else {
15972                         new_entry->inheritance = inheritance;
15973                 }
15974                 VME_OFFSET_SET(new_entry, offset);
15975
15976                 /*
15977                  * The new region has to be copied now if required.
15978                  */
15979 RestartCopy:
15980                 if (!copy) {
15981                         if (src_entry->used_for_jit == TRUE) {
15982                                 if (same_map) {
15983 #if __APRR_SUPPORTED__
15984                                         /*
15985                                          * Disallow re-mapping of any JIT regions on APRR devices.
15986                                          */
15987                                         result = KERN_PROTECTION_FAILURE;
15988                                         break;
15989 #endif /* __APRR_SUPPORTED__*/
15990                                 } else {
15991 #if CONFIG_EMBEDDED
15992                                         /*
15993                                          * Cannot allow an entry describing a JIT
15994                                          * region to be shared across address spaces.
15995                                          */
15996                                         result = KERN_INVALID_ARGUMENT;
15997                                         break;
15998 #endif /* CONFIG_EMBEDDED */
15999                                 }
16000                         }
16001
16002                         src_entry->is_shared = TRUE;
16003                         new_entry->is_shared = TRUE;
16004                         if (!(new_entry->is_sub_map)) {
16005                                 new_entry->needs_copy = FALSE;
16006                         }
16007                 } else if (src_entry->is_sub_map) {
16008                         /* make this a COW sub_map if not already */
16009                         assert(new_entry->wired_count == 0);
16010                         new_entry->needs_copy = TRUE;
16011                         object = VM_OBJECT_NULL;
16012                 } else if (src_entry->wired_count == 0 &&
16013                     vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
16014                     VME_OFFSET(new_entry),
16015                     (new_entry->vme_end -
16016                     new_entry->vme_start),
16017                     &src_needs_copy,
16018                     &new_entry_needs_copy)) {
16019                         new_entry->needs_copy = new_entry_needs_copy;
16020                         new_entry->is_shared = FALSE;
16021                         assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16022
16023                         /*
16024                          * Handle copy_on_write semantics.
16025                          */
16026                         if (src_needs_copy && !src_entry->needs_copy) {
16027                                 vm_prot_t prot;
16028
16029                                 assert(!pmap_has_prot_policy(src_entry->protection));
16030
16031                                 prot = src_entry->protection & ~VM_PROT_WRITE;
16032
16033                                 if (override_nx(map,
16034                                     VME_ALIAS(src_entry))
16035                                     && prot) {
16036                                         prot |= VM_PROT_EXECUTE;
16037                                 }
16038
16039                                 assert(!pmap_has_prot_policy(prot));
16040
16041                                 vm_object_pmap_protect(object,
16042                                     offset,
16043                                     entry_size,
16044                                     ((src_entry->is_shared
16045                                     || map->mapped_in_other_pmaps) ?
16046                                     PMAP_NULL : map->pmap),
16047                                     src_entry->vme_start,
16048                                     prot);
16049
16050                                 assert(src_entry->wired_count == 0);
16051                                 src_entry->needs_copy = TRUE;
16052                         }
16053                         /*
16054                          * Throw away the old object reference of the new entry.
16055                          */
16056                         vm_object_deallocate(object);
16057                 } else {
16058                         new_entry->is_shared = FALSE;
16059                         assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16060
16061                         src_entry_was_wired = (src_entry->wired_count > 0);
16062                         saved_src_entry = src_entry;
16063                         src_entry = VM_MAP_ENTRY_NULL;
16064
16065                         /*
16066                          * The map can be safely unlocked since we
16067                          * already hold a reference on the object.
16068                          *
16069                          * Record the timestamp of the map for later
16070                          * verification, and unlock the map.
16071                          */
16072                         version.main_timestamp = map->timestamp;
16073                         vm_map_unlock(map);     /* Increments timestamp once! */
16074
16075                         /*
16076                          * Perform the copy.
16077                          */
16078                         if (src_entry_was_wired > 0) {
16079                                 vm_object_lock(object);
16080                                 result = vm_object_copy_slowly(
16081                                         object,
16082                                         offset,
16083                                         (new_entry->vme_end -
16084                                         new_entry->vme_start),
16085                                         THREAD_UNINT,
16086                                         VME_OBJECT_PTR(new_entry));
16087
16088                                 VME_OFFSET_SET(new_entry, 0);
16089                                 new_entry->needs_copy = FALSE;
16090                         } else {
16091                                 vm_object_offset_t new_offset;
16092
16093                                 new_offset = VME_OFFSET(new_entry);
16094                                 result = vm_object_copy_strategically(
16095                                         object,
16096                                         offset,
16097                                         (new_entry->vme_end -
16098                                         new_entry->vme_start),
16099                                         VME_OBJECT_PTR(new_entry),
16100                                         &new_offset,
16101                                         &new_entry_needs_copy);
16102                                 if (new_offset != VME_OFFSET(new_entry)) {
16103                                         VME_OFFSET_SET(new_entry, new_offset);
16104                                 }
16105
16106                                 new_entry->needs_copy = new_entry_needs_copy;
16107                         }
16108
16109                         /*
16110                          * Throw away the old object reference of the new entry.
16111                          */
16112                         vm_object_deallocate(object);
16113
16114                         if (result != KERN_SUCCESS &&
16115                             result != KERN_MEMORY_RESTART_COPY) {
16116                                 _vm_map_entry_dispose(map_header, new_entry);
16117                                 vm_map_lock(map);
16118                                 break;
16119                         }
16120
16121                         /*
16122                          * Verify that the map has not substantially
16123                          * changed while the copy was being made.
16124                          */
16125
16126                         vm_map_lock(map);
16127                         if (version.main_timestamp + 1 != map->timestamp) {
16128                                 /*
16129                                  * Simple version comparison failed.
16130                                  *
16131                                  * Retry the lookup and verify that the
16132                                  * same object/offset are still present.
16133                                  */
16134                                 saved_src_entry = VM_MAP_ENTRY_NULL;
16135                                 vm_object_deallocate(VME_OBJECT(new_entry));
16136                                 _vm_map_entry_dispose(map_header, new_entry);
16137                                 if (result == KERN_MEMORY_RESTART_COPY) {
16138                                         result = KERN_SUCCESS;
16139                                 }
16140                                 continue;
16141                         }
16142                         /* map hasn't changed: src_entry is still valid */
16143                         src_entry = saved_src_entry;
16144                         saved_src_entry = VM_MAP_ENTRY_NULL;
16145
16146                         if (result == KERN_MEMORY_RESTART_COPY) {
16147                                 vm_object_reference(object);
16148                                 goto RestartCopy;
16149                         }
16150                 }
16151
16152                 _vm_map_store_entry_link(map_header,
16153                     map_header->links.prev, new_entry);
16154
16155                 /*Protections for submap mapping are irrelevant here*/
16156                 if (!src_entry->is_sub_map) {
16157                         *cur_protection &= src_entry->protection;
16158                         *max_protection &= src_entry->max_protection;
16159                 }
16160                 map_address += tmp_size;
16161                 mapped_size += tmp_size;
16162                 src_start += tmp_size;
16163         } /* end while */
16164
16165         vm_map_unlock(map);
16166         if (result != KERN_SUCCESS) {
16167                 /*
16168                  * Free all allocated elements.
16169                  */
16170                 for (src_entry = map_header->links.next;
16171                     src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16172                     src_entry = new_entry) {
16173                         new_entry = src_entry->vme_next;
16174                         _vm_map_store_entry_unlink(map_header, src_entry);
16175                         if (src_entry->is_sub_map) {
16176                                 vm_map_deallocate(VME_SUBMAP(src_entry));
16177                         } else {
16178                                 vm_object_deallocate(VME_OBJECT(src_entry));
16179                         }
16180                         _vm_map_entry_dispose(map_header, src_entry);
16181                 }
16182         }
16183         return result;
16184 }
16185
16186 /*
16187  *      Routine:        vm_map_remap
16188  *
16189  *      Description:
16190  *              Map a portion of a task's address space ("src_map",
16191  *              starting at "memory_address") into "target_map".
16192  *              The source and target maps can be identical.
16193  *              The source range is first extracted into a private
16194  *              list of map entries by vm_map_remap_extract(); a
16195  *              range is then allocated in the target map and the
       *              extracted entries are rebased onto it and linked in.
       *              Other attributes are the same as for vm_map().
       *
       *              NOTE(review): this comment used to state that the
       *              mapped region must not overlap more than one VM
       *              memory object; confirm whether that restriction
       *              still applies.
       *
       *      Parameters:
       *              target_map      map receiving the mapping; must not
       *                              be VM_MAP_NULL.
       *              address         IN/OUT: requested address on entry
       *                              (truncated to the target map's page
       *                              size); address of the new mapping on
       *                              return, or of the first requested
       *                              byte with VM_FLAGS_RETURN_DATA_ADDR.
       *              size            size of the range; rounded up to a
       *                              page multiple, must not end up zero.
       *              mask, flags, vmk_flags, tag
       *                              passed to
       *                              vm_map_remap_range_allocate().
       *              src_map         source map; must not be VM_MAP_NULL.
       *              memory_address  start of the range in "src_map".
       *              copy            FALSE to share the source memory,
       *                              TRUE to copy it; required to be TRUE
       *                              with VM_FLAGS_RESILIENT_MEDIA.
       *              cur_protection, max_protection
       *                              OUT: updated by
       *                              vm_map_remap_extract(); forced to
       *                              VM_PROT_READ for
       *                              VM_FLAGS_RESILIENT_CODESIGN or on a
       *                              PMAP_CS region mismatch.
       *              inheritance     VM_INHERIT_NONE, VM_INHERIT_COPY or
       *                              VM_INHERIT_SHARE.
       *
       *      Returns:
       *              KERN_INVALID_ARGUMENT for a bad argument, otherwise
       *              the result of the extraction, of the range
       *              allocation or, when the target map requires wiring,
       *              of vm_map_wire_kernel().
16196  */
16197 kern_return_t
16198 vm_map_remap(
16199         vm_map_t                target_map,
16200         vm_map_address_t        *address,
16201         vm_map_size_t           size,
16202         vm_map_offset_t         mask,
16203         int                     flags,
16204         vm_map_kernel_flags_t   vmk_flags,
16205         vm_tag_t                tag,
16206         vm_map_t                src_map,
16207         vm_map_offset_t         memory_address,
16208         boolean_t               copy,
16209         vm_prot_t               *cur_protection,
16210         vm_prot_t               *max_protection,
16211         vm_inherit_t            inheritance)
16212 {
16213         kern_return_t           result;
16214         vm_map_entry_t          entry;
16215         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
16216         vm_map_entry_t          new_entry;
16217         struct vm_map_header    map_header;
              /* only meaningful with VM_FLAGS_RETURN_DATA_ADDR */
16218         vm_map_offset_t         offset_in_mapping = 0;
16219
16220         if (target_map == VM_MAP_NULL) {
16221                 return KERN_INVALID_ARGUMENT;
16222         }
16223
              /*
               * "size" and "src_map" are checked inside the valid
               * inheritance cases so that every bad argument falls
               * through to the single KERN_INVALID_ARGUMENT return.
               */
16224         switch (inheritance) {
16225         case VM_INHERIT_NONE:
16226         case VM_INHERIT_COPY:
16227         case VM_INHERIT_SHARE:
16228                 if (size != 0 && src_map != VM_MAP_NULL) {
16229                         break;
16230                 }
16231         /*FALL THRU*/
16232         default:
16233                 return KERN_INVALID_ARGUMENT;
16234         }
16235
16236         /*
16237          * If the user is requesting that we return the address of the
16238          * first byte of the data (rather than the base of the page),
16239          * then we use different rounding semantics: specifically,
16240          * we assume that (memory_address, size) describes a region
16241          * all of whose pages we must cover, rather than a base to be truncated
16242          * down and a size to be added to that base.  So we figure out
16243          * the highest page that the requested region includes and make
16244          * sure that the size will cover it.
16245          *
16246          * The key example we're worried about is of the form:
16247          *
16248          *              memory_address = 0x1ff0, size = 0x20
16249          *
16250          * With the old semantics, we round down the memory_address to 0x1000
16251          * and round up the size to 0x1000, resulting in our covering *only*
16252          * page 0x1000.  With the new semantics, we'd realize that the region covers
16253          * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
16254          * 0x1000 and page 0x2000 in the region we remap.
16255          */
16256         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16257                 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16258                 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16259         } else {
16260                 size = vm_map_round_page(size, PAGE_MASK);
16261         }
              /*
               * "size" was non-zero above; presumably this catches a
               * size that wrapped to 0 while being rounded -- confirm.
               */
16262         if (size == 0) {
16263                 return KERN_INVALID_ARGUMENT;
16264         }
16265
16266         if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16267                 /* must be copy-on-write to be "media resilient" */
16268                 if (!copy) {
16269                         return KERN_INVALID_ARGUMENT;
16270                 }
16271         }
16272
              /*
               * Build, in "map_header", a private list of entries
               * describing the source range.  Nothing has been done to
               * the target map yet, so a failure can simply be returned.
               */
16273         result = vm_map_remap_extract(src_map, memory_address,
16274             size, copy, &map_header,
16275             cur_protection,
16276             max_protection,
16277             inheritance,
16278             target_map->hdr.entries_pageable,
16279             src_map == target_map,
16280             vmk_flags);
16281
16282         if (result != KERN_SUCCESS) {
16283                 return result;
16284         }
16285
16286         /*
16287          * Allocate/check a range of free virtual address
16288          * space for the target
16289          */
16290         *address = vm_map_trunc_page(*address,
16291             VM_MAP_PAGE_MASK(target_map));
16292         vm_map_lock(target_map);
16293         result = vm_map_remap_range_allocate(target_map, address, size,
16294             mask, flags, vmk_flags, tag,
16295             &insp_entry);
16296
              /*
               * Empty the private list.  On success each entry is rebased
               * by "*address" and linked into the target map after the
               * previously inserted one; on failure the reference it
               * holds on its object or submap is dropped and the entry
               * is disposed of.
               */
16297         for (entry = map_header.links.next;
16298             entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16299             entry = new_entry) {
                      /* save the successor: "entry" is about to be unlinked */
16300                 new_entry = entry->vme_next;
16301                 _vm_map_store_entry_unlink(&map_header, entry);
16302                 if (result == KERN_SUCCESS) {
16303                         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16304                                 /* no codesigning -> read-only access */
16305                                 entry->max_protection = VM_PROT_READ;
16306                                 entry->protection = VM_PROT_READ;
16307                                 entry->vme_resilient_codesign = TRUE;
16308                         }
16309                         entry->vme_start += *address;
16310                         entry->vme_end += *address;
16311                         assert(!entry->map_aligned);
16312                         if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16313                             !entry->is_sub_map &&
16314                             (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16315                             VME_OBJECT(entry)->internal)) {
16316                                 entry->vme_resilient_media = TRUE;
16317                         }
16318                         vm_map_store_entry_link(target_map, insp_entry, entry,
16319                             vmk_flags);
16320                         insp_entry = entry;
16321                 } else {
16322                         if (!entry->is_sub_map) {
16323                                 vm_object_deallocate(VME_OBJECT(entry));
16324                         } else {
16325                                 vm_map_deallocate(VME_SUBMAP(entry));
16326                         }
16327                         _vm_map_entry_dispose(&map_header, entry);
16328                 }
16329         }
16330
16331         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16332                 *cur_protection = VM_PROT_READ;
16333                 *max_protection = VM_PROT_READ;
16334         }
16335
              /*
               * Only look at "insp_entry" on success: when the range
               * allocation fails nothing was inserted and "insp_entry"
               * can still be VM_MAP_ENTRY_NULL, so dereferencing it
               * here would be a NULL pointer access.
               */
16336         if (result == KERN_SUCCESS && target_map->disable_vmentry_reuse == TRUE) {
16337                 assert(!target_map->is_nested_map);
16338                 if (target_map->highest_entry_end < insp_entry->vme_end) {
16339                         target_map->highest_entry_end = insp_entry->vme_end;
16340                 }
16341         }
16342
16343         if (result == KERN_SUCCESS) {
16344                 target_map->size += size;
16345                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16346
16347 #if PMAP_CS
                      /*
                       * An executable remapping whose page-rounded source
                       * range does not exactly match the region reported by
                       * pmap_cs_lookup() is reported to the caller as
                       * read-only.
                       */
16348                 if (*max_protection & VM_PROT_EXECUTE) {
16349                         vm_map_address_t region_start = 0, region_size = 0;
16350                         struct pmap_cs_code_directory *region_cd = NULL;
16351                         vm_map_address_t base = 0;
16352                         struct pmap_cs_lookup_results results = {};
16353                         vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16354                         vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16355
16356                         pmap_cs_lookup(src_map->pmap, memory_address, &results);
16357                         region_size = results.region_size;
16358                         region_start = results.region_start;
16359                         region_cd = results.region_cd_entry;
16360                         base = results.base;
16361
16362                         if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16363                                 *cur_protection = VM_PROT_READ;
16364                                 *max_protection = VM_PROT_READ;
16365                                 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16366                                     "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16367                                     page_addr, page_addr + assoc_size, *address,
16368                                     region_start, region_size,
16369                                     region_cd != NULL ? "not " : ""                     // Don't leak kernel slide
16370                                     );
16371                         }
16372                 }
16373 #endif
16374         }
16375         vm_map_unlock(target_map);
16376
              /*
               * Wire the new range if the target map requires it.  This
               * is done after the map lock has been dropped; a wiring
               * failure is returned to the caller but the entries
               * inserted above are not removed here.
               */
16377         if (result == KERN_SUCCESS && target_map->wiring_required) {
16378                 result = vm_map_wire_kernel(target_map, *address,
16379                     *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16380                     TRUE);
16381         }
16382
16383         /*
16384          * If requested, return the address of the data pointed to by the
16385          * request, rather than the base of the resulting page.
16386          */
16387         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16388                 *address += offset_in_mapping;
16389         }
16390
16391         return result;
16392 }
16393
16394 /*
16395  *      Routine:        vm_map_remap_range_allocate
16396  *
16397  *      Description:
16398  *              Allocate a range in the specified virtual address map.
16399  *              Returns the address and the map entry just before the allocated
16400  *              range.
16401  *
16402  *      Map must be locked.
16403  */
16404
16405 static kern_return_t
16406 vm_map_remap_range_allocate(
16407         vm_map_t                map,
16408         vm_map_address_t        *address,       /* IN/OUT */
16409         vm_map_size_t           size,
16410         vm_map_offset_t         mask,
16411         int                     flags,
16412         vm_map_kernel_flags_t   vmk_flags,
16413         __unused vm_tag_t       tag,
16414         vm_map_entry_t          *map_entry)     /* OUT */
16415 {
16416         vm_map_entry_t  entry;
16417         vm_map_offset_t start;
16418         vm_map_offset_t end;
16419         vm_map_offset_t desired_empty_end;
16420         kern_return_t   kr;
16421         vm_map_entry_t          hole_entry;
16422
16423 StartAgain:;
16424
16425         start = *address;
16426
16427         if (flags & VM_FLAGS_ANYWHERE) {
16428                 if (flags & VM_FLAGS_RANDOM_ADDR) {
16429                         /*
16430                          * Get a random start address.
16431                          */
16432                         kr = vm_map_random_address_for_size(map, address, size);
16433                         if (kr != KERN_SUCCESS) {
16434                                 return kr;
16435                         }
16436                         start = *address;
16437                 }
16438
16439                 /*
16440                  *      Calculate the first possible address.
16441                  */
16442
16443                 if (start < map->min_offset) {
16444                         start = map->min_offset;
16445                 }
16446                 if (start > map->max_offset) {
16447                         return KERN_NO_SPACE;
16448                 }
16449
16450                 /*
16451                  *      Look for the first possible address;
16452                  *      if there's already something at this
16453                  *      address, we have to start after it.
16454                  */
16455
16456                 if (map->disable_vmentry_reuse == TRUE) {
16457                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
16458                 } else {
16459                         if (map->holelistenabled) {
16460                                 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16461
16462                                 if (hole_entry == NULL) {
16463                                         /*
16464                                          * No more space in the map?
16465                                          */
16466                                         return KERN_NO_SPACE;
16467                                 } else {
16468                                         boolean_t found_hole = FALSE;
16469
16470                                         do {
16471                                                 if (hole_entry->vme_start >= start) {
16472                                                         start = hole_entry->vme_start;
16473                                                         found_hole = TRUE;
16474                                                         break;
16475                                                 }
16476
16477                                                 if (hole_entry->vme_end > start) {
16478                                                         found_hole = TRUE;
16479                                                         break;
16480                                                 }
16481                                                 hole_entry = hole_entry->vme_next;
16482                                         } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16483
16484                                         if (found_hole == FALSE) {
16485                                                 return KERN_NO_SPACE;
16486                                         }
16487
16488                                         entry = hole_entry;
16489                                 }
16490                         } else {
16491                                 assert(first_free_is_valid(map));
16492                                 if (start == map->min_offset) {
16493                                         if ((entry = map->first_free) != vm_map_to_entry(map)) {
16494                                                 start = entry->vme_end;
16495                                         }
16496                                 } else {
16497                                         vm_map_entry_t  tmp_entry;
16498                                         if (vm_map_lookup_entry(map, start, &tmp_entry)) {
16499                                                 start = tmp_entry->vme_end;
16500                                         }
16501                                         entry = tmp_entry;
16502                                 }
16503                         }
16504                         start = vm_map_round_page(start,
16505                             VM_MAP_PAGE_MASK(map));
16506                 }
16507
16508                 /*
16509                  *      In any case, the "entry" always precedes
16510                  *      the proposed new region throughout the
16511                  *      loop:
16512                  */
16513
16514                 while (TRUE) {
16515                         vm_map_entry_t  next;
16516
16517                         /*
16518                          *      Find the end of the proposed new region.
16519                          *      Be sure we didn't go beyond the end, or
16520                          *      wrap around the address.
16521                          */
16522
16523                         end = ((start + mask) & ~mask);
16524                         end = vm_map_round_page(end,
16525                             VM_MAP_PAGE_MASK(map));
16526                         if (end < start) {
16527                                 return KERN_NO_SPACE;
16528                         }
16529                         start = end;
16530                         end += size;
16531
16532                         /* We want an entire page of empty space, but don't increase the allocation size. */
16533                         desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16534
16535                         if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16536                                 if (map->wait_for_space) {
16537                                         if (size <= (map->max_offset -
16538                                             map->min_offset)) {
16539                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16540                                                 vm_map_unlock(map);
16541                                                 thread_block(THREAD_CONTINUE_NULL);
16542                                                 vm_map_lock(map);
16543                                                 goto StartAgain;
16544                                         }
16545                                 }
16546
16547                                 return KERN_NO_SPACE;
16548                         }
16549
16550                         next = entry->vme_next;
16551
16552                         if (map->holelistenabled) {
16553                                 if (entry->vme_end >= desired_empty_end) {
16554                                         break;
16555                                 }
16556                         } else {
16557                                 /*
16558                                  *      If there are no more entries, we must win.
16559                                  *
16560                                  *      OR
16561                                  *
16562                                  *      If there is another entry, it must be
16563                                  *      after the end of the potential new region.
16564                                  */
16565
16566                                 if (next == vm_map_to_entry(map)) {
16567                                         break;
16568                                 }
16569
16570                                 if (next->vme_start >= desired_empty_end) {
16571                                         break;
16572                                 }
16573                         }
16574
16575                         /*
16576                          *      Didn't fit -- move to the next entry.
16577                          */
16578
16579                         entry = next;
16580
16581                         if (map->holelistenabled) {
16582                                 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16583                                         /*
16584                                          * Wrapped around
16585                                          */
16586                                         return KERN_NO_SPACE;
16587                                 }
16588                                 start = entry->vme_start;
16589                         } else {
16590                                 start = entry->vme_end;
16591                         }
16592                 }
16593
16594                 if (map->holelistenabled) {
16595                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16596                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16597                         }
16598                 }
16599
16600                 *address = start;
16601         } else {
16602                 vm_map_entry_t          temp_entry;
16603
16604                 /*
16605                  *      Verify that:
16606                  *              the address doesn't itself violate
16607                  *              the mask requirement.
16608                  */
16609
16610                 if ((start & mask) != 0) {
16611                         return KERN_NO_SPACE;
16612                 }
16613
16614
16615                 /*
16616                  *      ...     the address is within bounds
16617                  */
16618
16619                 end = start + size;
16620
16621                 if ((start < map->min_offset) ||
16622                     (end > map->max_offset) ||
16623                     (start >= end)) {
16624                         return KERN_INVALID_ADDRESS;
16625                 }
16626
16627                 /*
16628                  * If we're asked to overwrite whatever was mapped in that
16629                  * range, first deallocate that range.
16630                  */
16631                 if (flags & VM_FLAGS_OVERWRITE) {
16632                         vm_map_t zap_map;
16633                         int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16634
16635                         /*
16636                          * We use a "zap_map" to avoid having to unlock
16637                          * the "map" in vm_map_delete(), which would compromise
16638                          * the atomicity of the "deallocate" and then "remap"
16639                          * combination.
16640                          */
16641                         zap_map = vm_map_create(PMAP_NULL,
16642                             start,
16643                             end,
16644                             map->hdr.entries_pageable);
16645                         if (zap_map == VM_MAP_NULL) {
16646                                 return KERN_RESOURCE_SHORTAGE;
16647                         }
16648                         vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16649                         vm_map_disable_hole_optimization(zap_map);
16650
16651                         if (vmk_flags.vmkf_overwrite_immutable) {
16652                                 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16653                         }
16654                         kr = vm_map_delete(map, start, end,
16655                             remove_flags,
16656                             zap_map);
16657                         if (kr == KERN_SUCCESS) {
16658                                 vm_map_destroy(zap_map,
16659                                     VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16660                                 zap_map = VM_MAP_NULL;
16661                         }
16662                 }
16663
16664                 /*
16665                  *      ...     the starting address isn't allocated
16666                  */
16667
16668                 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16669                         return KERN_NO_SPACE;
16670                 }
16671
16672                 entry = temp_entry;
16673
16674                 /*
16675                  *      ...     the next region doesn't overlap the
16676                  *              end point.
16677                  */
16678
16679                 if ((entry->vme_next != vm_map_to_entry(map)) &&
16680                     (entry->vme_next->vme_start < end)) {
16681                         return KERN_NO_SPACE;
16682                 }
16683         }
16684         *map_entry = entry;
16685         return KERN_SUCCESS;
16686 }
16687
16688 /*
16689  *      vm_map_switch:
16690  *
16691  *      Set the address map for the current thread to the specified map
16692  */
16693
16694 vm_map_t
16695 vm_map_switch(
16696         vm_map_t        map)
16697 {
16698         int             mycpu;
16699         thread_t        thread = current_thread();
16700         vm_map_t        oldmap = thread->map;
16701
16702         mp_disable_preemption();
16703         mycpu = cpu_number();
16704
16705         /*
16706          *      Deactivate the current map and activate the requested map
16707          */
16708         PMAP_SWITCH_USER(thread, map, mycpu);
16709
16710         mp_enable_preemption();
16711         return oldmap;
16712 }
16713
16714
16715 /*
16716  *      Routine:        vm_map_write_user
16717  *
16718  *      Description:
16719  *              Copy out data from a kernel space into space in the
16720  *              destination map. The space must already exist in the
16721  *              destination map.
16722  *              NOTE:  This routine should only be called by threads
16723  *              which can block on a page fault. i.e. kernel mode user
16724  *              threads.
16725  *
16726  */
16727 kern_return_t
16728 vm_map_write_user(
16729         vm_map_t                map,
16730         void                    *src_p,
16731         vm_map_address_t        dst_addr,
16732         vm_size_t               size)
16733 {
16734         kern_return_t   kr = KERN_SUCCESS;
16735
16736         if (current_map() == map) {
16737                 if (copyout(src_p, dst_addr, size)) {
16738                         kr = KERN_INVALID_ADDRESS;
16739                 }
16740         } else {
16741                 vm_map_t        oldmap;
16742
16743                 /* take on the identity of the target map while doing */
16744                 /* the transfer */
16745
16746                 vm_map_reference(map);
16747                 oldmap = vm_map_switch(map);
16748                 if (copyout(src_p, dst_addr, size)) {
16749                         kr = KERN_INVALID_ADDRESS;
16750                 }
16751                 vm_map_switch(oldmap);
16752                 vm_map_deallocate(map);
16753         }
16754         return kr;
16755 }
16756
16757 /*
16758  *      Routine:        vm_map_read_user
16759  *
16760  *      Description:
16761  *              Copy in data from a user space source map into the
16762  *              kernel map. The space must already exist in the
16763  *              kernel map.
16764  *              NOTE:  This routine should only be called by threads
16765  *              which can block on a page fault. i.e. kernel mode user
16766  *              threads.
16767  *
16768  */
16769 kern_return_t
16770 vm_map_read_user(
16771         vm_map_t                map,
16772         vm_map_address_t        src_addr,
16773         void                    *dst_p,
16774         vm_size_t               size)
16775 {
16776         kern_return_t   kr = KERN_SUCCESS;
16777
16778         if (current_map() == map) {
16779                 if (copyin(src_addr, dst_p, size)) {
16780                         kr = KERN_INVALID_ADDRESS;
16781                 }
16782         } else {
16783                 vm_map_t        oldmap;
16784
16785                 /* take on the identity of the target map while doing */
16786                 /* the transfer */
16787
16788                 vm_map_reference(map);
16789                 oldmap = vm_map_switch(map);
16790                 if (copyin(src_addr, dst_p, size)) {
16791                         kr = KERN_INVALID_ADDRESS;
16792                 }
16793                 vm_map_switch(oldmap);
16794                 vm_map_deallocate(map);
16795         }
16796         return kr;
16797 }
16798
16799
16800 /*
16801  *      vm_map_check_protection:
16802  *
16803  *      Assert that the target map allows the specified
16804  *      privilege on the entire address region given.
16805  *      The entire region must be allocated.
16806  */
16807 boolean_t
16808 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16809     vm_map_offset_t end, vm_prot_t protection)
16810 {
16811         vm_map_entry_t entry;
16812         vm_map_entry_t tmp_entry;
16813
16814         vm_map_lock(map);
16815
16816         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
16817                 vm_map_unlock(map);
16818                 return FALSE;
16819         }
16820
16821         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16822                 vm_map_unlock(map);
16823                 return FALSE;
16824         }
16825
16826         entry = tmp_entry;
16827
16828         while (start < end) {
16829                 if (entry == vm_map_to_entry(map)) {
16830                         vm_map_unlock(map);
16831                         return FALSE;
16832                 }
16833
16834                 /*
16835                  *      No holes allowed!
16836                  */
16837
16838                 if (start < entry->vme_start) {
16839                         vm_map_unlock(map);
16840                         return FALSE;
16841                 }
16842
16843                 /*
16844                  * Check protection associated with entry.
16845                  */
16846
16847                 if ((entry->protection & protection) != protection) {
16848                         vm_map_unlock(map);
16849                         return FALSE;
16850                 }
16851
16852                 /* go to next entry */
16853
16854                 start = entry->vme_end;
16855                 entry = entry->vme_next;
16856         }
16857         vm_map_unlock(map);
16858         return TRUE;
16859 }
16860
16861 kern_return_t
16862 vm_map_purgable_control(
16863         vm_map_t                map,
16864         vm_map_offset_t         address,
16865         vm_purgable_t           control,
16866         int                     *state)
16867 {
16868         vm_map_entry_t          entry;
16869         vm_object_t             object;
16870         kern_return_t           kr;
16871         boolean_t               was_nonvolatile;
16872
16873         /*
16874          * Vet all the input parameters and current type and state of the
16875          * underlaying object.  Return with an error if anything is amiss.
16876          */
16877         if (map == VM_MAP_NULL) {
16878                 return KERN_INVALID_ARGUMENT;
16879         }
16880
16881         if (control != VM_PURGABLE_SET_STATE &&
16882             control != VM_PURGABLE_GET_STATE &&
16883             control != VM_PURGABLE_PURGE_ALL &&
16884             control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16885                 return KERN_INVALID_ARGUMENT;
16886         }
16887
16888         if (control == VM_PURGABLE_PURGE_ALL) {
16889                 vm_purgeable_object_purge_all();
16890                 return KERN_SUCCESS;
16891         }
16892
16893         if ((control == VM_PURGABLE_SET_STATE ||
16894             control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16895             (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16896             ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16897                 return KERN_INVALID_ARGUMENT;
16898         }
16899
16900         vm_map_lock_read(map);
16901
16902         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16903                 /*
16904                  * Must pass a valid non-submap address.
16905                  */
16906                 vm_map_unlock_read(map);
16907                 return KERN_INVALID_ADDRESS;
16908         }
16909
16910         if ((entry->protection & VM_PROT_WRITE) == 0) {
16911                 /*
16912                  * Can't apply purgable controls to something you can't write.
16913                  */
16914                 vm_map_unlock_read(map);
16915                 return KERN_PROTECTION_FAILURE;
16916         }
16917
16918         object = VME_OBJECT(entry);
16919         if (object == VM_OBJECT_NULL ||
16920             object->purgable == VM_PURGABLE_DENY) {
16921                 /*
16922                  * Object must already be present and be purgeable.
16923                  */
16924                 vm_map_unlock_read(map);
16925                 return KERN_INVALID_ARGUMENT;
16926         }
16927
16928         vm_object_lock(object);
16929
16930 #if 00
16931         if (VME_OFFSET(entry) != 0 ||
16932             entry->vme_end - entry->vme_start != object->vo_size) {
16933                 /*
16934                  * Can only apply purgable controls to the whole (existing)
16935                  * object at once.
16936                  */
16937                 vm_map_unlock_read(map);
16938                 vm_object_unlock(object);
16939                 return KERN_INVALID_ARGUMENT;
16940         }
16941 #endif
16942
16943         assert(!entry->is_sub_map);
16944         assert(!entry->use_pmap); /* purgeable has its own accounting */
16945
16946         vm_map_unlock_read(map);
16947
16948         was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16949
16950         kr = vm_object_purgable_control(object, control, state);
16951
16952         if (was_nonvolatile &&
16953             object->purgable != VM_PURGABLE_NONVOLATILE &&
16954             map->pmap == kernel_pmap) {
16955 #if DEBUG
16956                 object->vo_purgeable_volatilizer = kernel_task;
16957 #endif /* DEBUG */
16958         }
16959
16960         vm_object_unlock(object);
16961
16962         return kr;
16963 }
16964
16965 kern_return_t
16966 vm_map_page_query_internal(
16967         vm_map_t        target_map,
16968         vm_map_offset_t offset,
16969         int             *disposition,
16970         int             *ref_count)
16971 {
16972         kern_return_t                   kr;
16973         vm_page_info_basic_data_t       info;
16974         mach_msg_type_number_t          count;
16975
16976         count = VM_PAGE_INFO_BASIC_COUNT;
16977         kr = vm_map_page_info(target_map,
16978             offset,
16979             VM_PAGE_INFO_BASIC,
16980             (vm_page_info_t) &info,
16981             &count);
16982         if (kr == KERN_SUCCESS) {
16983                 *disposition = info.disposition;
16984                 *ref_count = info.ref_count;
16985         } else {
16986                 *disposition = 0;
16987                 *ref_count = 0;
16988         }
16989
16990         return kr;
16991 }
16992
16993 kern_return_t
16994 vm_map_page_info(
16995         vm_map_t                map,
16996         vm_map_offset_t         offset,
16997         vm_page_info_flavor_t   flavor,
16998         vm_page_info_t          info,
16999         mach_msg_type_number_t  *count)
17000 {
17001         return vm_map_page_range_info_internal(map,
17002                    offset,                     /* start of range */
17003                    (offset + 1),                     /* this will get rounded in the call to the page boundary */
17004                    flavor,
17005                    info,
17006                    count);
17007 }
17008
17009 kern_return_t
17010 vm_map_page_range_info_internal(
17011         vm_map_t                map,
17012         vm_map_offset_t         start_offset,
17013         vm_map_offset_t         end_offset,
17014         vm_page_info_flavor_t   flavor,
17015         vm_page_info_t          info,
17016         mach_msg_type_number_t  *count)
17017 {
17018         vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
17019         vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
17020         vm_page_t               m = VM_PAGE_NULL;
17021         kern_return_t           retval = KERN_SUCCESS;
17022         int                     disposition = 0;
17023         int                     ref_count = 0;
17024         int                     depth = 0, info_idx = 0;
17025         vm_page_info_basic_t    basic_info = 0;
17026         vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
17027         vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
17028         boolean_t               do_region_footprint;
17029         ledger_amount_t         ledger_resident, ledger_compressed;
17030
17031         switch (flavor) {
17032         case VM_PAGE_INFO_BASIC:
17033                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
17034                         /*
17035                          * The "vm_page_info_basic_data" structure was not
17036                          * properly padded, so allow the size to be off by
17037                          * one to maintain backwards binary compatibility...
17038                          */
17039                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
17040                                 return KERN_INVALID_ARGUMENT;
17041                         }
17042                 }
17043                 break;
17044         default:
17045                 return KERN_INVALID_ARGUMENT;
17046         }
17047
17048         do_region_footprint = task_self_region_footprint();
17049         disposition = 0;
17050         ref_count = 0;
17051         depth = 0;
17052         info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
17053         retval = KERN_SUCCESS;
17054
17055         offset_in_page = start_offset & PAGE_MASK;
17056         start = vm_map_trunc_page(start_offset, PAGE_MASK);
17057         end = vm_map_round_page(end_offset, PAGE_MASK);
17058
17059         if (end < start) {
17060                 return KERN_INVALID_ARGUMENT;
17061         }
17062
17063         assert((end - start) <= MAX_PAGE_RANGE_QUERY);
17064
17065         vm_map_lock_read(map);
17066
17067         task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17068
17069         for (curr_s_offset = start; curr_s_offset < end;) {
17070                 /*
17071                  * New lookup needs reset of these variables.
17072                  */
17073                 curr_object = object = VM_OBJECT_NULL;
17074                 offset_in_object = 0;
17075                 ref_count = 0;
17076                 depth = 0;
17077
17078                 if (do_region_footprint &&
17079                     curr_s_offset >= vm_map_last_entry(map)->vme_end) {
17080                         /*
17081                          * Request for "footprint" info about a page beyond
17082                          * the end of address space: this must be for
17083                          * the fake region vm_map_region_recurse_64()
17084                          * reported to account for non-volatile purgeable
17085                          * memory owned by this task.
17086                          */
17087                         disposition = 0;
17088
17089                         if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
17090                             (unsigned) ledger_compressed) {
17091                                 /*
17092                                  * We haven't reported all the "non-volatile
17093                                  * compressed" pages yet, so report this fake
17094                                  * page as "compressed".
17095                                  */
17096                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17097                         } else {
17098                                 /*
17099                                  * We've reported all the non-volatile
17100                                  * compressed page but not all the non-volatile
17101                                  * pages , so report this fake page as
17102                                  * "resident dirty".
17103                                  */
17104                                 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17105                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17106                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
17107                         }
17108                         switch (flavor) {
17109                         case VM_PAGE_INFO_BASIC:
17110                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17111                                 basic_info->disposition = disposition;
17112                                 basic_info->ref_count = 1;
17113                                 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17114                                 basic_info->offset = 0;
17115                                 basic_info->depth = 0;
17116
17117                                 info_idx++;
17118                                 break;
17119                         }
17120                         curr_s_offset += PAGE_SIZE;
17121                         continue;
17122                 }
17123
17124                 /*
17125                  * First, find the map entry covering "curr_s_offset", going down
17126                  * submaps if necessary.
17127                  */
17128                 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17129                         /* no entry -> no object -> no page */
17130
17131                         if (curr_s_offset < vm_map_min(map)) {
17132                                 /*
17133                                  * Illegal address that falls below map min.
17134                                  */
17135                                 curr_e_offset = MIN(end, vm_map_min(map));
17136                         } else if (curr_s_offset >= vm_map_max(map)) {
17137                                 /*
17138                                  * Illegal address that falls on/after map max.
17139                                  */
17140                                 curr_e_offset = end;
17141                         } else if (map_entry == vm_map_to_entry(map)) {
17142                                 /*
17143                                  * Hit a hole.
17144                                  */
17145                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
17146                                         /*
17147                                          * Empty map.
17148                                          */
17149                                         curr_e_offset = MIN(map->max_offset, end);
17150                                 } else {
17151                                         /*
17152                                          * Hole at start of the map.
17153                                          */
17154                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17155                                 }
17156                         } else {
17157                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
17158                                         /*
17159                                          * Hole at the end of the map.
17160                                          */
17161                                         curr_e_offset = MIN(map->max_offset, end);
17162                                 } else {
17163                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17164                                 }
17165                         }
17166
17167                         assert(curr_e_offset >= curr_s_offset);
17168
17169                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17170
17171                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17172
17173                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17174
17175                         curr_s_offset = curr_e_offset;
17176
17177                         info_idx += num_pages;
17178
17179                         continue;
17180                 }
17181
17182                 /* compute offset from this map entry's start */
17183                 offset_in_object = curr_s_offset - map_entry->vme_start;
17184
17185                 /* compute offset into this map entry's object (or submap) */
17186                 offset_in_object += VME_OFFSET(map_entry);
17187
17188                 if (map_entry->is_sub_map) {
17189                         vm_map_t sub_map = VM_MAP_NULL;
17190                         vm_page_info_t submap_info = 0;
17191                         vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17192
17193                         range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17194
17195                         submap_s_offset = offset_in_object;
17196                         submap_e_offset = submap_s_offset + range_len;
17197
17198                         sub_map = VME_SUBMAP(map_entry);
17199
17200                         vm_map_reference(sub_map);
17201                         vm_map_unlock_read(map);
17202
17203                         submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17204
17205                         retval = vm_map_page_range_info_internal(sub_map,
17206                             submap_s_offset,
17207                             submap_e_offset,
17208                             VM_PAGE_INFO_BASIC,
17209                             (vm_page_info_t) submap_info,
17210                             count);
17211
17212                         assert(retval == KERN_SUCCESS);
17213
17214                         vm_map_lock_read(map);
17215                         vm_map_deallocate(sub_map);
17216
17217                         /* Move the "info" index by the number of pages we inspected.*/
17218                         info_idx += range_len >> PAGE_SHIFT;
17219
17220                         /* Move our current offset by the size of the range we inspected.*/
17221                         curr_s_offset += range_len;
17222
17223                         continue;
17224                 }
17225
17226                 object = VME_OBJECT(map_entry);
17227                 if (object == VM_OBJECT_NULL) {
17228                         /*
17229                          * We don't have an object here and, hence,
17230                          * no pages to inspect. We'll fill up the
17231                          * info structure appropriately.
17232                          */
17233
17234                         curr_e_offset = MIN(map_entry->vme_end, end);
17235
17236                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17237
17238                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17239
17240                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17241
17242                         curr_s_offset = curr_e_offset;
17243
17244                         info_idx += num_pages;
17245
17246                         continue;
17247                 }
17248
17249                 if (do_region_footprint) {
17250                         int pmap_disp;
17251
17252                         disposition = 0;
17253                         pmap_disp = 0;
17254                         if (map->has_corpse_footprint) {
17255                                 /*
17256                                  * Query the page info data we saved
17257                                  * while forking the corpse.
17258                                  */
17259                                 vm_map_corpse_footprint_query_page_info(
17260                                         map,
17261                                         curr_s_offset,
17262                                         &pmap_disp);
17263                         } else {
17264                                 /*
17265                                  * Query the pmap.
17266                                  */
17267                                 pmap_query_page_info(map->pmap,
17268                                     curr_s_offset,
17269                                     &pmap_disp);
17270                         }
17271                         if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17272                             /* && not tagged as no-footprint? */
17273                             VM_OBJECT_OWNER(object) != NULL &&
17274                             VM_OBJECT_OWNER(object)->map == map) {
17275                                 if ((((curr_s_offset
17276                                     - map_entry->vme_start
17277                                     + VME_OFFSET(map_entry))
17278                                     / PAGE_SIZE) <
17279                                     (object->resident_page_count +
17280                                     vm_compressor_pager_get_count(object->pager)))) {
17281                                         /*
17282                                          * Non-volatile purgeable object owned
17283                                          * by this task: report the first
17284                                          * "#resident + #compressed" pages as
17285                                          * "resident" (to show that they
17286                                          * contribute to the footprint) but not
17287                                          * "dirty" (to avoid double-counting
17288                                          * with the fake "non-volatile" region
17289                                          * we'll report at the end of the
17290                                          * address space to account for all
17291                                          * (mapped or not) non-volatile memory
17292                                          * owned by this task.
17293                                          */
17294                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17295                                 }
17296                         } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
17297                             object->purgable == VM_PURGABLE_EMPTY) &&
17298                             /* && not tagged as no-footprint? */
17299                             VM_OBJECT_OWNER(object) != NULL &&
17300                             VM_OBJECT_OWNER(object)->map == map) {
17301                                 if ((((curr_s_offset
17302                                     - map_entry->vme_start
17303                                     + VME_OFFSET(map_entry))
17304                                     / PAGE_SIZE) <
17305                                     object->wired_page_count)) {
17306                                         /*
17307                                          * Volatile|empty purgeable object owned
17308                                          * by this task: report the first
17309                                          * "#wired" pages as "resident" (to
17310                                          * show that they contribute to the
17311                                          * footprint) but not "dirty" (to avoid
17312                                          * double-counting with the fake
17313                                          * "non-volatile" region we'll report
17314                                          * at the end of the address space to
17315                                          * account for all (mapped or not)
17316                                          * non-volatile memory owned by this
17317                                          * task.
17318                                          */
17319                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17320                                 }
17321                         } else if (map_entry->iokit_acct &&
17322                             object->internal &&
17323                             object->purgable == VM_PURGABLE_DENY) {
17324                                 /*
17325                                  * Non-purgeable IOKit memory: phys_footprint
17326                                  * includes the entire virtual mapping.
17327                                  */
17328                                 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17329                                 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17330                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17331                         } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17332                             PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17333                                 /* alternate accounting */
17334 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17335                                 if (map->pmap->footprint_was_suspended ||
17336                                     /*
17337                                      * XXX corpse does not know if original
17338                                      * pmap had its footprint suspended...
17339                                      */
17340                                     map->has_corpse_footprint) {
17341                                         /*
17342                                          * The assertion below can fail if dyld
17343                                          * suspended footprint accounting
17344                                          * while doing some adjustments to
17345                                          * this page;  the mapping would say
17346                                          * "use pmap accounting" but the page
17347                                          * would be marked "alternate
17348                                          * accounting".
17349                                          */
17350                                 } else
17351 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17352                                 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17353                                 pmap_disp = 0;
17354                         } else {
17355                                 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17356                                         assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17357                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17358                                         disposition |= VM_PAGE_QUERY_PAGE_REF;
17359                                         if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17360                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17361                                         } else {
17362                                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17363                                         }
17364                                         if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17365                                                 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17366                                         }
17367                                 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17368                                         assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17369                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17370                                 }
17371                         }
17372                         switch (flavor) {
17373                         case VM_PAGE_INFO_BASIC:
17374                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17375                                 basic_info->disposition = disposition;
17376                                 basic_info->ref_count = 1;
17377                                 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17378                                 basic_info->offset = 0;
17379                                 basic_info->depth = 0;
17380
17381                                 info_idx++;
17382                                 break;
17383                         }
17384                         curr_s_offset += PAGE_SIZE;
17385                         continue;
17386                 }
17387
17388                 vm_object_reference(object);
17389                 /*
17390                  * Shared mode -- so we can allow other readers
17391                  * to grab the lock too.
17392                  */
17393                 vm_object_lock_shared(object);
17394
17395                 curr_e_offset = MIN(map_entry->vme_end, end);
17396
17397                 vm_map_unlock_read(map);
17398
17399                 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17400
17401                 curr_object = object;
17402
17403                 for (; curr_s_offset < curr_e_offset;) {
17404                         if (object == curr_object) {
17405                                 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17406                         } else {
17407                                 ref_count = curr_object->ref_count;
17408                         }
17409
17410                         curr_offset_in_object = offset_in_object;
17411
17412                         for (;;) {
17413                                 m = vm_page_lookup(curr_object, curr_offset_in_object);
17414
17415                                 if (m != VM_PAGE_NULL) {
17416                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17417                                         break;
17418                                 } else {
17419                                         if (curr_object->internal &&
17420                                             curr_object->alive &&
17421                                             !curr_object->terminating &&
17422                                             curr_object->pager_ready) {
17423                                                 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17424                                                     == VM_EXTERNAL_STATE_EXISTS) {
17425                                                         /* the pager has that page */
17426                                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17427                                                         break;
17428                                                 }
17429                                         }
17430
17431                                         /*
17432                                          * Go down the VM object shadow chain until we find the page
17433                                          * we're looking for.
17434                                          */
17435
17436                                         if (curr_object->shadow != VM_OBJECT_NULL) {
17437                                                 vm_object_t shadow = VM_OBJECT_NULL;
17438
17439                                                 curr_offset_in_object += curr_object->vo_shadow_offset;
17440                                                 shadow = curr_object->shadow;
17441
17442                                                 vm_object_lock_shared(shadow);
17443                                                 vm_object_unlock(curr_object);
17444
17445                                                 curr_object = shadow;
17446                                                 depth++;
17447                                                 continue;
17448                                         } else {
17449                                                 break;
17450                                         }
17451                                 }
17452                         }
17453
17454                         /* The ref_count is not strictly accurate, it measures the number   */
17455                         /* of entities holding a ref on the object, they may not be mapping */
17456                         /* the object or may not be mapping the section holding the         */
17457                         /* target page but its still a ball park number and though an over- */
17458                         /* count, it picks up the copy-on-write cases                       */
17459
17460                         /* We could also get a picture of page sharing from pmap_attributes */
17461                         /* but this would under count as only faulted-in mappings would     */
17462                         /* show up.                                                         */
17463
17464                         if ((curr_object == object) && curr_object->shadow) {
17465                                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17466                         }
17467
17468                         if (!curr_object->internal) {
17469                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17470                         }
17471
17472                         if (m != VM_PAGE_NULL) {
17473                                 if (m->vmp_fictitious) {
17474                                         disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17475                                 } else {
17476                                         if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
17477                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17478                                         }
17479
17480                                         if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
17481                                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
17482                                         }
17483
17484                                         if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
17485                                                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17486                                         }
17487
17488                                         if (m->vmp_cs_validated) {
17489                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17490                                         }
17491                                         if (m->vmp_cs_tainted) {
17492                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17493                                         }
17494                                         if (m->vmp_cs_nx) {
17495                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17496                                         }
17497                                         if (m->vmp_reusable || curr_object->all_reusable) {
17498                                                 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17499                                         }
17500                                 }
17501                         }
17502
17503                         switch (flavor) {
17504                         case VM_PAGE_INFO_BASIC:
17505                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17506                                 basic_info->disposition = disposition;
17507                                 basic_info->ref_count = ref_count;
17508                                 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17509                                     VM_KERNEL_ADDRPERM(curr_object);
17510                                 basic_info->offset =
17511                                     (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17512                                 basic_info->depth = depth;
17513
17514                                 info_idx++;
17515                                 break;
17516                         }
17517
17518                         disposition = 0;
17519                         offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17520
17521                         /*
17522                          * Move to next offset in the range and in our object.
17523                          */
17524                         curr_s_offset += PAGE_SIZE;
17525                         offset_in_object += PAGE_SIZE;
17526                         curr_offset_in_object = offset_in_object;
17527
17528                         if (curr_object != object) {
17529                                 vm_object_unlock(curr_object);
17530
17531                                 curr_object = object;
17532
17533                                 vm_object_lock_shared(curr_object);
17534                         } else {
17535                                 vm_object_lock_yield_shared(curr_object);
17536                         }
17537                 }
17538
17539                 vm_object_unlock(curr_object);
17540                 vm_object_deallocate(curr_object);
17541
17542                 vm_map_lock_read(map);
17543         }
17544
17545         vm_map_unlock_read(map);
17546         return retval;
17547 }
17548
17549 /*
17550  *      vm_map_msync
17551  *
17552  *      Synchronises the memory range specified with its backing store
17553  *      image by either flushing or cleaning the contents to the appropriate
17554  *      memory manager engaging in a memory object synchronize dialog with
17555  *      the manager.  The client doesn't return until the manager issues
17556  *      m_o_s_completed message.  MIG Magically converts user task parameter
17557  *      to the task's address map.
17558  *
17559  *      interpretation of sync_flags
17560  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
17561  *                                pages to manager.
17562  *
17563  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17564  *                              - discard pages, write dirty or precious
17565  *                                pages back to memory manager.
17566  *
17567  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17568  *                              - write dirty or precious pages back to
17569  *                                the memory manager.
17570  *
17571  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
17572  *                                is a hole in the region, and we would
17573  *                                have returned KERN_SUCCESS, return
17574  *                                KERN_INVALID_ADDRESS instead.
17575  *
17576  *      NOTE
17577  *      The memory object attributes have not yet been implemented, this
17578  *      function will have to deal with the invalidate attribute
17579  *
17580  *      RETURNS
17581  *      KERN_INVALID_TASK               Bad task parameter
17582  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
17583  *      KERN_SUCCESS                    The usual.
17584  *      KERN_INVALID_ADDRESS            There was a hole in the region.
17585  */
17586
17587 kern_return_t
17588 vm_map_msync(
17589         vm_map_t                map,
17590         vm_map_address_t        address,
17591         vm_map_size_t           size,
17592         vm_sync_t               sync_flags)
17593 {
17594         vm_map_entry_t          entry;
17595         vm_map_size_t           amount_left;
17596         vm_object_offset_t      offset;
17597         boolean_t               do_sync_req;
17598         boolean_t               had_hole = FALSE;
17599         vm_map_offset_t         pmap_offset;
17600
17601         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17602             (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17603                 return KERN_INVALID_ARGUMENT;
17604         }
17605
17606         /*
17607          * align address and size on page boundaries
17608          */
17609         size = (vm_map_round_page(address + size,
17610             VM_MAP_PAGE_MASK(map)) -
17611             vm_map_trunc_page(address,
17612             VM_MAP_PAGE_MASK(map)));
17613         address = vm_map_trunc_page(address,
17614             VM_MAP_PAGE_MASK(map));
17615
17616         if (map == VM_MAP_NULL) {
17617                 return KERN_INVALID_TASK;
17618         }
17619
17620         if (size == 0) {
17621                 return KERN_SUCCESS;
17622         }
17623
17624         amount_left = size;
17625
17626         while (amount_left > 0) {
17627                 vm_object_size_t        flush_size;
17628                 vm_object_t             object;
17629
17630                 vm_map_lock(map);
17631                 if (!vm_map_lookup_entry(map,
17632                     address,
17633                     &entry)) {
17634                         vm_map_size_t   skip;
17635
17636                         /*
17637                          * hole in the address map.
17638                          */
17639                         had_hole = TRUE;
17640
17641                         if (sync_flags & VM_SYNC_KILLPAGES) {
17642                                 /*
17643                                  * For VM_SYNC_KILLPAGES, there should be
17644                                  * no holes in the range, since we couldn't
17645                                  * prevent someone else from allocating in
17646                                  * that hole and we wouldn't want to "kill"
17647                                  * their pages.
17648                                  */
17649                                 vm_map_unlock(map);
17650                                 break;
17651                         }
17652
17653                         /*
17654                          * Check for empty map.
17655                          */
17656                         if (entry == vm_map_to_entry(map) &&
17657                             entry->vme_next == entry) {
17658                                 vm_map_unlock(map);
17659                                 break;
17660                         }
17661                         /*
17662                          * Check that we don't wrap and that
17663                          * we have at least one real map entry.
17664                          */
17665                         if ((map->hdr.nentries == 0) ||
17666                             (entry->vme_next->vme_start < address)) {
17667                                 vm_map_unlock(map);
17668                                 break;
17669                         }
17670                         /*
17671                          * Move up to the next entry if needed
17672                          */
17673                         skip = (entry->vme_next->vme_start - address);
17674                         if (skip >= amount_left) {
17675                                 amount_left = 0;
17676                         } else {
17677                                 amount_left -= skip;
17678                         }
17679                         address = entry->vme_next->vme_start;
17680                         vm_map_unlock(map);
17681                         continue;
17682                 }
17683
17684                 offset = address - entry->vme_start;
17685                 pmap_offset = address;
17686
17687                 /*
17688                  * do we have more to flush than is contained in this
17689                  * entry ?
17690                  */
17691                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17692                         flush_size = entry->vme_end -
17693                             (entry->vme_start + offset);
17694                 } else {
17695                         flush_size = amount_left;
17696                 }
17697                 amount_left -= flush_size;
17698                 address += flush_size;
17699
17700                 if (entry->is_sub_map == TRUE) {
17701                         vm_map_t        local_map;
17702                         vm_map_offset_t local_offset;
17703
17704                         local_map = VME_SUBMAP(entry);
17705                         local_offset = VME_OFFSET(entry);
17706                         vm_map_reference(local_map);
17707                         vm_map_unlock(map);
17708                         if (vm_map_msync(
17709                                     local_map,
17710                                     local_offset,
17711                                     flush_size,
17712                                     sync_flags) == KERN_INVALID_ADDRESS) {
17713                                 had_hole = TRUE;
17714                         }
17715                         vm_map_deallocate(local_map);
17716                         continue;
17717                 }
17718                 object = VME_OBJECT(entry);
17719
17720                 /*
17721                  * We can't sync this object if the object has not been
17722                  * created yet
17723                  */
17724                 if (object == VM_OBJECT_NULL) {
17725                         vm_map_unlock(map);
17726                         continue;
17727                 }
17728                 offset += VME_OFFSET(entry);
17729
17730                 vm_object_lock(object);
17731
17732                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17733                         int kill_pages = 0;
17734                         boolean_t reusable_pages = FALSE;
17735
17736                         if (sync_flags & VM_SYNC_KILLPAGES) {
17737                                 if (((object->ref_count == 1) ||
17738                                     ((object->copy_strategy !=
17739                                     MEMORY_OBJECT_COPY_SYMMETRIC) &&
17740                                     (object->copy == VM_OBJECT_NULL))) &&
17741                                     (object->shadow == VM_OBJECT_NULL)) {
17742                                         if (object->ref_count != 1) {
17743                                                 vm_page_stats_reusable.free_shared++;
17744                                         }
17745                                         kill_pages = 1;
17746                                 } else {
17747                                         kill_pages = -1;
17748                                 }
17749                         }
17750                         if (kill_pages != -1) {
17751                                 vm_object_deactivate_pages(
17752                                         object,
17753                                         offset,
17754                                         (vm_object_size_t) flush_size,
17755                                         kill_pages,
17756                                         reusable_pages,
17757                                         map->pmap,
17758                                         pmap_offset);
17759                         }
17760                         vm_object_unlock(object);
17761                         vm_map_unlock(map);
17762                         continue;
17763                 }
17764                 /*
17765                  * We can't sync this object if there isn't a pager.
17766                  * Don't bother to sync internal objects, since there can't
17767                  * be any "permanent" storage for these objects anyway.
17768                  */
17769                 if ((object->pager == MEMORY_OBJECT_NULL) ||
17770                     (object->internal) || (object->private)) {
17771                         vm_object_unlock(object);
17772                         vm_map_unlock(map);
17773                         continue;
17774                 }
17775                 /*
17776                  * keep reference on the object until syncing is done
17777                  */
17778                 vm_object_reference_locked(object);
17779                 vm_object_unlock(object);
17780
17781                 vm_map_unlock(map);
17782
17783                 do_sync_req = vm_object_sync(object,
17784                     offset,
17785                     flush_size,
17786                     sync_flags & VM_SYNC_INVALIDATE,
17787                     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17788                     (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17789                     sync_flags & VM_SYNC_SYNCHRONOUS);
17790
17791                 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17792                         /*
17793                          * clear out the clustering and read-ahead hints
17794                          */
17795                         vm_object_lock(object);
17796
17797                         object->pages_created = 0;
17798                         object->pages_used = 0;
17799                         object->sequential = 0;
17800                         object->last_alloc = 0;
17801
17802                         vm_object_unlock(object);
17803                 }
17804                 vm_object_deallocate(object);
17805         } /* while */
17806
17807         /* for proper msync() behaviour */
17808         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17809                 return KERN_INVALID_ADDRESS;
17810         }
17811
17812         return KERN_SUCCESS;
17813 }/* vm_msync */
17814
17815 /*
17816  *      Routine:        convert_port_entry_to_map
17817  *      Purpose:
17818  *              Convert from a port specifying an entry or a task
17819  *              to a map. Doesn't consume the port ref; produces a map ref,
17820  *              which may be null.  Unlike convert_port_to_map, the
17821  *              port may be task or a named entry backed.
17822  *      Conditions:
17823  *              Nothing locked.
17824  */
17825
17826
17827 vm_map_t
17828 convert_port_entry_to_map(
17829         ipc_port_t      port)
17830 {
17831         vm_map_t map;
17832         vm_named_entry_t        named_entry;
17833         uint32_t        try_failed_count = 0;
17834
17835         if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17836                 while (TRUE) {
17837                         ip_lock(port);
17838                         if (ip_active(port) && (ip_kotype(port)
17839                             == IKOT_NAMED_ENTRY)) {
17840                                 named_entry =
17841                                     (vm_named_entry_t) ip_get_kobject(port);
17842                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17843                                         ip_unlock(port);
17844
17845                                         try_failed_count++;
17846                                         mutex_pause(try_failed_count);
17847                                         continue;
17848                                 }
17849                                 named_entry->ref_count++;
17850                                 lck_mtx_unlock(&(named_entry)->Lock);
17851                                 ip_unlock(port);
17852                                 if ((named_entry->is_sub_map) &&
17853                                     (named_entry->protection
17854                                     & VM_PROT_WRITE)) {
17855                                         map = named_entry->backing.map;
17856                                 } else {
17857                                         mach_destroy_memory_entry(port);
17858                                         return VM_MAP_NULL;
17859                                 }
17860                                 vm_map_reference_swap(map);
17861                                 mach_destroy_memory_entry(port);
17862                                 break;
17863                         } else {
17864                                 return VM_MAP_NULL;
17865                         }
17866                 }
17867         } else {
17868                 map = convert_port_to_map(port);
17869         }
17870
17871         return map;
17872 }
17873
17874 /*
17875  *      Routine:        convert_port_entry_to_object
17876  *      Purpose:
17877  *              Convert from a port specifying a named entry to an
17878  *              object. Doesn't consume the port ref; produces a map ref,
17879  *              which may be null.
17880  *      Conditions:
17881  *              Nothing locked.
17882  */
17883
17884
17885 vm_object_t
17886 convert_port_entry_to_object(
17887         ipc_port_t      port)
17888 {
17889         vm_object_t             object = VM_OBJECT_NULL;
17890         vm_named_entry_t        named_entry;
17891         uint32_t                try_failed_count = 0;
17892
17893         if (IP_VALID(port) &&
17894             (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17895 try_again:
17896                 ip_lock(port);
17897                 if (ip_active(port) &&
17898                     (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17899                         named_entry = (vm_named_entry_t) ip_get_kobject(port);
17900                         if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17901                                 ip_unlock(port);
17902                                 try_failed_count++;
17903                                 mutex_pause(try_failed_count);
17904                                 goto try_again;
17905                         }
17906                         named_entry->ref_count++;
17907                         lck_mtx_unlock(&(named_entry)->Lock);
17908                         ip_unlock(port);
17909                         if (!(named_entry->is_sub_map) &&
17910                             !(named_entry->is_copy) &&
17911                             (named_entry->protection & VM_PROT_WRITE)) {
17912                                 object = named_entry->backing.object;
17913                                 vm_object_reference(object);
17914                         }
17915                         mach_destroy_memory_entry(port);
17916                 }
17917         }
17918
17919         return object;
17920 }
17921
17922 /*
17923  * Export routines to other components for the things we access locally through
17924  * macros.
17925  */
17926 #undef current_map
17927 vm_map_t
17928 current_map(void)
17929 {
17930         return current_map_fast();
17931 }
17932
17933 /*
17934  *      vm_map_reference:
17935  *
17936  *      Most code internal to the osfmk will go through a
17937  *      macro defining this.  This is always here for the
17938  *      use of other kernel components.
17939  */
17940 #undef vm_map_reference
17941 void
17942 vm_map_reference(
17943         vm_map_t        map)
17944 {
17945         if (map == VM_MAP_NULL) {
17946                 return;
17947         }
17948
17949         lck_mtx_lock(&map->s_lock);
17950 #if     TASK_SWAPPER
17951         assert(map->res_count > 0);
17952         assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
17953         map->res_count++;
17954 #endif
17955         os_ref_retain_locked(&map->map_refcnt);
17956         lck_mtx_unlock(&map->s_lock);
17957 }
17958
17959 /*
17960  *      vm_map_deallocate:
17961  *
17962  *      Removes a reference from the specified map,
17963  *      destroying it if no references remain.
17964  *      The map should not be locked.
17965  */
17966 void
17967 vm_map_deallocate(
17968         vm_map_t        map)
17969 {
17970         unsigned int            ref;
17971
17972         if (map == VM_MAP_NULL) {
17973                 return;
17974         }
17975
17976         lck_mtx_lock(&map->s_lock);
17977         ref = os_ref_release_locked(&map->map_refcnt);
17978         if (ref > 0) {
17979                 vm_map_res_deallocate(map);
17980                 lck_mtx_unlock(&map->s_lock);
17981                 return;
17982         }
17983         assert(os_ref_get_count(&map->map_refcnt) == 0);
17984         lck_mtx_unlock(&map->s_lock);
17985
17986 #if     TASK_SWAPPER
17987         /*
17988          * The map residence count isn't decremented here because
17989          * the vm_map_delete below will traverse the entire map,
17990          * deleting entries, and the residence counts on objects
17991          * and sharing maps will go away then.
17992          */
17993 #endif
17994
17995         vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17996 }
17997
17998
17999 void
18000 vm_map_disable_NX(vm_map_t map)
18001 {
18002         if (map == NULL) {
18003                 return;
18004         }
18005         if (map->pmap == NULL) {
18006                 return;
18007         }
18008
18009         pmap_disable_NX(map->pmap);
18010 }
18011
18012 void
18013 vm_map_disallow_data_exec(vm_map_t map)
18014 {
18015         if (map == NULL) {
18016                 return;
18017         }
18018
18019         map->map_disallow_data_exec = TRUE;
18020 }
18021
18022 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
18023  * more descriptive.
18024  */
18025 void
18026 vm_map_set_32bit(vm_map_t map)
18027 {
18028 #if defined(__arm__) || defined(__arm64__)
18029         map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
18030 #else
18031         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
18032 #endif
18033 }
18034
18035
18036 void
18037 vm_map_set_64bit(vm_map_t map)
18038 {
18039 #if defined(__arm__) || defined(__arm64__)
18040         map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18041 #else
18042         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
18043 #endif
18044 }
18045
18046 /*
18047  * Expand the maximum size of an existing map to the maximum supported.
18048  */
18049 void
18050 vm_map_set_jumbo(vm_map_t map)
18051 {
18052 #if defined (__arm64__)
18053         vm_map_set_max_addr(map, ~0);
18054 #else /* arm64 */
18055         (void) map;
18056 #endif
18057 }
18058
18059 /*
18060  * This map has a JIT entitlement
18061  */
18062 void
18063 vm_map_set_jit_entitled(vm_map_t map)
18064 {
18065 #if defined (__arm64__)
18066         pmap_set_jit_entitled(map->pmap);
18067 #else /* arm64 */
18068         (void) map;
18069 #endif
18070 }
18071
18072 /*
18073  * Expand the maximum size of an existing map.
18074  */
18075 void
18076 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18077 {
18078 #if defined(__arm64__)
18079         vm_map_offset_t max_supported_offset = 0;
18080         vm_map_offset_t old_max_offset = map->max_offset;
18081         max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18082
18083         new_max_offset = trunc_page(new_max_offset);
18084
18085         /* The address space cannot be shrunk using this routine. */
18086         if (old_max_offset >= new_max_offset) {
18087                 return;
18088         }
18089
18090         if (max_supported_offset < new_max_offset) {
18091                 new_max_offset = max_supported_offset;
18092         }
18093
18094         map->max_offset = new_max_offset;
18095
18096         if (map->holes_list->prev->vme_end == old_max_offset) {
18097                 /*
18098                  * There is already a hole at the end of the map; simply make it bigger.
18099                  */
18100                 map->holes_list->prev->vme_end = map->max_offset;
18101         } else {
18102                 /*
18103                  * There is no hole at the end, so we need to create a new hole
18104                  * for the new empty space we're creating.
18105                  */
18106                 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18107                 new_hole->start = old_max_offset;
18108                 new_hole->end = map->max_offset;
18109                 new_hole->prev = map->holes_list->prev;
18110                 new_hole->next = (struct vm_map_entry *)map->holes_list;
18111                 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18112                 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18113         }
18114 #else
18115         (void)map;
18116         (void)new_max_offset;
18117 #endif
18118 }
18119
18120 vm_map_offset_t
18121 vm_compute_max_offset(boolean_t is64)
18122 {
18123 #if defined(__arm__) || defined(__arm64__)
18124         return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
18125 #else
18126         return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
18127 #endif
18128 }
18129
18130 void
18131 vm_map_get_max_aslr_slide_section(
18132         vm_map_t                map __unused,
18133         int64_t                 *max_sections,
18134         int64_t                 *section_size)
18135 {
18136 #if defined(__arm64__)
18137         *max_sections = 3;
18138         *section_size = ARM_TT_TWIG_SIZE;
18139 #else
18140         *max_sections = 1;
18141         *section_size = 0;
18142 #endif
18143 }
18144
18145 uint64_t
18146 vm_map_get_max_aslr_slide_pages(vm_map_t map)
18147 {
18148 #if defined(__arm64__)
18149         /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18150          * limited embedded address space; this is also meant to minimize pmap
18151          * memory usage on 16KB page systems.
18152          */
18153         return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
18154 #else
18155         return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18156 #endif
18157 }
18158
18159 uint64_t
18160 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18161 {
18162 #if defined(__arm64__)
18163         /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18164          * of independent entropy on 16KB page systems.
18165          */
18166         return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
18167 #else
18168         return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18169 #endif
18170 }
18171
18172 #ifndef __arm__
18173 boolean_t
18174 vm_map_is_64bit(
18175         vm_map_t map)
18176 {
18177         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18178 }
18179 #endif
18180
18181 boolean_t
18182 vm_map_has_hard_pagezero(
18183         vm_map_t        map,
18184         vm_map_offset_t pagezero_size)
18185 {
18186         /*
18187          * XXX FBDP
18188          * We should lock the VM map (for read) here but we can get away
18189          * with it for now because there can't really be any race condition:
18190          * the VM map's min_offset is changed only when the VM map is created
18191          * and when the zero page is established (when the binary gets loaded),
18192          * and this routine gets called only when the task terminates and the
18193          * VM map is being torn down, and when a new map is created via
18194          * load_machfile()/execve().
18195          */
18196         return map->min_offset >= pagezero_size;
18197 }
18198
18199 /*
18200  * Raise a VM map's maximun offset.
18201  */
18202 kern_return_t
18203 vm_map_raise_max_offset(
18204         vm_map_t        map,
18205         vm_map_offset_t new_max_offset)
18206 {
18207         kern_return_t   ret;
18208
18209         vm_map_lock(map);
18210         ret = KERN_INVALID_ADDRESS;
18211
18212         if (new_max_offset >= map->max_offset) {
18213                 if (!vm_map_is_64bit(map)) {
18214                         if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18215                                 map->max_offset = new_max_offset;
18216                                 ret = KERN_SUCCESS;
18217                         }
18218                 } else {
18219                         if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18220                                 map->max_offset = new_max_offset;
18221                                 ret = KERN_SUCCESS;
18222                         }
18223                 }
18224         }
18225
18226         vm_map_unlock(map);
18227         return ret;
18228 }
18229
18230
18231 /*
18232  * Raise a VM map's minimum offset.
18233  * To strictly enforce "page zero" reservation.
18234  */
18235 kern_return_t
18236 vm_map_raise_min_offset(
18237         vm_map_t        map,
18238         vm_map_offset_t new_min_offset)
18239 {
18240         vm_map_entry_t  first_entry;
18241
18242         new_min_offset = vm_map_round_page(new_min_offset,
18243             VM_MAP_PAGE_MASK(map));
18244
18245         vm_map_lock(map);
18246
18247         if (new_min_offset < map->min_offset) {
18248                 /*
18249                  * Can't move min_offset backwards, as that would expose
18250                  * a part of the address space that was previously, and for
18251                  * possibly good reasons, inaccessible.
18252                  */
18253                 vm_map_unlock(map);
18254                 return KERN_INVALID_ADDRESS;
18255         }
18256         if (new_min_offset >= map->max_offset) {
18257                 /* can't go beyond the end of the address space */
18258                 vm_map_unlock(map);
18259                 return KERN_INVALID_ADDRESS;
18260         }
18261
18262         first_entry = vm_map_first_entry(map);
18263         if (first_entry != vm_map_to_entry(map) &&
18264             first_entry->vme_start < new_min_offset) {
18265                 /*
18266                  * Some memory was already allocated below the new
18267                  * minimun offset.  It's too late to change it now...
18268                  */
18269                 vm_map_unlock(map);
18270                 return KERN_NO_SPACE;
18271         }
18272
18273         map->min_offset = new_min_offset;
18274
18275         assert(map->holes_list);
18276         map->holes_list->start = new_min_offset;
18277         assert(new_min_offset < map->holes_list->end);
18278
18279         vm_map_unlock(map);
18280
18281         return KERN_SUCCESS;
18282 }
18283
18284 /*
18285  * Set the limit on the maximum amount of user wired memory allowed for this map.
18286  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18287  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
18288  * don't have to reach over to the BSD data structures.
18289  */
18290
18291 void
18292 vm_map_set_user_wire_limit(vm_map_t     map,
18293     vm_size_t    limit)
18294 {
18295         map->user_wire_limit = limit;
18296 }
18297
18298
18299 void
18300 vm_map_switch_protect(vm_map_t     map,
18301     boolean_t    val)
18302 {
18303         vm_map_lock(map);
18304         map->switch_protect = val;
18305         vm_map_unlock(map);
18306 }
18307
18308 /*
18309  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18310  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18311  * bump both counters.
18312  */
18313 void
18314 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18315 {
18316         pmap_t pmap = vm_map_pmap(map);
18317
18318         ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18319         ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18320 }
18321
18322 void
18323 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18324 {
18325         pmap_t pmap = vm_map_pmap(map);
18326
18327         ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18328         ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18329 }
18330
18331 /* Add (generate) code signature for memory range */
18332 #if CONFIG_DYNAMIC_CODE_SIGNING
18333 kern_return_t
18334 vm_map_sign(vm_map_t map,
18335     vm_map_offset_t start,
18336     vm_map_offset_t end)
18337 {
18338         vm_map_entry_t entry;
18339         vm_page_t m;
18340         vm_object_t object;
18341
18342         /*
18343          * Vet all the input parameters and current type and state of the
18344          * underlaying object.  Return with an error if anything is amiss.
18345          */
18346         if (map == VM_MAP_NULL) {
18347                 return KERN_INVALID_ARGUMENT;
18348         }
18349
18350         vm_map_lock_read(map);
18351
18352         if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18353                 /*
18354                  * Must pass a valid non-submap address.
18355                  */
18356                 vm_map_unlock_read(map);
18357                 return KERN_INVALID_ADDRESS;
18358         }
18359
18360         if ((entry->vme_start > start) || (entry->vme_end < end)) {
18361                 /*
18362                  * Map entry doesn't cover the requested range. Not handling
18363                  * this situation currently.
18364                  */
18365                 vm_map_unlock_read(map);
18366                 return KERN_INVALID_ARGUMENT;
18367         }
18368
18369         object = VME_OBJECT(entry);
18370         if (object == VM_OBJECT_NULL) {
18371                 /*
18372                  * Object must already be present or we can't sign.
18373                  */
18374                 vm_map_unlock_read(map);
18375                 return KERN_INVALID_ARGUMENT;
18376         }
18377
18378         vm_object_lock(object);
18379         vm_map_unlock_read(map);
18380
18381         while (start < end) {
18382                 uint32_t refmod;
18383
18384                 m = vm_page_lookup(object,
18385                     start - entry->vme_start + VME_OFFSET(entry));
18386                 if (m == VM_PAGE_NULL) {
18387                         /* shoud we try to fault a page here? we can probably
18388                          * demand it exists and is locked for this request */
18389                         vm_object_unlock(object);
18390                         return KERN_FAILURE;
18391                 }
18392                 /* deal with special page status */
18393                 if (m->vmp_busy ||
18394                     (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18395                         vm_object_unlock(object);
18396                         return KERN_FAILURE;
18397                 }
18398
18399                 /* Page is OK... now "validate" it */
18400                 /* This is the place where we'll call out to create a code
18401                  * directory, later */
18402                 m->vmp_cs_validated = TRUE;
18403
18404                 /* The page is now "clean" for codesigning purposes. That means
18405                  * we don't consider it as modified (wpmapped) anymore. But
18406                  * we'll disconnect the page so we note any future modification
18407                  * attempts. */
18408                 m->vmp_wpmapped = FALSE;
18409                 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18410
18411                 /* Pull the dirty status from the pmap, since we cleared the
18412                  * wpmapped bit */
18413                 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18414                         SET_PAGE_DIRTY(m, FALSE);
18415                 }
18416
18417                 /* On to the next page */
18418                 start += PAGE_SIZE;
18419         }
18420         vm_object_unlock(object);
18421
18422         return KERN_SUCCESS;
18423 }
18424 #endif
18425
18426 kern_return_t
18427 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18428 {
18429         vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
18430         vm_map_entry_t next_entry;
18431         kern_return_t   kr = KERN_SUCCESS;
18432         vm_map_t        zap_map;
18433
18434         vm_map_lock(map);
18435
18436         /*
18437          * We use a "zap_map" to avoid having to unlock
18438          * the "map" in vm_map_delete().
18439          */
18440         zap_map = vm_map_create(PMAP_NULL,
18441             map->min_offset,
18442             map->max_offset,
18443             map->hdr.entries_pageable);
18444
18445         if (zap_map == VM_MAP_NULL) {
18446                 return KERN_RESOURCE_SHORTAGE;
18447         }
18448
18449         vm_map_set_page_shift(zap_map,
18450             VM_MAP_PAGE_SHIFT(map));
18451         vm_map_disable_hole_optimization(zap_map);
18452
18453         for (entry = vm_map_first_entry(map);
18454             entry != vm_map_to_entry(map);
18455             entry = next_entry) {
18456                 next_entry = entry->vme_next;
18457
18458                 if (VME_OBJECT(entry) &&
18459                     !entry->is_sub_map &&
18460                     (VME_OBJECT(entry)->internal == TRUE) &&
18461                     (VME_OBJECT(entry)->ref_count == 1)) {
18462                         *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18463                         *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18464
18465                         (void)vm_map_delete(map,
18466                             entry->vme_start,
18467                             entry->vme_end,
18468                             VM_MAP_REMOVE_SAVE_ENTRIES,
18469                             zap_map);
18470                 }
18471         }
18472
18473         vm_map_unlock(map);
18474
18475         /*
18476          * Get rid of the "zap_maps" and all the map entries that
18477          * they may still contain.
18478          */
18479         if (zap_map != VM_MAP_NULL) {
18480                 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18481                 zap_map = VM_MAP_NULL;
18482         }
18483
18484         return kr;
18485 }
18486
18487
18488 #if DEVELOPMENT || DEBUG
18489
18490 int
18491 vm_map_disconnect_page_mappings(
18492         vm_map_t map,
18493         boolean_t do_unnest)
18494 {
18495         vm_map_entry_t entry;
18496         int     page_count = 0;
18497
18498         if (do_unnest == TRUE) {
18499 #ifndef NO_NESTED_PMAP
18500                 vm_map_lock(map);
18501
18502                 for (entry = vm_map_first_entry(map);
18503                     entry != vm_map_to_entry(map);
18504                     entry = entry->vme_next) {
18505                         if (entry->is_sub_map && entry->use_pmap) {
18506                                 /*
18507                                  * Make sure the range between the start of this entry and
18508                                  * the end of this entry is no longer nested, so that
18509                                  * we will only remove mappings from the pmap in use by this
18510                                  * this task
18511                                  */
18512                                 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18513                         }
18514                 }
18515                 vm_map_unlock(map);
18516 #endif
18517         }
18518         vm_map_lock_read(map);
18519
18520         page_count = map->pmap->stats.resident_count;
18521
18522         for (entry = vm_map_first_entry(map);
18523             entry != vm_map_to_entry(map);
18524             entry = entry->vme_next) {
18525                 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18526                     (VME_OBJECT(entry)->phys_contiguous))) {
18527                         continue;
18528                 }
18529                 if (entry->is_sub_map) {
18530                         assert(!entry->use_pmap);
18531                 }
18532
18533                 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18534         }
18535         vm_map_unlock_read(map);
18536
18537         return page_count;
18538 }
18539
18540 #endif
18541
18542
18543 #if CONFIG_FREEZE
18544
18545
18546 int c_freezer_swapout_page_count;
18547 int c_freezer_compression_count = 0;
18548 AbsoluteTime c_freezer_last_yield_ts = 0;
18549
18550 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18551 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18552
18553 kern_return_t
18554 vm_map_freeze(
18555         task_t       task,
18556         unsigned int *purgeable_count,
18557         unsigned int *wired_count,
18558         unsigned int *clean_count,
18559         unsigned int *dirty_count,
18560         unsigned int dirty_budget,
18561         unsigned int *shared_count,
18562         int          *freezer_error_code,
18563         boolean_t    eval_only)
18564 {
18565         vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
18566         kern_return_t   kr = KERN_SUCCESS;
18567         boolean_t       evaluation_phase = TRUE;
18568         vm_object_t     cur_shared_object = NULL;
18569         int             cur_shared_obj_ref_cnt = 0;
18570         unsigned int    dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18571
18572         *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18573
18574         /*
18575          * We need the exclusive lock here so that we can
18576          * block any page faults or lookups while we are
18577          * in the middle of freezing this vm map.
18578          */
18579         vm_map_t map = task->map;
18580
18581         vm_map_lock(map);
18582
18583         assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18584
18585         if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18586                 if (vm_compressor_low_on_space()) {
18587                         *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18588                 }
18589
18590                 if (vm_swap_low_on_space()) {
18591                         *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18592                 }
18593
18594                 kr = KERN_NO_SPACE;
18595                 goto done;
18596         }
18597
18598         if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18599                 /*
18600                  * In-memory compressor backing the freezer. No disk.
18601                  * So no need to do the evaluation phase.
18602                  */
18603                 evaluation_phase = FALSE;
18604
18605                 if (eval_only == TRUE) {
18606                         /*
18607                          * We don't support 'eval_only' mode
18608                          * in this non-swap config.
18609                          */
18610                         *freezer_error_code = FREEZER_ERROR_GENERIC;
18611                         kr = KERN_INVALID_ARGUMENT;
18612                         goto done;
18613                 }
18614
18615                 c_freezer_compression_count = 0;
18616                 clock_get_uptime(&c_freezer_last_yield_ts);
18617         }
18618 again:
18619
18620         for (entry2 = vm_map_first_entry(map);
18621             entry2 != vm_map_to_entry(map);
18622             entry2 = entry2->vme_next) {
18623                 vm_object_t     src_object = VME_OBJECT(entry2);
18624
18625                 if (src_object &&
18626                     !entry2->is_sub_map &&
18627                     !src_object->phys_contiguous) {
18628                         /* If eligible, scan the entry, moving eligible pages over to our parent object */
18629
18630                         if (src_object->internal == TRUE) {
18631                                 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18632                                         /*
18633                                          * We skip purgeable objects during evaluation phase only.
18634                                          * If we decide to freeze this process, we'll explicitly
18635                                          * purge these objects before we go around again with
18636                                          * 'evaluation_phase' set to FALSE.
18637                                          */
18638
18639                                         if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18640                                                 /*
18641                                                  * We want to purge objects that may not belong to this task but are mapped
18642                                                  * in this task alone. Since we already purged this task's purgeable memory
18643                                                  * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18644                                                  * on this task's purgeable objects. Hence the check for only volatile objects.
18645                                                  */
18646                                                 if (evaluation_phase == FALSE &&
18647                                                     (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18648                                                     (src_object->ref_count == 1)) {
18649                                                         vm_object_lock(src_object);
18650                                                         vm_object_purge(src_object, 0);
18651                                                         vm_object_unlock(src_object);
18652                                                 }
18653                                                 continue;
18654                                         }
18655
18656                                         /*
18657                                          * Pages belonging to this object could be swapped to disk.
18658                                          * Make sure it's not a shared object because we could end
18659                                          * up just bringing it back in again.
18660                                          *
18661                                          * We try to optimize somewhat by checking for objects that are mapped
18662                                          * more than once within our own map. But we don't do full searches,
18663                                          * we just look at the entries following our current entry.
18664                                          */
18665
18666                                         if (src_object->ref_count > 1) {
18667                                                 if (src_object != cur_shared_object) {
18668                                                         obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18669                                                         dirty_shared_count += obj_pages_snapshot;
18670
18671                                                         cur_shared_object = src_object;
18672                                                         cur_shared_obj_ref_cnt = 1;
18673                                                         continue;
18674                                                 } else {
18675                                                         cur_shared_obj_ref_cnt++;
18676                                                         if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18677                                                                 /*
18678                                                                  * Fall through to below and treat this object as private.
18679                                                                  * So deduct its pages from our shared total and add it to the
18680                                                                  * private total.
18681                                                                  */
18682
18683                                                                 dirty_shared_count -= obj_pages_snapshot;
18684                                                                 dirty_private_count += obj_pages_snapshot;
18685                                                         } else {
18686                                                                 continue;
18687                                                         }
18688                                                 }
18689                                         }
18690
18691
18692                                         if (src_object->ref_count == 1) {
18693                                                 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18694                                         }
18695
18696                                         if (evaluation_phase == TRUE) {
18697                                                 continue;
18698                                         }
18699                                 }
18700
18701                                 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
18702                                 *wired_count += src_object->wired_page_count;
18703
18704                                 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18705                                         if (vm_compressor_low_on_space()) {
18706                                                 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18707                                         }
18708
18709                                         if (vm_swap_low_on_space()) {
18710                                                 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18711                                         }
18712
18713                                         kr = KERN_NO_SPACE;
18714                                         break;
18715                                 }
18716                                 if (paged_out_count >= dirty_budget) {
18717                                         break;
18718                                 }
18719                                 dirty_budget -= paged_out_count;
18720                         }
18721                 }
18722         }
18723
18724         *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18725         if (evaluation_phase) {
18726                 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18727
18728                 if (dirty_shared_count > shared_pages_threshold) {
18729                         *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18730                         kr = KERN_FAILURE;
18731                         goto done;
18732                 }
18733
18734                 if (dirty_shared_count &&
18735                     ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18736                         *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18737                         kr = KERN_FAILURE;
18738                         goto done;
18739                 }
18740
18741                 evaluation_phase = FALSE;
18742                 dirty_shared_count = dirty_private_count = 0;
18743
18744                 c_freezer_compression_count = 0;
18745                 clock_get_uptime(&c_freezer_last_yield_ts);
18746
18747                 if (eval_only) {
18748                         kr = KERN_SUCCESS;
18749                         goto done;
18750                 }
18751
18752                 vm_purgeable_purge_task_owned(task);
18753
18754                 goto again;
18755         } else {
18756                 kr = KERN_SUCCESS;
18757         }
18758
18759 done:
18760         vm_map_unlock(map);
18761
18762         if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18763                 vm_object_compressed_freezer_done();
18764
18765                 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18766                         /*
18767                          * reset the counter tracking the # of swapped compressed pages
18768                          * because we are now done with this freeze session and task.
18769                          */
18770
18771                         *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18772                         c_freezer_swapout_page_count = 0;
18773                 }
18774         }
18775         return kr;
18776 }
18777
18778 #endif
18779
18780 /*
18781  * vm_map_entry_should_cow_for_true_share:
18782  *
18783  * Determines if the map entry should be clipped and setup for copy-on-write
18784  * to avoid applying "true_share" to a large VM object when only a subset is
18785  * targeted.
18786  *
18787  * For now, we target only the map entries created for the Objective C
18788  * Garbage Collector, which initially have the following properties:
18789  *      - alias == VM_MEMORY_MALLOC
18790  *      - wired_count == 0
18791  *      - !needs_copy
18792  * and a VM object with:
18793  *      - internal
18794  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18795  *      - !true_share
18796  *      - vo_size == ANON_CHUNK_SIZE
18797  *
18798  * Only non-kernel map entries.
18799  */
18800 boolean_t
18801 vm_map_entry_should_cow_for_true_share(
18802         vm_map_entry_t  entry)
18803 {
18804         vm_object_t     object;
18805
18806         if (entry->is_sub_map) {
18807                 /* entry does not point at a VM object */
18808                 return FALSE;
18809         }
18810
18811         if (entry->needs_copy) {
18812                 /* already set for copy_on_write: done! */
18813                 return FALSE;
18814         }
18815
18816         if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18817             VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18818                 /* not a malloc heap or Obj-C Garbage Collector heap */
18819                 return FALSE;
18820         }
18821
18822         if (entry->wired_count) {
18823                 /* wired: can't change the map entry... */
18824                 vm_counters.should_cow_but_wired++;
18825                 return FALSE;
18826         }
18827
18828         object = VME_OBJECT(entry);
18829
18830         if (object == VM_OBJECT_NULL) {
18831                 /* no object yet... */
18832                 return FALSE;
18833         }
18834
18835         if (!object->internal) {
18836                 /* not an internal object */
18837                 return FALSE;
18838         }
18839
18840         if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18841                 /* not the default copy strategy */
18842                 return FALSE;
18843         }
18844
18845         if (object->true_share) {
18846                 /* already true_share: too late to avoid it */
18847                 return FALSE;
18848         }
18849
18850         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18851             object->vo_size != ANON_CHUNK_SIZE) {
18852                 /* ... not an object created for the ObjC Garbage Collector */
18853                 return FALSE;
18854         }
18855
18856         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18857             object->vo_size != 2048 * 4096) {
18858                 /* ... not a "MALLOC_SMALL" heap */
18859                 return FALSE;
18860         }
18861
18862         /*
18863          * All the criteria match: we have a large object being targeted for "true_share".
18864          * To limit the adverse side-effects linked with "true_share", tell the caller to
18865          * try and avoid setting up the entire object for "true_share" by clipping the
18866          * targeted range and setting it up for copy-on-write.
18867          */
18868         return TRUE;
18869 }
18870
18871 vm_map_offset_t
18872 vm_map_round_page_mask(
18873         vm_map_offset_t offset,
18874         vm_map_offset_t mask)
18875 {
18876         return VM_MAP_ROUND_PAGE(offset, mask);
18877 }
18878
18879 vm_map_offset_t
18880 vm_map_trunc_page_mask(
18881         vm_map_offset_t offset,
18882         vm_map_offset_t mask)
18883 {
18884         return VM_MAP_TRUNC_PAGE(offset, mask);
18885 }
18886
18887 boolean_t
18888 vm_map_page_aligned(
18889         vm_map_offset_t offset,
18890         vm_map_offset_t mask)
18891 {
18892         return ((offset) & mask) == 0;
18893 }
18894
18895 int
18896 vm_map_page_shift(
18897         vm_map_t map)
18898 {
18899         return VM_MAP_PAGE_SHIFT(map);
18900 }
18901
18902 int
18903 vm_map_page_size(
18904         vm_map_t map)
18905 {
18906         return VM_MAP_PAGE_SIZE(map);
18907 }
18908
18909 vm_map_offset_t
18910 vm_map_page_mask(
18911         vm_map_t map)
18912 {
18913         return VM_MAP_PAGE_MASK(map);
18914 }
18915
18916 kern_return_t
18917 vm_map_set_page_shift(
18918         vm_map_t        map,
18919         int             pageshift)
18920 {
18921         if (map->hdr.nentries != 0) {
18922                 /* too late to change page size */
18923                 return KERN_FAILURE;
18924         }
18925
18926         map->hdr.page_shift = pageshift;
18927
18928         return KERN_SUCCESS;
18929 }
18930
18931 kern_return_t
18932 vm_map_query_volatile(
18933         vm_map_t        map,
18934         mach_vm_size_t  *volatile_virtual_size_p,
18935         mach_vm_size_t  *volatile_resident_size_p,
18936         mach_vm_size_t  *volatile_compressed_size_p,
18937         mach_vm_size_t  *volatile_pmap_size_p,
18938         mach_vm_size_t  *volatile_compressed_pmap_size_p)
18939 {
18940         mach_vm_size_t  volatile_virtual_size;
18941         mach_vm_size_t  volatile_resident_count;
18942         mach_vm_size_t  volatile_compressed_count;
18943         mach_vm_size_t  volatile_pmap_count;
18944         mach_vm_size_t  volatile_compressed_pmap_count;
18945         mach_vm_size_t  resident_count;
18946         vm_map_entry_t  entry;
18947         vm_object_t     object;
18948
18949         /* map should be locked by caller */
18950
18951         volatile_virtual_size = 0;
18952         volatile_resident_count = 0;
18953         volatile_compressed_count = 0;
18954         volatile_pmap_count = 0;
18955         volatile_compressed_pmap_count = 0;
18956
18957         for (entry = vm_map_first_entry(map);
18958             entry != vm_map_to_entry(map);
18959             entry = entry->vme_next) {
18960                 mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
18961
18962                 if (entry->is_sub_map) {
18963                         continue;
18964                 }
18965                 if (!(entry->protection & VM_PROT_WRITE)) {
18966                         continue;
18967                 }
18968                 object = VME_OBJECT(entry);
18969                 if (object == VM_OBJECT_NULL) {
18970                         continue;
18971                 }
18972                 if (object->purgable != VM_PURGABLE_VOLATILE &&
18973                     object->purgable != VM_PURGABLE_EMPTY) {
18974                         continue;
18975                 }
18976                 if (VME_OFFSET(entry)) {
18977                         /*
18978                          * If the map entry has been split and the object now
18979                          * appears several times in the VM map, we don't want
18980                          * to count the object's resident_page_count more than
18981                          * once.  We count it only for the first one, starting
18982                          * at offset 0 and ignore the other VM map entries.
18983                          */
18984                         continue;
18985                 }
18986                 resident_count = object->resident_page_count;
18987                 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18988                         resident_count = 0;
18989                 } else {
18990                         resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18991                 }
18992
18993                 volatile_virtual_size += entry->vme_end - entry->vme_start;
18994                 volatile_resident_count += resident_count;
18995                 if (object->pager) {
18996                         volatile_compressed_count +=
18997                             vm_compressor_pager_get_count(object->pager);
18998                 }
18999                 pmap_compressed_bytes = 0;
19000                 pmap_resident_bytes =
19001                     pmap_query_resident(map->pmap,
19002                     entry->vme_start,
19003                     entry->vme_end,
19004                     &pmap_compressed_bytes);
19005                 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
19006                 volatile_compressed_pmap_count += (pmap_compressed_bytes
19007                     / PAGE_SIZE);
19008         }
19009
19010         /* map is still locked on return */
19011
19012         *volatile_virtual_size_p = volatile_virtual_size;
19013         *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
19014         *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
19015         *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
19016         *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
19017
19018         return KERN_SUCCESS;
19019 }
19020
19021 void
19022 vm_map_sizes(vm_map_t map,
19023     vm_map_size_t * psize,
19024     vm_map_size_t * pfree,
19025     vm_map_size_t * plargest_free)
19026 {
19027         vm_map_entry_t  entry;
19028         vm_map_offset_t prev;
19029         vm_map_size_t   free, total_free, largest_free;
19030         boolean_t       end;
19031
19032         if (!map) {
19033                 *psize = *pfree = *plargest_free = 0;
19034                 return;
19035         }
19036         total_free = largest_free = 0;
19037
19038         vm_map_lock_read(map);
19039         if (psize) {
19040                 *psize = map->max_offset - map->min_offset;
19041         }
19042
19043         prev = map->min_offset;
19044         for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19045                 end = (entry == vm_map_to_entry(map));
19046
19047                 if (end) {
19048                         free = entry->vme_end   - prev;
19049                 } else {
19050                         free = entry->vme_start - prev;
19051                 }
19052
19053                 total_free += free;
19054                 if (free > largest_free) {
19055                         largest_free = free;
19056                 }
19057
19058                 if (end) {
19059                         break;
19060                 }
19061                 prev = entry->vme_end;
19062         }
19063         vm_map_unlock_read(map);
19064         if (pfree) {
19065                 *pfree = total_free;
19066         }
19067         if (plargest_free) {
19068                 *plargest_free = largest_free;
19069         }
19070 }
19071
19072 #if VM_SCAN_FOR_SHADOW_CHAIN
19073 int vm_map_shadow_max(vm_map_t map);
19074 int
19075 vm_map_shadow_max(
19076         vm_map_t map)
19077 {
19078         int             shadows, shadows_max;
19079         vm_map_entry_t  entry;
19080         vm_object_t     object, next_object;
19081
19082         if (map == NULL) {
19083                 return 0;
19084         }
19085
19086         shadows_max = 0;
19087
19088         vm_map_lock_read(map);
19089
19090         for (entry = vm_map_first_entry(map);
19091             entry != vm_map_to_entry(map);
19092             entry = entry->vme_next) {
19093                 if (entry->is_sub_map) {
19094                         continue;
19095                 }
19096                 object = VME_OBJECT(entry);
19097                 if (object == NULL) {
19098                         continue;
19099                 }
19100                 vm_object_lock_shared(object);
19101                 for (shadows = 0;
19102                     object->shadow != NULL;
19103                     shadows++, object = next_object) {
19104                         next_object = object->shadow;
19105                         vm_object_lock_shared(next_object);
19106                         vm_object_unlock(object);
19107                 }
19108                 vm_object_unlock(object);
19109                 if (shadows > shadows_max) {
19110                         shadows_max = shadows;
19111                 }
19112         }
19113
19114         vm_map_unlock_read(map);
19115
19116         return shadows_max;
19117 }
19118 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19119
19120 void
19121 vm_commit_pagezero_status(vm_map_t lmap)
19122 {
19123         pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19124 }
19125
19126 #if !CONFIG_EMBEDDED
19127 void
19128 vm_map_set_high_start(
19129         vm_map_t        map,
19130         vm_map_offset_t high_start)
19131 {
19132         map->vmmap_high_start = high_start;
19133 }
19134 #endif
19135
19136 #if PMAP_CS
19137 kern_return_t
19138 vm_map_entry_cs_associate(
19139         vm_map_t                map,
19140         vm_map_entry_t          entry,
19141         vm_map_kernel_flags_t   vmk_flags)
19142 {
19143         vm_object_t cs_object, cs_shadow;
19144         vm_object_offset_t cs_offset;
19145         void *cs_blobs;
19146         struct vnode *cs_vnode;
19147         kern_return_t cs_ret;
19148
19149         if (map->pmap == NULL ||
19150             entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
19151             VME_OBJECT(entry) == VM_OBJECT_NULL ||
19152             !(entry->protection & VM_PROT_EXECUTE)) {
19153                 return KERN_SUCCESS;
19154         }
19155
19156         vm_map_lock_assert_exclusive(map);
19157
19158         if (entry->used_for_jit) {
19159                 cs_ret = pmap_cs_associate(map->pmap,
19160                     PMAP_CS_ASSOCIATE_JIT,
19161                     entry->vme_start,
19162                     entry->vme_end - entry->vme_start);
19163                 goto done;
19164         }
19165
19166         if (vmk_flags.vmkf_remap_prot_copy) {
19167                 cs_ret = pmap_cs_associate(map->pmap,
19168                     PMAP_CS_ASSOCIATE_COW,
19169                     entry->vme_start,
19170                     entry->vme_end - entry->vme_start);
19171                 goto done;
19172         }
19173
19174         vm_object_lock_shared(VME_OBJECT(entry));
19175         cs_offset = VME_OFFSET(entry);
19176         for (cs_object = VME_OBJECT(entry);
19177             (cs_object != VM_OBJECT_NULL &&
19178             !cs_object->code_signed);
19179             cs_object = cs_shadow) {
19180                 cs_shadow = cs_object->shadow;
19181                 if (cs_shadow != VM_OBJECT_NULL) {
19182                         cs_offset += cs_object->vo_shadow_offset;
19183                         vm_object_lock_shared(cs_shadow);
19184                 }
19185                 vm_object_unlock(cs_object);
19186         }
19187         if (cs_object == VM_OBJECT_NULL) {
19188                 return KERN_SUCCESS;
19189         }
19190
19191         cs_offset += cs_object->paging_offset;
19192         cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
19193         cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
19194             &cs_blobs);
19195         assert(cs_ret == KERN_SUCCESS);
19196         cs_ret = cs_associate_blob_with_mapping(map->pmap,
19197             entry->vme_start,
19198             (entry->vme_end -
19199             entry->vme_start),
19200             cs_offset,
19201             cs_blobs);
19202         vm_object_unlock(cs_object);
19203         cs_object = VM_OBJECT_NULL;
19204
19205 done:
19206         if (cs_ret == KERN_SUCCESS) {
19207                 DTRACE_VM2(vm_map_entry_cs_associate_success,
19208                     vm_map_offset_t, entry->vme_start,
19209                     vm_map_offset_t, entry->vme_end);
19210                 if (vm_map_executable_immutable) {
19211                         /*
19212                          * Prevent this executable
19213                          * mapping from being unmapped
19214                          * or modified.
19215                          */
19216                         entry->permanent = TRUE;
19217                 }
19218                 /*
19219                  * pmap says it will validate the
19220                  * code-signing validity of pages
19221                  * faulted in via this mapping, so
19222                  * this map entry should be marked so
19223                  * that vm_fault() bypasses code-signing
19224                  * validation for faults coming through
19225                  * this mapping.
19226                  */
19227                 entry->pmap_cs_associated = TRUE;
19228         } else if (cs_ret == KERN_NOT_SUPPORTED) {
19229                 /*
19230                  * pmap won't check the code-signing
19231                  * validity of pages faulted in via
19232                  * this mapping, so VM should keep
19233                  * doing it.
19234                  */
19235                 DTRACE_VM3(vm_map_entry_cs_associate_off,
19236                     vm_map_offset_t, entry->vme_start,
19237                     vm_map_offset_t, entry->vme_end,
19238                     int, cs_ret);
19239         } else {
19240                 /*
19241                  * A real error: do not allow
19242                  * execution in this mapping.
19243                  */
19244                 DTRACE_VM3(vm_map_entry_cs_associate_failure,
19245                     vm_map_offset_t, entry->vme_start,
19246                     vm_map_offset_t, entry->vme_end,
19247                     int, cs_ret);
19248                 entry->protection &= ~VM_PROT_EXECUTE;
19249                 entry->max_protection &= ~VM_PROT_EXECUTE;
19250         }
19251
19252         return cs_ret;
19253 }
19254 #endif /* PMAP_CS */
19255
19256 /*
19257  * FORKED CORPSE FOOTPRINT
19258  *
19259  * A forked corpse gets a copy of the original VM map but its pmap is mostly
19260  * empty since it never ran and never got to fault in any pages.
19261  * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19262  * a forked corpse would therefore return very little information.
19263  *
19264  * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19265  * to vm_map_fork() to collect footprint information from the original VM map
19266  * and its pmap, and store it in the forked corpse's VM map.  That information
19267  * is stored in place of the VM map's "hole list" since we'll never need to
19268  * look up holes in the corpse's map.
19269  *
19270  * The corpse's footprint info looks like this:
19271  *
19272  * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19273  * as follows:
19274  *                     +---------------------------------------+
19275  *            header-> | cf_size                               |
19276  *                     +-------------------+-------------------+
19277  *                     | cf_last_region    | cf_last_zeroes    |
19278  *                     +-------------------+-------------------+
19279  *           region1-> | cfr_vaddr                             |
19280  *                     +-------------------+-------------------+
19281  *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
19282  *                     +---------------------------------------+
19283  *                     | d4 | d5 | ...                         |
19284  *                     +---------------------------------------+
19285  *                     | ...                                   |
19286  *                     +-------------------+-------------------+
19287  *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
19288  *                     +-------------------+-------------------+
19289  *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
19290  *                     +---------------------------------------+
19291  *                     | d0 | d1 ...                           |
19292  *                     +---------------------------------------+
19293  *                       ...
19294  *                     +---------------------------------------+
19295  *       last region-> | cfr_vaddr                             |
19296  *                     +---------------------------------------+
19297  *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
19298  *                     +---------------------------------------+
19299  *                       ...
19300  *                     +---------------------------------------+
19301  *                     | dx | dy | dz | na | na | na | na | na |
19302  *                     +---------------------------------------+
19303  *
19304  * where:
19305  *      cf_size:        total size of the buffer (rounded to page size)
19306  *      cf_last_region: offset in the buffer of the last "region" sub-header
19307  *      cf_last_zeroes: number of trailing "zero" dispositions at the end
19308  *                      of last region
19309  *      cfr_vaddr:      virtual address of the start of the covered "region"
19310  *      cfr_num_pages:  number of pages in the covered "region"
19311  *      d*:             disposition of the page at that virtual address
19312  * Regions in the buffer are word-aligned.
19313  *
19314  * We estimate the size of the buffer based on the number of memory regions
19315  * and the virtual size of the address space.  While copying each memory region
19316  * during vm_map_fork(), we also collect the footprint info for that region
19317  * and store it in the buffer, packing it as much as possible (coalescing
19318  * contiguous memory regions to avoid having too many region headers and
19319  * avoiding long streaks of "zero" page dispositions by splitting footprint
19320  * "regions"), so the number of regions in the footprint buffer might not match
19321  * the number of memory regions in the address space.
19322  *
19323  * We also have to copy the original task's "nonvolatile" ledgers since that's
19324  * part of the footprint and will need to be reported to any tool asking for
19325  * the footprint information of the forked corpse.
19326  */
19327
/*
 * Corpse footprint statistics.
 *
 * vm_map_corpse_footprint_no_buf is incremented by
 * vm_map_corpse_footprint_collect() when the footprint buffer can not be
 * allocated.  The other counters are updated outside of this part of the
 * file; judging by their names they track the number of footprints
 * collected, their average and maximum sizes, and the number of times a
 * buffer filled up -- confirm against the code that updates them.
 */
uint64_t        vm_map_corpse_footprint_count = 0;
uint64_t        vm_map_corpse_footprint_size_avg = 0;
uint64_t        vm_map_corpse_footprint_size_max = 0;
uint64_t        vm_map_corpse_footprint_full = 0;
uint64_t        vm_map_corpse_footprint_no_buf = 0;
19333
19334 /*
19335  * vm_map_corpse_footprint_new_region:
19336  *      closes the current footprint "region" and creates a new one
19337  *
19338  * Returns NULL if there's not enough space in the buffer for a new region.
19339  */
19340 static struct vm_map_corpse_footprint_region *
19341 vm_map_corpse_footprint_new_region(
19342         struct vm_map_corpse_footprint_header *footprint_header)
19343 {
19344         uintptr_t       footprint_edge;
19345         uint32_t        new_region_offset;
19346         struct vm_map_corpse_footprint_region *footprint_region;
19347         struct vm_map_corpse_footprint_region *new_footprint_region;
19348
19349         footprint_edge = ((uintptr_t)footprint_header +
19350             footprint_header->cf_size);
19351         footprint_region = ((struct vm_map_corpse_footprint_region *)
19352             ((char *)footprint_header +
19353             footprint_header->cf_last_region));
19354         assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19355             footprint_edge);
19356
19357         /* get rid of trailing zeroes in the last region */
19358         assert(footprint_region->cfr_num_pages >=
19359             footprint_header->cf_last_zeroes);
19360         footprint_region->cfr_num_pages -=
19361             footprint_header->cf_last_zeroes;
19362         footprint_header->cf_last_zeroes = 0;
19363
19364         /* reuse this region if it's now empty */
19365         if (footprint_region->cfr_num_pages == 0) {
19366                 return footprint_region;
19367         }
19368
19369         /* compute offset of new region */
19370         new_region_offset = footprint_header->cf_last_region;
19371         new_region_offset += sizeof(*footprint_region);
19372         new_region_offset += footprint_region->cfr_num_pages;
19373         new_region_offset = roundup(new_region_offset, sizeof(int));
19374
19375         /* check if we're going over the edge */
19376         if (((uintptr_t)footprint_header +
19377             new_region_offset +
19378             sizeof(*footprint_region)) >=
19379             footprint_edge) {
19380                 /* over the edge: no new region */
19381                 return NULL;
19382         }
19383
19384         /* adjust offset of last region in header */
19385         footprint_header->cf_last_region = new_region_offset;
19386
19387         new_footprint_region = (struct vm_map_corpse_footprint_region *)
19388             ((char *)footprint_header +
19389             footprint_header->cf_last_region);
19390         new_footprint_region->cfr_vaddr = 0;
19391         new_footprint_region->cfr_num_pages = 0;
19392         /* caller needs to initialize new region */
19393
19394         return new_footprint_region;
19395 }
19396
19397 /*
19398  * vm_map_corpse_footprint_collect:
19399  *      collects footprint information for "old_entry" in "old_map" and
19400  *      stores it in "new_map"'s vmmap_corpse_footprint buffer.
19401  */
19402 kern_return_t
19403 vm_map_corpse_footprint_collect(
19404         vm_map_t        old_map,
19405         vm_map_entry_t  old_entry,
19406         vm_map_t        new_map)
19407 {
19408         vm_map_offset_t va;
19409         int             disp;
19410         kern_return_t   kr;
19411         struct vm_map_corpse_footprint_header *footprint_header;
19412         struct vm_map_corpse_footprint_region *footprint_region;
19413         struct vm_map_corpse_footprint_region *new_footprint_region;
19414         unsigned char   *next_disp_p;
19415         uintptr_t       footprint_edge;
19416         uint32_t        num_pages_tmp;
19417
19418         va = old_entry->vme_start;
19419
19420         vm_map_lock_assert_exclusive(old_map);
19421         vm_map_lock_assert_exclusive(new_map);
19422
19423         assert(new_map->has_corpse_footprint);
19424         assert(!old_map->has_corpse_footprint);
19425         if (!new_map->has_corpse_footprint ||
19426             old_map->has_corpse_footprint) {
19427                 /*
19428                  * This can only transfer footprint info from a
19429                  * map with a live pmap to a map with a corpse footprint.
19430                  */
19431                 return KERN_NOT_SUPPORTED;
19432         }
19433
19434         if (new_map->vmmap_corpse_footprint == NULL) {
19435                 vm_offset_t     buf;
19436                 vm_size_t       buf_size;
19437
19438                 buf = 0;
19439                 buf_size = (sizeof(*footprint_header) +
19440                     (old_map->hdr.nentries
19441                     *
19442                     (sizeof(*footprint_region) +
19443                     +3))            /* potential alignment for each region */
19444                     +
19445                     ((old_map->size / PAGE_SIZE)
19446                     *
19447                     sizeof(char)));           /* disposition for each page */
19448 //              printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19449                 buf_size = round_page(buf_size);
19450
19451                 /* limit buffer to 1 page to validate overflow detection */
19452 //              buf_size = PAGE_SIZE;
19453
19454                 /* limit size to a somewhat sane amount */
19455 #if CONFIG_EMBEDDED
19456 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
19457 #else /* CONFIG_EMBEDDED */
19458 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
19459 #endif /* CONFIG_EMBEDDED */
19460                 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19461                         buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19462                 }
19463
19464                 /*
19465                  * Allocate the pageable buffer (with a trailing guard page).
19466                  * It will be zero-filled on demand.
19467                  */
19468                 kr = kernel_memory_allocate(kernel_map,
19469                     &buf,
19470                     (buf_size
19471                     + PAGE_SIZE),                          /* trailing guard page */
19472                     0,                         /* mask */
19473                     KMA_PAGEABLE | KMA_GUARD_LAST,
19474                     VM_KERN_MEMORY_DIAG);
19475                 if (kr != KERN_SUCCESS) {
19476                         vm_map_corpse_footprint_no_buf++;
19477                         return kr;
19478                 }
19479
19480                 /* initialize header and 1st region */
19481                 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19482                 new_map->vmmap_corpse_footprint = footprint_header;
19483
19484                 footprint_header->cf_size = buf_size;
19485                 footprint_header->cf_last_region =
19486                     sizeof(*footprint_header);
19487                 footprint_header->cf_last_zeroes = 0;
19488
19489                 footprint_region = (struct vm_map_corpse_footprint_region *)
19490                     ((char *)footprint_header +
19491                     footprint_header->cf_last_region);
19492                 footprint_region->cfr_vaddr = 0;
19493                 footprint_region->cfr_num_pages = 0;
19494         } else {
19495                 /* retrieve header and last region */
19496                 footprint_header = (struct vm_map_corpse_footprint_header *)
19497                     new_map->vmmap_corpse_footprint;
19498                 footprint_region = (struct vm_map_corpse_footprint_region *)
19499                     ((char *)footprint_header +
19500                     footprint_header->cf_last_region);
19501         }
19502         footprint_edge = ((uintptr_t)footprint_header +
19503             footprint_header->cf_size);
19504
19505         if ((footprint_region->cfr_vaddr +
19506             (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19507             PAGE_SIZE))
19508             != old_entry->vme_start) {
19509                 uint64_t num_pages_delta;
19510                 uint32_t region_offset_delta;
19511
19512                 /*
19513                  * Not the next contiguous virtual address:
19514                  * start a new region or store "zero" dispositions for
19515                  * the missing pages?
19516                  */
19517                 /* size of gap in actual page dispositions */
19518                 num_pages_delta = (((old_entry->vme_start -
19519                     footprint_region->cfr_vaddr) / PAGE_SIZE)
19520                     - footprint_region->cfr_num_pages);
19521                 /* size of gap as a new footprint region header */
19522                 region_offset_delta =
19523                     (sizeof(*footprint_region) +
19524                     roundup((footprint_region->cfr_num_pages -
19525                     footprint_header->cf_last_zeroes),
19526                     sizeof(int)) -
19527                     (footprint_region->cfr_num_pages -
19528                     footprint_header->cf_last_zeroes));
19529 //              printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19530                 if (region_offset_delta < num_pages_delta ||
19531                     os_add3_overflow(footprint_region->cfr_num_pages,
19532                     (uint32_t) num_pages_delta,
19533                     1,
19534                     &num_pages_tmp)) {
19535                         /*
19536                          * Storing data for this gap would take more space
19537                          * than inserting a new footprint region header:
19538                          * let's start a new region and save space. If it's a
19539                          * tie, let's avoid using a new region, since that
19540                          * would require more region hops to find the right
19541                          * range during lookups.
19542                          *
19543                          * If the current region's cfr_num_pages would overflow
19544                          * if we added "zero" page dispositions for the gap,
19545                          * no choice but to start a new region.
19546                          */
19547 //                      printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19548                         new_footprint_region =
19549                             vm_map_corpse_footprint_new_region(footprint_header);
19550                         /* check that we're not going over the edge */
19551                         if (new_footprint_region == NULL) {
19552                                 goto over_the_edge;
19553                         }
19554                         footprint_region = new_footprint_region;
19555                         /* initialize new region as empty */
19556                         footprint_region->cfr_vaddr = old_entry->vme_start;
19557                         footprint_region->cfr_num_pages = 0;
19558                 } else {
19559                         /*
19560                          * Store "zero" page dispositions for the missing
19561                          * pages.
19562                          */
19563 //                      printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19564                         for (; num_pages_delta > 0; num_pages_delta--) {
19565                                 next_disp_p =
19566                                     ((unsigned char *) footprint_region +
19567                                     sizeof(*footprint_region) +
19568                                     footprint_region->cfr_num_pages);
19569                                 /* check that we're not going over the edge */
19570                                 if ((uintptr_t)next_disp_p >= footprint_edge) {
19571                                         goto over_the_edge;
19572                                 }
19573                                 /* store "zero" disposition for this gap page */
19574                                 footprint_region->cfr_num_pages++;
19575                                 *next_disp_p = (unsigned char) 0;
19576                                 footprint_header->cf_last_zeroes++;
19577                         }
19578                 }
19579         }
19580
19581         for (va = old_entry->vme_start;
19582             va < old_entry->vme_end;
19583             va += PAGE_SIZE) {
19584                 vm_object_t     object;
19585
19586                 object = VME_OBJECT(old_entry);
19587                 if (!old_entry->is_sub_map &&
19588                     old_entry->iokit_acct &&
19589                     object != VM_OBJECT_NULL &&
19590                     object->internal &&
19591                     object->purgable == VM_PURGABLE_DENY) {
19592                         /*
19593                          * Non-purgeable IOKit memory: phys_footprint
19594                          * includes the entire virtual mapping.
19595                          * Since the forked corpse's VM map entry will not
19596                          * have "iokit_acct", pretend that this page's
19597                          * disposition is "present & internal", so that it
19598                          * shows up in the forked corpse's footprint.
19599                          */
19600                         disp = (PMAP_QUERY_PAGE_PRESENT |
19601                             PMAP_QUERY_PAGE_INTERNAL);
19602                 } else {
19603                         disp = 0;
19604                         pmap_query_page_info(old_map->pmap,
19605                             va,
19606                             &disp);
19607                 }
19608
19609 //              if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19610
19611                 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19612                         /*
19613                          * Ignore "zero" dispositions at start of
19614                          * region: just move start of region.
19615                          */
19616                         footprint_region->cfr_vaddr += PAGE_SIZE;
19617                         continue;
19618                 }
19619
19620                 /* would region's cfr_num_pages overflow? */
19621                 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19622                     &num_pages_tmp)) {
19623                         /* overflow: create a new region */
19624                         new_footprint_region =
19625                             vm_map_corpse_footprint_new_region(
19626                                 footprint_header);
19627                         if (new_footprint_region == NULL) {
19628                                 goto over_the_edge;
19629                         }
19630                         footprint_region = new_footprint_region;
19631                         footprint_region->cfr_vaddr = va;
19632                         footprint_region->cfr_num_pages = 0;
19633                 }
19634
19635                 next_disp_p = ((unsigned char *)footprint_region +
19636                     sizeof(*footprint_region) +
19637                     footprint_region->cfr_num_pages);
19638                 /* check that we're not going over the edge */
19639                 if ((uintptr_t)next_disp_p >= footprint_edge) {
19640                         goto over_the_edge;
19641                 }
19642                 /* store this disposition */
19643                 *next_disp_p = (unsigned char) disp;
19644                 footprint_region->cfr_num_pages++;
19645
19646                 if (disp != 0) {
19647                         /* non-zero disp: break the current zero streak */
19648                         footprint_header->cf_last_zeroes = 0;
19649                         /* done */
19650                         continue;
19651                 }
19652
19653                 /* zero disp: add to the current streak of zeroes */
19654                 footprint_header->cf_last_zeroes++;
19655                 if ((footprint_header->cf_last_zeroes +
19656                     roundup((footprint_region->cfr_num_pages -
19657                     footprint_header->cf_last_zeroes) &
19658                     (sizeof(int) - 1),
19659                     sizeof(int))) <
19660                     (sizeof(*footprint_header))) {
19661                         /*
19662                          * There are not enough trailing "zero" dispositions
19663                          * (+ the extra padding we would need for the previous
19664                          * region); creating a new region would not save space
19665                          * at this point, so let's keep this "zero" disposition
19666                          * in this region and reconsider later.
19667                          */
19668                         continue;
19669                 }
19670                 /*
19671                  * Create a new region to avoid having too many consecutive
19672                  * "zero" dispositions.
19673                  */
19674                 new_footprint_region =
19675                     vm_map_corpse_footprint_new_region(footprint_header);
19676                 if (new_footprint_region == NULL) {
19677                         goto over_the_edge;
19678                 }
19679                 footprint_region = new_footprint_region;
19680                 /* initialize the new region as empty ... */
19681                 footprint_region->cfr_num_pages = 0;
19682                 /* ... and skip this "zero" disp */
19683                 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19684         }
19685
19686         return KERN_SUCCESS;
19687
19688 over_the_edge:
19689 //      printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19690         vm_map_corpse_footprint_full++;
19691         return KERN_RESOURCE_SHORTAGE;
19692 }
19693
19694 /*
19695  * vm_map_corpse_footprint_collect_done:
19696  *      completes the footprint collection by getting rid of any remaining
19697  *      trailing "zero" dispositions and trimming the unused part of the
19698  *      kernel buffer
19699  */
19700 void
19701 vm_map_corpse_footprint_collect_done(
19702         vm_map_t        new_map)
19703 {
19704         struct vm_map_corpse_footprint_header *footprint_header;
19705         struct vm_map_corpse_footprint_region *footprint_region;
19706         vm_size_t       buf_size, actual_size;
19707         kern_return_t   kr;
19708
19709         assert(new_map->has_corpse_footprint);
19710         if (!new_map->has_corpse_footprint ||
19711             new_map->vmmap_corpse_footprint == NULL) {
19712                 return;
19713         }
19714
19715         footprint_header = (struct vm_map_corpse_footprint_header *)
19716             new_map->vmmap_corpse_footprint;
19717         buf_size = footprint_header->cf_size;
19718
19719         footprint_region = (struct vm_map_corpse_footprint_region *)
19720             ((char *)footprint_header +
19721             footprint_header->cf_last_region);
19722
19723         /* get rid of trailing zeroes in last region */
19724         assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19725         footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19726         footprint_header->cf_last_zeroes = 0;
19727
19728         actual_size = (vm_size_t)(footprint_header->cf_last_region +
19729             sizeof(*footprint_region) +
19730             footprint_region->cfr_num_pages);
19731
19732 //      printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19733         vm_map_corpse_footprint_size_avg =
19734             (((vm_map_corpse_footprint_size_avg *
19735             vm_map_corpse_footprint_count) +
19736             actual_size) /
19737             (vm_map_corpse_footprint_count + 1));
19738         vm_map_corpse_footprint_count++;
19739         if (actual_size > vm_map_corpse_footprint_size_max) {
19740                 vm_map_corpse_footprint_size_max = actual_size;
19741         }
19742
19743         actual_size = round_page(actual_size);
19744         if (buf_size > actual_size) {
19745                 kr = vm_deallocate(kernel_map,
19746                     ((vm_address_t)footprint_header +
19747                     actual_size +
19748                     PAGE_SIZE),                 /* trailing guard page */
19749                     (buf_size - actual_size));
19750                 assertf(kr == KERN_SUCCESS,
19751                     "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19752                     footprint_header,
19753                     (uint64_t) buf_size,
19754                     (uint64_t) actual_size,
19755                     kr);
19756                 kr = vm_protect(kernel_map,
19757                     ((vm_address_t)footprint_header +
19758                     actual_size),
19759                     PAGE_SIZE,
19760                     FALSE,             /* set_maximum */
19761                     VM_PROT_NONE);
19762                 assertf(kr == KERN_SUCCESS,
19763                     "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19764                     footprint_header,
19765                     (uint64_t) buf_size,
19766                     (uint64_t) actual_size,
19767                     kr);
19768         }
19769
19770         footprint_header->cf_size = actual_size;
19771 }
19772
19773 /*
19774  * vm_map_corpse_footprint_query_page_info:
19775  *      retrieves the disposition of the page at virtual address "vaddr"
19776  *      in the forked corpse's VM map
19777  *
19778  * This is the equivalent of pmap_query_page_info() for a forked corpse.
19779  */
19780 kern_return_t
19781 vm_map_corpse_footprint_query_page_info(
19782         vm_map_t        map,
19783         vm_map_offset_t va,
19784         int             *disp)
19785 {
19786         struct vm_map_corpse_footprint_header *footprint_header;
19787         struct vm_map_corpse_footprint_region *footprint_region;
19788         uint32_t        footprint_region_offset;
19789         vm_map_offset_t region_start, region_end;
19790         int             disp_idx;
19791         kern_return_t   kr;
19792
19793         if (!map->has_corpse_footprint) {
19794                 *disp = 0;
19795                 kr = KERN_INVALID_ARGUMENT;
19796                 goto done;
19797         }
19798
19799         footprint_header = map->vmmap_corpse_footprint;
19800         if (footprint_header == NULL) {
19801                 *disp = 0;
19802 //              if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19803                 kr = KERN_INVALID_ARGUMENT;
19804                 goto done;
19805         }
19806
19807         /* start looking at the hint ("cf_hint_region") */
19808         footprint_region_offset = footprint_header->cf_hint_region;
19809
19810 lookup_again:
19811         if (footprint_region_offset < sizeof(*footprint_header)) {
19812                 /* hint too low: start from 1st region */
19813                 footprint_region_offset = sizeof(*footprint_header);
19814         }
19815         if (footprint_region_offset >= footprint_header->cf_last_region) {
19816                 /* hint too high: re-start from 1st region */
19817                 footprint_region_offset = sizeof(*footprint_header);
19818         }
19819         footprint_region = (struct vm_map_corpse_footprint_region *)
19820             ((char *)footprint_header + footprint_region_offset);
19821         region_start = footprint_region->cfr_vaddr;
19822         region_end = (region_start +
19823             ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19824             PAGE_SIZE));
19825         if (va < region_start &&
19826             footprint_region_offset != sizeof(*footprint_header)) {
19827                 /* our range starts before the hint region */
19828
19829                 /* reset the hint (in a racy way...) */
19830                 footprint_header->cf_hint_region = sizeof(*footprint_header);
19831                 /* lookup "va" again from 1st region */
19832                 footprint_region_offset = sizeof(*footprint_header);
19833                 goto lookup_again;
19834         }
19835
19836         while (va >= region_end) {
19837                 if (footprint_region_offset >= footprint_header->cf_last_region) {
19838                         break;
19839                 }
19840                 /* skip the region's header */
19841                 footprint_region_offset += sizeof(*footprint_region);
19842                 /* skip the region's page dispositions */
19843                 footprint_region_offset += footprint_region->cfr_num_pages;
19844                 /* align to next word boundary */
19845                 footprint_region_offset =
19846                     roundup(footprint_region_offset,
19847                     sizeof(int));
19848                 footprint_region = (struct vm_map_corpse_footprint_region *)
19849                     ((char *)footprint_header + footprint_region_offset);
19850                 region_start = footprint_region->cfr_vaddr;
19851                 region_end = (region_start +
19852                     ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19853                     PAGE_SIZE));
19854         }
19855         if (va < region_start || va >= region_end) {
19856                 /* page not found */
19857                 *disp = 0;
19858 //              if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19859                 kr = KERN_SUCCESS;
19860                 goto done;
19861         }
19862
19863         /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19864         footprint_header->cf_hint_region = footprint_region_offset;
19865
19866         /* get page disposition for "va" in this region */
19867         disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19868         *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19869
19870         kr = KERN_SUCCESS;
19871 done:
19872 //      if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19873         /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19874         DTRACE_VM4(footprint_query_page_info,
19875             vm_map_t, map,
19876             vm_map_offset_t, va,
19877             int, *disp,
19878             kern_return_t, kr);
19879
19880         return kr;
19881 }
19882
19883
19884 static void
19885 vm_map_corpse_footprint_destroy(
19886         vm_map_t        map)
19887 {
19888         if (map->has_corpse_footprint &&
19889             map->vmmap_corpse_footprint != 0) {
19890                 struct vm_map_corpse_footprint_header *footprint_header;
19891                 vm_size_t buf_size;
19892                 kern_return_t kr;
19893
19894                 footprint_header = map->vmmap_corpse_footprint;
19895                 buf_size = footprint_header->cf_size;
19896                 kr = vm_deallocate(kernel_map,
19897                     (vm_offset_t) map->vmmap_corpse_footprint,
19898                     ((vm_size_t) buf_size
19899                     + PAGE_SIZE));                 /* trailing guard page */
19900                 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19901                 map->vmmap_corpse_footprint = 0;
19902                 map->has_corpse_footprint = FALSE;
19903         }
19904 }
19905
19906 /*
19907  * vm_map_copy_footprint_ledgers:
19908  *      copies any ledger that's relevant to the memory footprint of "old_task"
19909  *      into the forked corpse's task ("new_task")
19910  */
19911 void
19912 vm_map_copy_footprint_ledgers(
19913         task_t  old_task,
19914         task_t  new_task)
19915 {
19916         vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19917         vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19918         vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19919         vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19920         vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19921         vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19922         vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19923         vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19924         vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19925         vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19926         vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
19927         vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19928         vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19929         vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19930         vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19931         vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19932         vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19933         vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19934         vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
19935         vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19936 }
19937
19938 /*
19939  * vm_map_copy_ledger:
19940  *      copy a single ledger from "old_task" to "new_task"
19941  */
19942 void
19943 vm_map_copy_ledger(
19944         task_t  old_task,
19945         task_t  new_task,
19946         int     ledger_entry)
19947 {
19948         ledger_amount_t old_balance, new_balance, delta;
19949
19950         assert(new_task->map->has_corpse_footprint);
19951         if (!new_task->map->has_corpse_footprint) {
19952                 return;
19953         }
19954
19955         /* turn off sanity checks for the ledger we're about to mess with */
19956         ledger_disable_panic_on_negative(new_task->ledger,
19957             ledger_entry);
19958
19959         /* adjust "new_task" to match "old_task" */
19960         ledger_get_balance(old_task->ledger,
19961             ledger_entry,
19962             &old_balance);
19963         ledger_get_balance(new_task->ledger,
19964             ledger_entry,
19965             &new_balance);
19966         if (new_balance == old_balance) {
19967                 /* new == old: done */
19968         } else if (new_balance > old_balance) {
19969                 /* new > old ==> new -= new - old */
19970                 delta = new_balance - old_balance;
19971                 ledger_debit(new_task->ledger,
19972                     ledger_entry,
19973                     delta);
19974         } else {
19975                 /* new < old ==> new += old - new */
19976                 delta = old_balance - new_balance;
19977                 ledger_credit(new_task->ledger,
19978                     ledger_entry,
19979                     delta);
19980         }
19981 }
19982
19983 #if MACH_ASSERT
19984
19985 extern int pmap_ledgers_panic;
19986 extern int pmap_ledgers_panic_leeway;
19987
19988 #define LEDGER_DRIFT(__LEDGER)                    \
19989         int             __LEDGER##_over;          \
19990         ledger_amount_t __LEDGER##_over_total;    \
19991         ledger_amount_t __LEDGER##_over_max;      \
19992         int             __LEDGER##_under;         \
19993         ledger_amount_t __LEDGER##_under_total;   \
19994         ledger_amount_t __LEDGER##_under_max
19995
19996 struct {
19997         uint64_t        num_pmaps_checked;
19998
19999         LEDGER_DRIFT(phys_footprint);
20000         LEDGER_DRIFT(internal);
20001         LEDGER_DRIFT(internal_compressed);
20002         LEDGER_DRIFT(iokit_mapped);
20003         LEDGER_DRIFT(alternate_accounting);
20004         LEDGER_DRIFT(alternate_accounting_compressed);
20005         LEDGER_DRIFT(page_table);
20006         LEDGER_DRIFT(purgeable_volatile);
20007         LEDGER_DRIFT(purgeable_nonvolatile);
20008         LEDGER_DRIFT(purgeable_volatile_compressed);
20009         LEDGER_DRIFT(purgeable_nonvolatile_compressed);
20010         LEDGER_DRIFT(tagged_nofootprint);
20011         LEDGER_DRIFT(tagged_footprint);
20012         LEDGER_DRIFT(tagged_nofootprint_compressed);
20013         LEDGER_DRIFT(tagged_footprint_compressed);
20014         LEDGER_DRIFT(network_volatile);
20015         LEDGER_DRIFT(network_nonvolatile);
20016         LEDGER_DRIFT(network_volatile_compressed);
20017         LEDGER_DRIFT(network_nonvolatile_compressed);
20018         LEDGER_DRIFT(media_nofootprint);
20019         LEDGER_DRIFT(media_footprint);
20020         LEDGER_DRIFT(media_nofootprint_compressed);
20021         LEDGER_DRIFT(media_footprint_compressed);
20022         LEDGER_DRIFT(graphics_nofootprint);
20023         LEDGER_DRIFT(graphics_footprint);
20024         LEDGER_DRIFT(graphics_nofootprint_compressed);
20025         LEDGER_DRIFT(graphics_footprint_compressed);
20026         LEDGER_DRIFT(neural_nofootprint);
20027         LEDGER_DRIFT(neural_footprint);
20028         LEDGER_DRIFT(neural_nofootprint_compressed);
20029         LEDGER_DRIFT(neural_footprint_compressed);
20030 } pmap_ledgers_drift;
20031
20032 void
20033 vm_map_pmap_check_ledgers(
20034         pmap_t          pmap,
20035         ledger_t        ledger,
20036         int             pid,
20037         char            *procname)
20038 {
20039         ledger_amount_t bal;
20040         boolean_t       do_panic;
20041
20042         do_panic = FALSE;
20043
20044         pmap_ledgers_drift.num_pmaps_checked++;
20045
20046 #define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
20047 MACRO_BEGIN                                                             \
20048         int panic_on_negative = TRUE;                                   \
20049         ledger_get_balance(ledger,                                      \
20050                            task_ledgers.__LEDGER,                       \
20051                            &bal);                                       \
20052         ledger_get_panic_on_negative(ledger,                            \
20053                                      task_ledgers.__LEDGER,             \
20054                                      &panic_on_negative);               \
20055         if (bal != 0) {                                                 \
20056                 if (panic_on_negative ||                                \
20057                     (pmap_ledgers_panic &&                              \
20058                      pmap_ledgers_panic_leeway > 0 &&                   \
20059                      (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||  \
20060                       bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20061                         do_panic = TRUE;                                \
20062                 }                                                       \
20063                 printf("LEDGER BALANCE proc %d (%s) "                   \
20064                        "\"%s\" = %lld\n",                               \
20065                        pid, procname, #__LEDGER, bal);                  \
20066                 if (bal > 0) {                                          \
20067                         pmap_ledgers_drift.__LEDGER##_over++;           \
20068                         pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20069                         if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20070                                 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20071                         }                                               \
20072                 } else if (bal < 0) {                                   \
20073                         pmap_ledgers_drift.__LEDGER##_under++;          \
20074                         pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20075                         if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20076                                 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20077                         }                                               \
20078                 }                                                       \
20079         }                                                               \
20080 MACRO_END
20081
20082         LEDGER_CHECK_BALANCE(phys_footprint);
20083         LEDGER_CHECK_BALANCE(internal);
20084         LEDGER_CHECK_BALANCE(internal_compressed);
20085         LEDGER_CHECK_BALANCE(iokit_mapped);
20086         LEDGER_CHECK_BALANCE(alternate_accounting);
20087         LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
20088         LEDGER_CHECK_BALANCE(page_table);
20089         LEDGER_CHECK_BALANCE(purgeable_volatile);
20090         LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
20091         LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
20092         LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
20093         LEDGER_CHECK_BALANCE(tagged_nofootprint);
20094         LEDGER_CHECK_BALANCE(tagged_footprint);
20095         LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
20096         LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
20097         LEDGER_CHECK_BALANCE(network_volatile);
20098         LEDGER_CHECK_BALANCE(network_nonvolatile);
20099         LEDGER_CHECK_BALANCE(network_volatile_compressed);
20100         LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
20101         LEDGER_CHECK_BALANCE(media_nofootprint);
20102         LEDGER_CHECK_BALANCE(media_footprint);
20103         LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
20104         LEDGER_CHECK_BALANCE(media_footprint_compressed);
20105         LEDGER_CHECK_BALANCE(graphics_nofootprint);
20106         LEDGER_CHECK_BALANCE(graphics_footprint);
20107         LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
20108         LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
20109         LEDGER_CHECK_BALANCE(neural_nofootprint);
20110         LEDGER_CHECK_BALANCE(neural_footprint);
20111         LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
20112         LEDGER_CHECK_BALANCE(neural_footprint_compressed);
20113
20114         if (do_panic) {
20115                 if (pmap_ledgers_panic) {
20116                         panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20117                             pmap, pid, procname);
20118                 } else {
20119                         printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20120                             pmap, pid, procname);
20121                 }
20122         }
20123 }
20124 #endif /* MACH_ASSERT */