1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
111 #include <vm/vm_protos.h>
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <libkern/section_keywords.h>
119 #if DEVELOPMENT || DEBUG
120 extern int proc_selfcsflags(void);
121 #if CONFIG_EMBEDDED
122 extern int panic_on_unsigned_execute;
123 #endif /* CONFIG_EMBEDDED */
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if __arm64__
127 extern const int fourk_binary_compatibility_unsafe;
128 extern const int fourk_binary_compatibility_allow_wx;
129 #endif /* __arm64__ */
130 extern int proc_selfpid(void);
131 extern char *proc_name_address(void *p);
132
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 int vm_map_debug_apple_protect = 0;
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136 #if VM_MAP_DEBUG_FOURK
137 int vm_map_debug_fourk = 0;
138 #endif /* VM_MAP_DEBUG_FOURK */
139
140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
141 int vm_map_executable_immutable_verbose = 0;
142
143 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
144
145 extern u_int32_t random(void); /* from <libkern/libkern.h> */
146 /* Internal prototypes
147 */
148
149 static void vm_map_simplify_range(
150 vm_map_t map,
151 vm_map_offset_t start,
152 vm_map_offset_t end); /* forward */
153
154 static boolean_t vm_map_range_check(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 vm_map_entry_t *entry);
159
160 static vm_map_entry_t _vm_map_entry_create(
161 struct vm_map_header *map_header, boolean_t map_locked);
162
163 static void _vm_map_entry_dispose(
164 struct vm_map_header *map_header,
165 vm_map_entry_t entry);
166
167 static void vm_map_pmap_enter(
168 vm_map_t map,
169 vm_map_offset_t addr,
170 vm_map_offset_t end_addr,
171 vm_object_t object,
172 vm_object_offset_t offset,
173 vm_prot_t protection);
174
175 static void _vm_map_clip_end(
176 struct vm_map_header *map_header,
177 vm_map_entry_t entry,
178 vm_map_offset_t end);
179
180 static void _vm_map_clip_start(
181 struct vm_map_header *map_header,
182 vm_map_entry_t entry,
183 vm_map_offset_t start);
184
185 static void vm_map_entry_delete(
186 vm_map_t map,
187 vm_map_entry_t entry);
188
189 static kern_return_t vm_map_delete(
190 vm_map_t map,
191 vm_map_offset_t start,
192 vm_map_offset_t end,
193 int flags,
194 vm_map_t zap_map);
195
196 static void vm_map_copy_insert(
197 vm_map_t map,
198 vm_map_entry_t after_where,
199 vm_map_copy_t copy);
200
201 static kern_return_t vm_map_copy_overwrite_unaligned(
202 vm_map_t dst_map,
203 vm_map_entry_t entry,
204 vm_map_copy_t copy,
205 vm_map_address_t start,
206 boolean_t discard_on_success);
207
208 static kern_return_t vm_map_copy_overwrite_aligned(
209 vm_map_t dst_map,
210 vm_map_entry_t tmp_entry,
211 vm_map_copy_t copy,
212 vm_map_offset_t start,
213 pmap_t pmap);
214
215 static kern_return_t vm_map_copyin_kernel_buffer(
216 vm_map_t src_map,
217 vm_map_address_t src_addr,
218 vm_map_size_t len,
219 boolean_t src_destroy,
220 vm_map_copy_t *copy_result); /* OUT */
221
222 static kern_return_t vm_map_copyout_kernel_buffer(
223 vm_map_t map,
224 vm_map_address_t *addr, /* IN/OUT */
225 vm_map_copy_t copy,
226 vm_map_size_t copy_size,
227 boolean_t overwrite,
228 boolean_t consume_on_success);
229
230 static void vm_map_fork_share(
231 vm_map_t old_map,
232 vm_map_entry_t old_entry,
233 vm_map_t new_map);
234
235 static boolean_t vm_map_fork_copy(
236 vm_map_t old_map,
237 vm_map_entry_t *old_entry_p,
238 vm_map_t new_map,
239 int vm_map_copyin_flags);
240
241 static kern_return_t vm_map_wire_nested(
242 vm_map_t map,
243 vm_map_offset_t start,
244 vm_map_offset_t end,
245 vm_prot_t caller_prot,
246 vm_tag_t tag,
247 boolean_t user_wire,
248 pmap_t map_pmap,
249 vm_map_offset_t pmap_addr,
250 ppnum_t *physpage_p);
251
252 static kern_return_t vm_map_unwire_nested(
253 vm_map_t map,
254 vm_map_offset_t start,
255 vm_map_offset_t end,
256 boolean_t user_wire,
257 pmap_t map_pmap,
258 vm_map_offset_t pmap_addr);
259
260 static kern_return_t vm_map_overwrite_submap_recurse(
261 vm_map_t dst_map,
262 vm_map_offset_t dst_addr,
263 vm_map_size_t dst_size);
264
265 static kern_return_t vm_map_copy_overwrite_nested(
266 vm_map_t dst_map,
267 vm_map_offset_t dst_addr,
268 vm_map_copy_t copy,
269 boolean_t interruptible,
270 pmap_t pmap,
271 boolean_t discard_on_success);
272
273 static kern_return_t vm_map_remap_extract(
274 vm_map_t map,
275 vm_map_offset_t addr,
276 vm_map_size_t size,
277 boolean_t copy,
278 struct vm_map_header *map_header,
279 vm_prot_t *cur_protection,
280 vm_prot_t *max_protection,
281 vm_inherit_t inheritance,
282 boolean_t pageable,
283 boolean_t same_map,
284 vm_map_kernel_flags_t vmk_flags);
285
286 static kern_return_t vm_map_remap_range_allocate(
287 vm_map_t map,
288 vm_map_address_t *address,
289 vm_map_size_t size,
290 vm_map_offset_t mask,
291 int flags,
292 vm_map_kernel_flags_t vmk_flags,
293 vm_tag_t tag,
294 vm_map_entry_t *map_entry);
295
296 static void vm_map_region_look_for_page(
297 vm_map_t map,
298 vm_map_offset_t va,
299 vm_object_t object,
300 vm_object_offset_t offset,
301 int max_refcnt,
302 int depth,
303 vm_region_extended_info_t extended,
304 mach_msg_type_number_t count);
305
306 static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry,
308 vm_object_t object);
309
310
311 static kern_return_t vm_map_willneed(
312 vm_map_t map,
313 vm_map_offset_t start,
314 vm_map_offset_t end);
315
316 static kern_return_t vm_map_reuse_pages(
317 vm_map_t map,
318 vm_map_offset_t start,
319 vm_map_offset_t end);
320
321 static kern_return_t vm_map_reusable_pages(
322 vm_map_t map,
323 vm_map_offset_t start,
324 vm_map_offset_t end);
325
326 static kern_return_t vm_map_can_reuse(
327 vm_map_t map,
328 vm_map_offset_t start,
329 vm_map_offset_t end);
330
331 #if MACH_ASSERT
332 static kern_return_t vm_map_pageout(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
336 #endif /* MACH_ASSERT */
337
338 static void vm_map_corpse_footprint_destroy(
339 vm_map_t map);
340
341 pid_t find_largest_process_vm_map_entries(void);
342
343 /*
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346 * map entry to the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
350 * vm_map_copyout.
351 */
352
353 #if CONFIG_EMBEDDED
354
355 /*
356 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
357 * But for security reasons on embedded platforms, we don't want the
358 * new mapping to be "used for jit", so we always reset the flag here.
359 * Same for "pmap_cs_associated".
360 */
361 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
362 MACRO_BEGIN \
363 (NEW)->used_for_jit = FALSE; \
364 (NEW)->pmap_cs_associated = FALSE; \
365 MACRO_END
366
367 #else /* CONFIG_EMBEDDED */
368
369 /*
370 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
371 * On macOS, the new mapping can be "used for jit".
372 */
373 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
374 MACRO_BEGIN \
375 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
376 assert((NEW)->pmap_cs_associated == FALSE); \
377 MACRO_END
378
379 #endif /* CONFIG_EMBEDDED */
380
381 #define vm_map_entry_copy(NEW, OLD) \
382 MACRO_BEGIN \
383 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
384 *(NEW) = *(OLD); \
385 (NEW)->is_shared = FALSE; \
386 (NEW)->needs_wakeup = FALSE; \
387 (NEW)->in_transition = FALSE; \
388 (NEW)->wired_count = 0; \
389 (NEW)->user_wired_count = 0; \
390 (NEW)->permanent = FALSE; \
391 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
392 (NEW)->from_reserved_zone = _vmec_reserved; \
393 if ((NEW)->iokit_acct) { \
394 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
395 (NEW)->iokit_acct = FALSE; \
396 (NEW)->use_pmap = TRUE; \
397 } \
398 (NEW)->vme_resilient_codesign = FALSE; \
399 (NEW)->vme_resilient_media = FALSE; \
400 (NEW)->vme_atomic = FALSE; \
401 (NEW)->vme_no_copy_on_read = FALSE; \
402 MACRO_END
403
404 #define vm_map_entry_copy_full(NEW, OLD) \
405 MACRO_BEGIN \
406 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
407 (*(NEW) = *(OLD)); \
408 (NEW)->from_reserved_zone = _vmecf_reserved; \
409 MACRO_END
410
411 /*
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413 * These functions evaluate to zero on success and to a non-zero value on failure.
414 */
415 __attribute__((always_inline))
416 int
417 vm_map_lock_read_to_write(vm_map_t map)
418 {
419 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
420 DTRACE_VM(vm_map_lock_upgrade);
421 return 0;
422 }
423 return 1;
424 }
425
426 __attribute__((always_inline))
427 boolean_t
428 vm_map_try_lock(vm_map_t map)
429 {
430 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
431 DTRACE_VM(vm_map_lock_w);
432 return TRUE;
433 }
434 return FALSE;
435 }
436
437 __attribute__((always_inline))
438 boolean_t
439 vm_map_try_lock_read(vm_map_t map)
440 {
441 if (lck_rw_try_lock_shared(&(map)->lock)) {
442 DTRACE_VM(vm_map_lock_r);
443 return TRUE;
444 }
445 return FALSE;
446 }
447
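/*
 * Illustrative usage sketch (not part of the original source): because
 * vm_map_lock_read_to_write() follows the inverted convention documented
 * above (zero on success, non-zero on failure), and a failed upgrade drops
 * the shared lock (as lck_rw_lock_shared_to_exclusive() does on failure),
 * a typical caller falls back to taking the lock exclusively.
 * "example_modify_map" is a hypothetical caller.
 */
#if 0 /* example only */
static void
example_modify_map(vm_map_t map)
{
	vm_map_lock_read(map);
	/* ... inspect the map under the shared lock ... */
	if (vm_map_lock_read_to_write(map)) {
		/* upgrade failed: the shared lock is gone, take it exclusive */
		vm_map_lock(map);
	}
	/* ... modify the map under the exclusive lock ... */
	vm_map_unlock(map);
}
#endif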
448 /*
449 * Decide if we want to allow processes to execute from their data or stack areas.
450 * override_nx() returns true if we do. Data/stack execution can be enabled independently
451 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
452 * or allow_stack_exec to enable data execution for that type of data area for that particular
453 * ABI (or both by or'ing the flags together). These are initialized in the architecture
454 * specific pmap files since the default behavior varies according to architecture. The
455 * main reason it varies is because of the need to provide binary compatibility with old
456 * applications that were written before these restrictions came into being. In the old
457 * days, an app could execute anything it could read, but this has slowly been tightened
458 * up over time. The default behavior is:
459 *
460 * 32-bit PPC apps may execute from both stack and data areas
461 * 32-bit Intel apps may execute from data areas but not stack
462 * 64-bit PPC/Intel apps may not execute from either data or stack
463 *
464 * An application on any architecture may override these defaults by explicitly
465 * adding PROT_EXEC permission to the page in question with the mprotect(2)
466 * system call. This code here just determines what happens when an app tries to
467 * execute from a page that lacks execute permission.
468 *
469 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
470 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
471 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
472 * execution from data areas for a particular binary even if the arch normally permits it. As
473 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
474 * to support some complicated use cases, notably browsers with out-of-process plugins that
475 * are not all NX-safe.
476 */
477
478 extern int allow_data_exec, allow_stack_exec;
479
480 int
481 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
482 {
483 int current_abi;
484
485 if (map->pmap == kernel_pmap) {
486 return FALSE;
487 }
488
489 /*
490 * Determine if the app is running in 32 or 64 bit mode.
491 */
492
493 if (vm_map_is_64bit(map)) {
494 current_abi = VM_ABI_64;
495 } else {
496 current_abi = VM_ABI_32;
497 }
498
499 /*
500 * Determine if we should allow the execution based on whether it's a
501 * stack or data area and the current architecture.
502 */
503
504 if (user_tag == VM_MEMORY_STACK) {
505 return allow_stack_exec & current_abi;
506 }
507
508 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
509 }
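/*
 * Illustrative example (not part of the original source): an architecture
 * that wanted to permit data-area execution for both ABIs would initialize
 * the policy variable in its pmap code as
 *
 *	allow_data_exec = VM_ABI_32 | VM_ABI_64;
 *
 * override_nx() then masks the variable with the calling task's ABI, so a
 * 64-bit task faulting on a VM_MEMORY_STACK region is only allowed to
 * execute there if allow_stack_exec has the VM_ABI_64 bit set.
 */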
510
511
512 /*
513 * Virtual memory maps provide for the mapping, protection,
514 * and sharing of virtual memory objects. In addition,
515 * this module provides for an efficient virtual copy of
516 * memory from one map to another.
517 *
518 * Synchronization is required prior to most operations.
519 *
520 * Maps consist of an ordered doubly-linked list of simple
521 * entries; a single hint is used to speed up lookups.
522 *
523 * Sharing maps have been deleted from this version of Mach.
524 * All shared objects are now mapped directly into the respective
525 * maps. This requires a change in the copy on write strategy;
526 * the asymmetric (delayed) strategy is used for shared temporary
527 * objects instead of the symmetric (shadow) strategy. All maps
528 * are now "top level" maps (either task map, kernel map or submap
529 * of the kernel map).
530 *
531 * Since portions of maps are specified by start/end addresses,
532 * which may not align with existing map entries, all
533 * routines merely "clip" entries to these start/end values.
534 * [That is, an entry is split into two, bordering at a
535 * start or end value.] Note that these clippings may not
536 * always be necessary (as the two resulting entries are then
537 * not changed); however, the clipping is done for convenience.
538 * No attempt is currently made to "glue back together" two
539 * abutting entries.
540 *
541 * The symmetric (shadow) copy strategy implements virtual copy
542 * by copying VM object references from one map to
543 * another, and then marking both regions as copy-on-write.
544 * It is important to note that only one writeable reference
545 * to a VM object region exists in any map when this strategy
546 * is used -- this means that shadow object creation can be
547 * delayed until a write operation occurs. The asymmetric (delayed)
548 * strategy allows multiple maps to have writeable references to
549 * the same region of a vm object, and hence cannot delay creating
550 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
551 * Copying of permanent objects is completely different; see
552 * vm_object_copy_strategically() in vm_object.c.
553 */
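/*
 * Illustrative example (not part of the original comment): a protection
 * change that covers only the middle of an existing entry first clips that
 * entry at the new start and end addresses, leaving three abutting entries
 * that differ only in their protection bits; as noted above, the two
 * unchanged pieces are not glued back together afterwards.
 */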
554
555 static zone_t vm_map_zone; /* zone for vm_map structures */
556 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
557 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
558 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
559 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
560
561
562 /*
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
566 */
567
568 vm_object_t vm_submap_object;
569
570 static void *map_data;
571 static vm_size_t map_data_size;
572 static void *kentry_data;
573 static vm_size_t kentry_data_size;
574 static void *map_holes_data;
575 static vm_size_t map_holes_data_size;
576
577 #if CONFIG_EMBEDDED
578 #define NO_COALESCE_LIMIT 0
579 #else
580 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
581 #endif
582
583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
584 unsigned int not_in_kdp = 1;
585
586 unsigned int vm_map_set_cache_attr_count = 0;
587
588 kern_return_t
589 vm_map_set_cache_attr(
590 vm_map_t map,
591 vm_map_offset_t va)
592 {
593 vm_map_entry_t map_entry;
594 vm_object_t object;
595 kern_return_t kr = KERN_SUCCESS;
596
597 vm_map_lock_read(map);
598
599 if (!vm_map_lookup_entry(map, va, &map_entry) ||
600 map_entry->is_sub_map) {
601 /*
602 * that memory is not properly mapped
603 */
604 kr = KERN_INVALID_ARGUMENT;
605 goto done;
606 }
607 object = VME_OBJECT(map_entry);
608
609 if (object == VM_OBJECT_NULL) {
610 /*
611 * there should be a VM object here at this point
612 */
613 kr = KERN_INVALID_ARGUMENT;
614 goto done;
615 }
616 vm_object_lock(object);
617 object->set_cache_attr = TRUE;
618 vm_object_unlock(object);
619
620 vm_map_set_cache_attr_count++;
621 done:
622 vm_map_unlock_read(map);
623
624 return kr;
625 }
626
627
628 #if CONFIG_CODE_DECRYPTION
629 /*
630 * vm_map_apple_protected:
631 * This remaps the requested part of the object with an object backed by
632 * the decrypting pager.
633 * crypt_info contains entry points and session data for the crypt module.
634 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
635 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
636 */
637 kern_return_t
638 vm_map_apple_protected(
639 vm_map_t map,
640 vm_map_offset_t start,
641 vm_map_offset_t end,
642 vm_object_offset_t crypto_backing_offset,
643 struct pager_crypt_info *crypt_info)
644 {
645 boolean_t map_locked;
646 kern_return_t kr;
647 vm_map_entry_t map_entry;
648 struct vm_map_entry tmp_entry;
649 memory_object_t unprotected_mem_obj;
650 vm_object_t protected_object;
651 vm_map_offset_t map_addr;
652 vm_map_offset_t start_aligned, end_aligned;
653 vm_object_offset_t crypto_start, crypto_end;
654 int vm_flags;
655 vm_map_kernel_flags_t vmk_flags;
656
657 vm_flags = 0;
658 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
659
660 map_locked = FALSE;
661 unprotected_mem_obj = MEMORY_OBJECT_NULL;
662
663 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
664 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
665 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
666 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
667
668 #if __arm64__
669 /*
670 * "start" and "end" might be 4K-aligned but not 16K-aligned,
671 * so we might have to loop and establish up to 3 mappings:
672 *
673 * + the first 16K-page, which might overlap with the previous
674 * 4K-aligned mapping,
675 * + the center,
676 * + the last 16K-page, which might overlap with the next
677 * 4K-aligned mapping.
678 * Each of these mapping might be backed by a vnode pager (if
679 * properly page-aligned) or a "fourk_pager", itself backed by a
680 * vnode pager (if 4K-aligned but not page-aligned).
681 */
682 #endif /* __arm64__ */
683
684 map_addr = start_aligned;
685 for (map_addr = start_aligned;
686 map_addr < end;
687 map_addr = tmp_entry.vme_end) {
688 vm_map_lock(map);
689 map_locked = TRUE;
690
691 /* lookup the protected VM object */
692 if (!vm_map_lookup_entry(map,
693 map_addr,
694 &map_entry) ||
695 map_entry->is_sub_map ||
696 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
697 !(map_entry->protection & VM_PROT_EXECUTE)) {
698 /* that memory is not properly mapped */
699 kr = KERN_INVALID_ARGUMENT;
700 goto done;
701 }
702
703 /* get the protected object to be decrypted */
704 protected_object = VME_OBJECT(map_entry);
705 if (protected_object == VM_OBJECT_NULL) {
706 /* there should be a VM object here at this point */
707 kr = KERN_INVALID_ARGUMENT;
708 goto done;
709 }
710 /* ensure protected object stays alive while map is unlocked */
711 vm_object_reference(protected_object);
712
713 /* limit the map entry to the area we want to cover */
714 vm_map_clip_start(map, map_entry, start_aligned);
715 vm_map_clip_end(map, map_entry, end_aligned);
716
717 tmp_entry = *map_entry;
718 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
719 vm_map_unlock(map);
720 map_locked = FALSE;
721
722 /*
723 * This map entry might be only partially encrypted
724 * (if not fully "page-aligned").
725 */
726 crypto_start = 0;
727 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
728 if (tmp_entry.vme_start < start) {
729 if (tmp_entry.vme_start != start_aligned) {
730 kr = KERN_INVALID_ADDRESS;
731 }
732 crypto_start += (start - tmp_entry.vme_start);
733 }
734 if (tmp_entry.vme_end > end) {
735 if (tmp_entry.vme_end != end_aligned) {
736 kr = KERN_INVALID_ADDRESS;
737 }
738 crypto_end -= (tmp_entry.vme_end - end);
739 }
740
741 /*
742 * This "extra backing offset" is needed to get the decryption
743 * routine to use the right key. It adjusts for the possibly
744 * relative offset of an interposed "4K" pager...
745 */
746 if (crypto_backing_offset == (vm_object_offset_t) -1) {
747 crypto_backing_offset = VME_OFFSET(&tmp_entry);
748 }
749
750 /*
751 * Lookup (and create if necessary) the protected memory object
752 * matching that VM object.
753 * If successful, this also grabs a reference on the memory object,
754 * to guarantee that it doesn't go away before we get a chance to map
755 * it.
756 */
757 unprotected_mem_obj = apple_protect_pager_setup(
758 protected_object,
759 VME_OFFSET(&tmp_entry),
760 crypto_backing_offset,
761 crypt_info,
762 crypto_start,
763 crypto_end);
764
765 /* release extra ref on protected object */
766 vm_object_deallocate(protected_object);
767
768 if (unprotected_mem_obj == NULL) {
769 kr = KERN_FAILURE;
770 goto done;
771 }
772
773 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
774 /* can overwrite an immutable mapping */
775 vmk_flags.vmkf_overwrite_immutable = TRUE;
776 #if __arm64__
777 if (tmp_entry.used_for_jit &&
778 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
779 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
780 fourk_binary_compatibility_unsafe &&
781 fourk_binary_compatibility_allow_wx) {
782 printf("** FOURK_COMPAT [%d]: "
783 "allowing write+execute at 0x%llx\n",
784 proc_selfpid(), tmp_entry.vme_start);
785 vmk_flags.vmkf_map_jit = TRUE;
786 }
787 #endif /* __arm64__ */
788
789 /* map this memory object in place of the current one */
790 map_addr = tmp_entry.vme_start;
791 kr = vm_map_enter_mem_object(map,
792 &map_addr,
793 (tmp_entry.vme_end -
794 tmp_entry.vme_start),
795 (mach_vm_offset_t) 0,
796 vm_flags,
797 vmk_flags,
798 VM_KERN_MEMORY_NONE,
799 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
800 0,
801 TRUE,
802 tmp_entry.protection,
803 tmp_entry.max_protection,
804 tmp_entry.inheritance);
805 assertf(kr == KERN_SUCCESS,
806 "kr = 0x%x\n", kr);
807 assertf(map_addr == tmp_entry.vme_start,
808 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
809 (uint64_t)map_addr,
810 (uint64_t) tmp_entry.vme_start,
811 &tmp_entry);
812
813 #if VM_MAP_DEBUG_APPLE_PROTECT
814 if (vm_map_debug_apple_protect) {
815 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
816 " backing:[object:%p,offset:0x%llx,"
817 "crypto_backing_offset:0x%llx,"
818 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
819 map,
820 (uint64_t) map_addr,
821 (uint64_t) (map_addr + (tmp_entry.vme_end -
822 tmp_entry.vme_start)),
823 unprotected_mem_obj,
824 protected_object,
825 VME_OFFSET(&tmp_entry),
826 crypto_backing_offset,
827 crypto_start,
828 crypto_end);
829 }
830 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
831
832 /*
833 * Release the reference obtained by
834 * apple_protect_pager_setup().
835 * The mapping (if it succeeded) is now holding a reference on
836 * the memory object.
837 */
838 memory_object_deallocate(unprotected_mem_obj);
839 unprotected_mem_obj = MEMORY_OBJECT_NULL;
840
841 /* continue with next map entry */
842 crypto_backing_offset += (tmp_entry.vme_end -
843 tmp_entry.vme_start);
844 crypto_backing_offset -= crypto_start;
845 }
846 kr = KERN_SUCCESS;
847
848 done:
849 if (map_locked) {
850 vm_map_unlock(map);
851 }
852 return kr;
853 }
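/*
 * Illustrative usage sketch (not part of the original source): a
 * hypothetical caller hands the decryption policy to the pager via
 * crypt_info and lets the routine derive the backing offset itself by
 * passing -1, as handled above.  The structures referenced by crypt_info
 * must stay valid until crypt_info->crypt_end() is called.
 */
#if 0 /* example only */
static kern_return_t
example_protect_text_range(
	vm_map_t                 map,
	vm_map_offset_t          start,
	vm_map_offset_t          end,
	struct pager_crypt_info *crypt_info)
{
	return vm_map_apple_protected(map, start, end,
	           (vm_object_offset_t) -1,   /* fall back to each entry's VME_OFFSET() */
	           crypt_info);
}
#endif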
854 #endif /* CONFIG_CODE_DECRYPTION */
855
856
857 lck_grp_t vm_map_lck_grp;
858 lck_grp_attr_t vm_map_lck_grp_attr;
859 lck_attr_t vm_map_lck_attr;
860 lck_attr_t vm_map_lck_rw_attr;
861
862 #if CONFIG_EMBEDDED
863 int malloc_no_cow = 1;
864 #define VM_PROTECT_WX_FAIL 0
865 #else /* CONFIG_EMBEDDED */
866 int malloc_no_cow = 0;
867 #define VM_PROTECT_WX_FAIL 1
868 #endif /* CONFIG_EMBEDDED */
869 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
870 #if DEBUG
871 int vm_check_map_sanity = 0;
872 #endif
873
874 /*
875 * vm_map_init:
876 *
877 * Initialize the vm_map module. Must be called before
878 * any other vm_map routines.
879 *
880 * Map and entry structures are allocated from zones -- we must
881 * initialize those zones.
882 *
883 * There are three zones of interest:
884 *
885 * vm_map_zone: used to allocate maps.
886 * vm_map_entry_zone: used to allocate map entries.
887 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
888 *
889 * The kernel allocates map entries from a special zone that is initially
890 * "crammed" with memory. It would be difficult (perhaps impossible) for
891 * the kernel to allocate more memory to an entry zone when it became
892 * empty since the very act of allocating memory implies the creation
893 * of a new entry.
894 */
895 void
896 vm_map_init(
897 void)
898 {
899 vm_size_t entry_zone_alloc_size;
900 const char *mez_name = "VM map entries";
901
902 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
903 PAGE_SIZE, "maps");
904 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
905 #if defined(__LP64__)
906 entry_zone_alloc_size = PAGE_SIZE * 5;
907 #else
908 entry_zone_alloc_size = PAGE_SIZE * 6;
909 #endif
910 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
911 1024 * 1024, entry_zone_alloc_size,
912 mez_name);
913 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
914 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
915 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
916
917 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
918 kentry_data_size * 64, kentry_data_size,
919 "Reserved VM map entries");
920 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
921 /* Don't quarantine because we always need elements available */
922 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
923
924 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
925 16 * 1024, PAGE_SIZE, "VM map copies");
926 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
927
928 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
929 16 * 1024, PAGE_SIZE, "VM map holes");
930 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
931
932 /*
933 * Cram the map and kentry zones with initial data.
934 * Set reserved_zone non-collectible to aid zone_gc().
935 */
936 zone_change(vm_map_zone, Z_COLLECT, FALSE);
937 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
938 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
939
940 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
941 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
942 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
943 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
944 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
945 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
946 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
947
948 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
949 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
950 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
951 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
952 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
953 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
954
955 /*
956 * Add the stolen memory to zones, adjust zone size and stolen counts.
957 * zcram only up to the maximum number of pages for each zone chunk.
958 */
959 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
960
961 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
962 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
963 zcram(vm_map_entry_reserved_zone,
964 (vm_offset_t)kentry_data + off,
965 MIN(kentry_data_size - off, stride));
966 }
967 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
968 zcram(vm_map_holes_zone,
969 (vm_offset_t)map_holes_data + off,
970 MIN(map_holes_data_size - off, stride));
971 }
972
973 /*
974 * Since these are covered by zones, remove them from stolen page accounting.
975 */
976 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
977
978 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
979 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
980 lck_attr_setdefault(&vm_map_lck_attr);
981
982 lck_attr_setdefault(&vm_map_lck_rw_attr);
983 lck_attr_cleardebug(&vm_map_lck_rw_attr);
984
985 #if VM_MAP_DEBUG_APPLE_PROTECT
986 PE_parse_boot_argn("vm_map_debug_apple_protect",
987 &vm_map_debug_apple_protect,
988 sizeof(vm_map_debug_apple_protect));
989 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
990 #if VM_MAP_DEBUG_FOURK
991 PE_parse_boot_argn("vm_map_debug_fourk",
992 &vm_map_debug_fourk,
993 sizeof(vm_map_debug_fourk));
994 #endif /* VM_MAP_DEBUG_FOURK */
995 PE_parse_boot_argn("vm_map_executable_immutable",
996 &vm_map_executable_immutable,
997 sizeof(vm_map_executable_immutable));
998 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
999 &vm_map_executable_immutable_verbose,
1000 sizeof(vm_map_executable_immutable_verbose));
1001
1002 PE_parse_boot_argn("malloc_no_cow",
1003 &malloc_no_cow,
1004 sizeof(malloc_no_cow));
1005 if (malloc_no_cow) {
1006 vm_memory_malloc_no_cow_mask = 0ULL;
1007 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1008 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1009 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1010 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1011 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1012 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1013 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1014 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1015 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1016 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1017 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1018 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1019 &vm_memory_malloc_no_cow_mask,
1020 sizeof(vm_memory_malloc_no_cow_mask));
1021 }
1022
1023 #if DEBUG
1024 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1025 if (vm_check_map_sanity) {
1026 kprintf("VM sanity checking enabled\n");
1027 } else {
1028 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1029 }
1030 #endif /* DEBUG */
1031 }
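/*
 * Illustrative note (not part of the original source): the boot-args
 * parsed above are supplied on the kernel command line, for example
 *
 *	nvram boot-args="vm_map_executable_immutable_verbose=1 malloc_no_cow=0"
 *
 * vm_check_map_sanity is only consulted on DEBUG kernels, and the
 * vm_map_debug_* knobs only exist when their respective VM_MAP_DEBUG_*
 * options are compiled in.
 */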
1032
1033 void
1034 vm_map_steal_memory(
1035 void)
1036 {
1037 uint32_t kentry_initial_pages;
1038
1039 map_data_size = round_page(10 * sizeof(struct _vm_map));
1040 map_data = pmap_steal_memory(map_data_size);
1041
1042 /*
1043 * kentry_initial_pages corresponds to the number of kernel map entries
1044 * required during bootstrap until the asynchronous replenishment
1045 * scheme is activated and/or entries are available from the general
1046 * map entry pool.
1047 */
1048 #if defined(__LP64__)
1049 kentry_initial_pages = 10;
1050 #else
1051 kentry_initial_pages = 6;
1052 #endif
1053
1054 #if CONFIG_GZALLOC
1055 /* If using the guard allocator, reserve more memory for the kernel
1056 * reserved map entry pool.
1057 */
1058 if (gzalloc_enabled()) {
1059 kentry_initial_pages *= 1024;
1060 }
1061 #endif
1062
1063 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1064 kentry_data = pmap_steal_memory(kentry_data_size);
1065
1066 map_holes_data_size = kentry_data_size;
1067 map_holes_data = pmap_steal_memory(map_holes_data_size);
1068 }
1069
1070 boolean_t vm_map_supports_hole_optimization = FALSE;
1071
1072 void
1073 vm_kernel_reserved_entry_init(void)
1074 {
1075 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
1076
1077 /*
1078 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1079 */
1080 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
1081 vm_map_supports_hole_optimization = TRUE;
1082 }
1083
1084 void
1085 vm_map_disable_hole_optimization(vm_map_t map)
1086 {
1087 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1088
1089 if (map->holelistenabled) {
1090 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1091
1092 while (hole_entry != NULL) {
1093 next_hole_entry = hole_entry->vme_next;
1094
1095 hole_entry->vme_next = NULL;
1096 hole_entry->vme_prev = NULL;
1097 zfree(vm_map_holes_zone, hole_entry);
1098
1099 if (next_hole_entry == head_entry) {
1100 hole_entry = NULL;
1101 } else {
1102 hole_entry = next_hole_entry;
1103 }
1104 }
1105
1106 map->holes_list = NULL;
1107 map->holelistenabled = FALSE;
1108
1109 map->first_free = vm_map_first_entry(map);
1110 SAVE_HINT_HOLE_WRITE(map, NULL);
1111 }
1112 }
1113
1114 boolean_t
1115 vm_kernel_map_is_kernel(vm_map_t map)
1116 {
1117 return map->pmap == kernel_pmap;
1118 }
1119
1120 /*
1121 * vm_map_create:
1122 *
1123 * Creates and returns a new empty VM map with
1124 * the given physical map structure, and having
1125 * the given lower and upper address bounds.
1126 */
1127
1128 vm_map_t
1129 vm_map_create(
1130 pmap_t pmap,
1131 vm_map_offset_t min,
1132 vm_map_offset_t max,
1133 boolean_t pageable)
1134 {
1135 int options;
1136
1137 options = 0;
1138 if (pageable) {
1139 options |= VM_MAP_CREATE_PAGEABLE;
1140 }
1141 return vm_map_create_options(pmap, min, max, options);
1142 }
1143
1144 vm_map_t
1145 vm_map_create_options(
1146 pmap_t pmap,
1147 vm_map_offset_t min,
1148 vm_map_offset_t max,
1149 int options)
1150 {
1151 vm_map_t result;
1152 struct vm_map_links *hole_entry = NULL;
1153
1154 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1155 /* unknown option */
1156 return VM_MAP_NULL;
1157 }
1158
1159 result = (vm_map_t) zalloc(vm_map_zone);
1160 if (result == VM_MAP_NULL) {
1161 panic("vm_map_create");
1162 }
1163
1164 vm_map_first_entry(result) = vm_map_to_entry(result);
1165 vm_map_last_entry(result) = vm_map_to_entry(result);
1166 result->hdr.nentries = 0;
1167 if (options & VM_MAP_CREATE_PAGEABLE) {
1168 result->hdr.entries_pageable = TRUE;
1169 } else {
1170 result->hdr.entries_pageable = FALSE;
1171 }
1172
1173 vm_map_store_init( &(result->hdr));
1174
1175 result->hdr.page_shift = PAGE_SHIFT;
1176
1177 result->size = 0;
1178 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1179 result->user_wire_size = 0;
1180 #if !CONFIG_EMBEDDED
1181 result->vmmap_high_start = 0;
1182 #endif
1183 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1184 #if TASK_SWAPPER
1185 result->res_count = 1;
1186 result->sw_state = MAP_SW_IN;
1187 #endif /* TASK_SWAPPER */
1188 result->pmap = pmap;
1189 result->min_offset = min;
1190 result->max_offset = max;
1191 result->wiring_required = FALSE;
1192 result->no_zero_fill = FALSE;
1193 result->mapped_in_other_pmaps = FALSE;
1194 result->wait_for_space = FALSE;
1195 result->switch_protect = FALSE;
1196 result->disable_vmentry_reuse = FALSE;
1197 result->map_disallow_data_exec = FALSE;
1198 result->is_nested_map = FALSE;
1199 result->map_disallow_new_exec = FALSE;
1200 result->highest_entry_end = 0;
1201 result->first_free = vm_map_to_entry(result);
1202 result->hint = vm_map_to_entry(result);
1203 result->jit_entry_exists = FALSE;
1204
1205 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1206 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1207 result->has_corpse_footprint = TRUE;
1208 result->holelistenabled = FALSE;
1209 result->vmmap_corpse_footprint = NULL;
1210 } else {
1211 result->has_corpse_footprint = FALSE;
1212 if (vm_map_supports_hole_optimization) {
1213 hole_entry = zalloc(vm_map_holes_zone);
1214
1215 hole_entry->start = min;
1216 #if defined(__arm__) || defined(__arm64__)
1217 hole_entry->end = result->max_offset;
1218 #else
1219 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1220 #endif
1221 result->holes_list = result->hole_hint = hole_entry;
1222 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1223 result->holelistenabled = TRUE;
1224 } else {
1225 result->holelistenabled = FALSE;
1226 }
1227 }
1228
1229 vm_map_lock_init(result);
1230 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1231
1232 return result;
1233 }
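/*
 * Illustrative usage sketch (not part of the original source): "my_pmap"
 * and the address bounds are hypothetical; the two calls below are
 * equivalent, the second using the options interface directly.
 */
#if 0 /* example only */
	vm_map_t new_map;

	new_map = vm_map_create(my_pmap,
	    MACH_VM_MIN_ADDRESS, MACH_VM_MAX_ADDRESS, TRUE);

	new_map = vm_map_create_options(my_pmap,
	    MACH_VM_MIN_ADDRESS, MACH_VM_MAX_ADDRESS, VM_MAP_CREATE_PAGEABLE);
#endif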
1234
1235 /*
1236 * vm_map_entry_create: [ internal use only ]
1237 *
1238 * Allocates a VM map entry for insertion in the
1239 * given map (or map copy). No fields are filled.
1240 */
1241 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1242
1243 #define vm_map_copy_entry_create(copy, map_locked) \
1244 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1245 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1246
1247 static vm_map_entry_t
1248 _vm_map_entry_create(
1249 struct vm_map_header *map_header, boolean_t __unused map_locked)
1250 {
1251 zone_t zone;
1252 vm_map_entry_t entry;
1253
1254 zone = vm_map_entry_zone;
1255
1256 assert(map_header->entries_pageable ? !map_locked : TRUE);
1257
1258 if (map_header->entries_pageable) {
1259 entry = (vm_map_entry_t) zalloc(zone);
1260 } else {
1261 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1262
1263 if (entry == VM_MAP_ENTRY_NULL) {
1264 zone = vm_map_entry_reserved_zone;
1265 entry = (vm_map_entry_t) zalloc(zone);
1266 OSAddAtomic(1, &reserved_zalloc_count);
1267 } else {
1268 OSAddAtomic(1, &nonreserved_zalloc_count);
1269 }
1270 }
1271
1272 if (entry == VM_MAP_ENTRY_NULL) {
1273 panic("vm_map_entry_create");
1274 }
1275 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1276
1277 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1278 #if MAP_ENTRY_CREATION_DEBUG
1279 entry->vme_creation_maphdr = map_header;
1280 backtrace(&entry->vme_creation_bt[0],
1281 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1282 #endif
1283 return entry;
1284 }
1285
1286 /*
1287 * vm_map_entry_dispose: [ internal use only ]
1288 *
1289 * Inverse of vm_map_entry_create.
1290 *
1291 * write map lock held so no need to
1292 * do anything special to insure correctness
1293 * of the stores
1294 */
1295 #define vm_map_entry_dispose(map, entry) \
1296 _vm_map_entry_dispose(&(map)->hdr, (entry))
1297
1298 #define vm_map_copy_entry_dispose(copy, entry) \
1299 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1300
1301 static void
1302 _vm_map_entry_dispose(
1303 struct vm_map_header *map_header,
1304 vm_map_entry_t entry)
1305 {
1306 zone_t zone;
1307
1308 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1309 zone = vm_map_entry_zone;
1310 } else {
1311 zone = vm_map_entry_reserved_zone;
1312 }
1313
1314 if (!map_header->entries_pageable) {
1315 if (zone == vm_map_entry_zone) {
1316 OSAddAtomic(-1, &nonreserved_zalloc_count);
1317 } else {
1318 OSAddAtomic(-1, &reserved_zalloc_count);
1319 }
1320 }
1321
1322 zfree(zone, entry);
1323 }
1324
1325 #if MACH_ASSERT
1326 static boolean_t first_free_check = FALSE;
1327 boolean_t
1328 first_free_is_valid(
1329 vm_map_t map)
1330 {
1331 if (!first_free_check) {
1332 return TRUE;
1333 }
1334
1335 return first_free_is_valid_store( map );
1336 }
1337 #endif /* MACH_ASSERT */
1338
1339
1340 #define vm_map_copy_entry_link(copy, after_where, entry) \
1341 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1342
1343 #define vm_map_copy_entry_unlink(copy, entry) \
1344 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1345
1346 #if MACH_ASSERT && TASK_SWAPPER
1347 /*
1348 * vm_map_res_reference:
1349 *
1350 * Adds another valid residence count to the given map.
1351 *
1352 * Map is locked so this function can be called from
1353 * vm_map_swapin.
1354 *
1355 */
1356 void
1357 vm_map_res_reference(vm_map_t map)
1358 {
1359 /* assert map is locked */
1360 assert(map->res_count >= 0);
1361 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1362 if (map->res_count == 0) {
1363 lck_mtx_unlock(&map->s_lock);
1364 vm_map_lock(map);
1365 vm_map_swapin(map);
1366 lck_mtx_lock(&map->s_lock);
1367 ++map->res_count;
1368 vm_map_unlock(map);
1369 } else {
1370 ++map->res_count;
1371 }
1372 }
1373
1374 /*
1375 * vm_map_reference_swap:
1376 *
1377 * Adds valid reference and residence counts to the given map.
1378 *
1379 * The map may not be in memory (i.e. zero residence count).
1380 *
1381 */
1382 void
1383 vm_map_reference_swap(vm_map_t map)
1384 {
1385 assert(map != VM_MAP_NULL);
1386 lck_mtx_lock(&map->s_lock);
1387 assert(map->res_count >= 0);
1388 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1389 os_ref_retain_locked(&map->map_refcnt);
1390 vm_map_res_reference(map);
1391 lck_mtx_unlock(&map->s_lock);
1392 }
1393
1394 /*
1395 * vm_map_res_deallocate:
1396 *
1397 * Decrement residence count on a map; possibly causing swapout.
1398 *
1399 * The map must be in memory (i.e. non-zero residence count).
1400 *
1401 * The map is locked, so this function is callable from vm_map_deallocate.
1402 *
1403 */
1404 void
1405 vm_map_res_deallocate(vm_map_t map)
1406 {
1407 assert(map->res_count > 0);
1408 if (--map->res_count == 0) {
1409 lck_mtx_unlock(&map->s_lock);
1410 vm_map_lock(map);
1411 vm_map_swapout(map);
1412 vm_map_unlock(map);
1413 lck_mtx_lock(&map->s_lock);
1414 }
1415 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1416 }
1417 #endif /* MACH_ASSERT && TASK_SWAPPER */
1418
1419 /*
1420 * vm_map_destroy:
1421 *
1422 * Actually destroy a map.
1423 */
1424 void
1425 vm_map_destroy(
1426 vm_map_t map,
1427 int flags)
1428 {
1429 vm_map_lock(map);
1430
1431 /* final cleanup: no need to unnest shared region */
1432 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1433 /* final cleanup: ok to remove immutable mappings */
1434 flags |= VM_MAP_REMOVE_IMMUTABLE;
1435 /* final cleanup: allow gaps in range */
1436 flags |= VM_MAP_REMOVE_GAPS_OK;
1437
1438 /* clean up regular map entries */
1439 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1440 flags, VM_MAP_NULL);
1441 /* clean up leftover special mappings (commpage, etc...) */
1442 #if !defined(__arm__) && !defined(__arm64__)
1443 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1444 flags, VM_MAP_NULL);
1445 #endif /* !__arm__ && !__arm64__ */
1446
1447 vm_map_disable_hole_optimization(map);
1448 vm_map_corpse_footprint_destroy(map);
1449
1450 vm_map_unlock(map);
1451
1452 assert(map->hdr.nentries == 0);
1453
1454 if (map->pmap) {
1455 pmap_destroy(map->pmap);
1456 }
1457
1458 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1459 /*
1460 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1461 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1462 * structure or kalloc'ed via lck_mtx_init.
1463 * An example is s_lock_ext within struct _vm_map.
1464 *
1465 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1466 * can add another tag to detect embedded vs alloc'ed indirect external
1467 * mutexes but that'll be additional checks in the lock path and require
1468 * updating dependencies for the old vs new tag.
1469 *
1470 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1471 * just when lock debugging is ON, we choose to forego explicitly destroying
1472 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1473 * count on vm_map_lck_grp, which has no serious side-effect.
1474 */
1475 } else {
1476 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1477 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1478 }
1479
1480 zfree(vm_map_zone, map);
1481 }
1482
1483 /*
1484 * Returns pid of the task with the largest number of VM map entries.
1485 * Used in the zone-map-exhaustion jetsam path.
1486 */
1487 pid_t
1488 find_largest_process_vm_map_entries(void)
1489 {
1490 pid_t victim_pid = -1;
1491 int max_vm_map_entries = 0;
1492 task_t task = TASK_NULL;
1493 queue_head_t *task_list = &tasks;
1494
1495 lck_mtx_lock(&tasks_threads_lock);
1496 queue_iterate(task_list, task, task_t, tasks) {
1497 if (task == kernel_task || !task->active) {
1498 continue;
1499 }
1500
1501 vm_map_t task_map = task->map;
1502 if (task_map != VM_MAP_NULL) {
1503 int task_vm_map_entries = task_map->hdr.nentries;
1504 if (task_vm_map_entries > max_vm_map_entries) {
1505 max_vm_map_entries = task_vm_map_entries;
1506 victim_pid = pid_from_task(task);
1507 }
1508 }
1509 }
1510 lck_mtx_unlock(&tasks_threads_lock);
1511
1512 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1513 return victim_pid;
1514 }
1515
1516 #if TASK_SWAPPER
1517 /*
1518 * vm_map_swapin/vm_map_swapout
1519 *
1520 * Swap a map in and out, either referencing or releasing its resources.
1521 * These functions are internal use only; however, they must be exported
1522 * because they may be called from macros, which are exported.
1523 *
1524 * In the case of swapout, there could be races on the residence count,
1525 * so if the residence count is up, we return, assuming that a
1526 * vm_map_deallocate() call in the near future will bring us back.
1527 *
1528 * Locking:
1529 * -- We use the map write lock for synchronization among races.
1530 * -- The map write lock, and not the simple s_lock, protects the
1531 * swap state of the map.
1532 * -- If a map entry is a share map, then we hold both locks, in
1533 * hierarchical order.
1534 *
1535 * Synchronization Notes:
1536 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1537 * will block on the map lock and proceed when swapout is through.
1538 * 2) A vm_map_reference() call at this time is illegal, and will
1539 * cause a panic. vm_map_reference() is only allowed on resident
1540 * maps, since it refuses to block.
1541 * 3) A vm_map_swapin() call during a swapin will block, and
1542 * proceed when the first swapin is done, turning into a nop.
1543 * This is the reason the res_count is not incremented until
1544 * after the swapin is complete.
1545 * 4) There is a timing hole after the checks of the res_count, before
1546 * the map lock is taken, during which a swapin may get the lock
1547 * before a swapout about to happen. If this happens, the swapin
1548 * will detect the state and increment the reference count, causing
1549 * the swapout to be a nop, thereby delaying it until a later
1550 * vm_map_deallocate. If the swapout gets the lock first, then
1551 * the swapin will simply block until the swapout is done, and
1552 * then proceed.
1553 *
1554 * Because vm_map_swapin() is potentially an expensive operation, it
1555 * should be used with caution.
1556 *
1557 * Invariants:
1558 * 1) A map with a residence count of zero is either swapped, or
1559 * being swapped.
1560 * 2) A map with a non-zero residence count is either resident,
1561 * or being swapped in.
1562 */
1563
1564 int vm_map_swap_enable = 1;
1565
1566 void
1567 vm_map_swapin(vm_map_t map)
1568 {
1569 vm_map_entry_t entry;
1570
1571 if (!vm_map_swap_enable) { /* debug */
1572 return;
1573 }
1574
1575 /*
1576 * Map is locked
1577 * First deal with various races.
1578 */
1579 if (map->sw_state == MAP_SW_IN) {
1580 /*
1581 * we raced with swapout and won. Returning will incr.
1582 * the res_count, turning the swapout into a nop.
1583 */
1584 return;
1585 }
1586
1587 /*
1588 * The residence count must be zero. If we raced with another
1589 * swapin, the state would have been IN; if we raced with a
1590 * swapout (after another competing swapin), we must have lost
1591 * the race to get here (see above comment), in which case
1592 * res_count is still 0.
1593 */
1594 assert(map->res_count == 0);
1595
1596 /*
1597 * There are no intermediate states of a map going out or
1598 * coming in, since the map is locked during the transition.
1599 */
1600 assert(map->sw_state == MAP_SW_OUT);
1601
1602 /*
1603 * We now operate upon each map entry. If the entry is a sub-
1604 * or share-map, we call vm_map_res_reference upon it.
1605 * If the entry is an object, we call vm_object_res_reference
1606 * (this may iterate through the shadow chain).
1607 * Note that we hold the map locked the entire time,
1608 * even if we get back here via a recursive call in
1609 * vm_map_res_reference.
1610 */
1611 entry = vm_map_first_entry(map);
1612
1613 while (entry != vm_map_to_entry(map)) {
1614 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1615 if (entry->is_sub_map) {
1616 vm_map_t lmap = VME_SUBMAP(entry);
1617 lck_mtx_lock(&lmap->s_lock);
1618 vm_map_res_reference(lmap);
1619 lck_mtx_unlock(&lmap->s_lock);
1620 } else {
1621 vm_object_t object = VME_OBJECT(entry);
1622 vm_object_lock(object);
1623 /*
1624 * This call may iterate through the
1625 * shadow chain.
1626 */
1627 vm_object_res_reference(object);
1628 vm_object_unlock(object);
1629 }
1630 }
1631 entry = entry->vme_next;
1632 }
1633 assert(map->sw_state == MAP_SW_OUT);
1634 map->sw_state = MAP_SW_IN;
1635 }
1636
1637 void
1638 vm_map_swapout(vm_map_t map)
1639 {
1640 vm_map_entry_t entry;
1641
1642 /*
1643 * Map is locked
1644 * First deal with various races.
1645 * If we raced with a swapin and lost, the residence count
1646 * will have been incremented to 1, and we simply return.
1647 */
1648 lck_mtx_lock(&map->s_lock);
1649 if (map->res_count != 0) {
1650 lck_mtx_unlock(&map->s_lock);
1651 return;
1652 }
1653 lck_mtx_unlock(&map->s_lock);
1654
1655 /*
1656 * There are no intermediate states of a map going out or
1657 * coming in, since the map is locked during the transition.
1658 */
1659 assert(map->sw_state == MAP_SW_IN);
1660
1661 if (!vm_map_swap_enable) {
1662 return;
1663 }
1664
1665 /*
1666 * We now operate upon each map entry. If the entry is a sub-
1667 * or share-map, we call vm_map_res_deallocate upon it.
1668 * If the entry is an object, we call vm_object_res_deallocate
1669 * (this may iterate through the shadow chain).
1670 * Note that we hold the map locked the entire time,
1671 * even if we get back here via a recursive call in
1672 * vm_map_res_deallocate.
1673 */
1674 entry = vm_map_first_entry(map);
1675
1676 while (entry != vm_map_to_entry(map)) {
1677 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1678 if (entry->is_sub_map) {
1679 vm_map_t lmap = VME_SUBMAP(entry);
1680 lck_mtx_lock(&lmap->s_lock);
1681 vm_map_res_deallocate(lmap);
1682 lck_mtx_unlock(&lmap->s_lock);
1683 } else {
1684 vm_object_t object = VME_OBJECT(entry);
1685 vm_object_lock(object);
1686 /*
1687 * This call may take a long time,
1688 * since it could actively push
1689 * out pages (if we implement it
1690 * that way).
1691 */
1692 vm_object_res_deallocate(object);
1693 vm_object_unlock(object);
1694 }
1695 }
1696 entry = entry->vme_next;
1697 }
1698 assert(map->sw_state == MAP_SW_IN);
1699 map->sw_state = MAP_SW_OUT;
1700 }
1701
1702 #endif /* TASK_SWAPPER */
1703
1704 /*
1705 * vm_map_lookup_entry: [ internal use only ]
1706 *
1707 * Calls into the vm map store layer to find the map
1708 * entry containing (or immediately preceding) the
1709 * specified address in the given map; the entry is returned
1710 * in the "entry" parameter. The boolean
1711 * result indicates whether the address is
1712 * actually contained in the map.
1713 */
1714 boolean_t
1715 vm_map_lookup_entry(
1716 vm_map_t map,
1717 vm_map_offset_t address,
1718 vm_map_entry_t *entry) /* OUT */
1719 {
1720 return vm_map_store_lookup_entry( map, address, entry );
1721 }
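/*
 * Illustrative usage sketch (not part of the original source): callers
 * hold at least the read lock across the lookup and the use of the
 * returned entry.  "addr" is a hypothetical address of interest.
 */
#if 0 /* example only */
	vm_map_entry_t entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* addr lies within [entry->vme_start, entry->vme_end) */
	} else {
		/* addr is unmapped; "entry" immediately precedes addr,
		 * or is vm_map_to_entry(map) if nothing precedes it */
	}
	vm_map_unlock_read(map);
#endif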
1722
1723 /*
1724 * Routine: vm_map_find_space
1725 * Purpose:
1726 * Allocate a range in the specified virtual address map,
1727 * returning the entry allocated for that range.
1728 * Used by kmem_alloc, etc.
1729 *
1730 * The map must NOT be locked. It will be returned locked
1731 * on KERN_SUCCESS, unlocked on failure.
1732 *
1733 * If an entry is allocated, the object/offset fields
1734 * are initialized to zero.
1735 */
1736 kern_return_t
1737 vm_map_find_space(
1738 vm_map_t map,
1739 vm_map_offset_t *address, /* OUT */
1740 vm_map_size_t size,
1741 vm_map_offset_t mask,
1742 int flags __unused,
1743 vm_map_kernel_flags_t vmk_flags,
1744 vm_tag_t tag,
1745 vm_map_entry_t *o_entry) /* OUT */
1746 {
1747 vm_map_entry_t entry, new_entry;
1748 vm_map_offset_t start;
1749 vm_map_offset_t end;
1750 vm_map_entry_t hole_entry;
1751
1752 if (size == 0) {
1753 *address = 0;
1754 return KERN_INVALID_ARGUMENT;
1755 }
1756
1757 if (vmk_flags.vmkf_guard_after) {
1758 /* account for the back guard page in the size */
1759 size += VM_MAP_PAGE_SIZE(map);
1760 }
1761
1762 new_entry = vm_map_entry_create(map, FALSE);
1763
1764 /*
1765 * Look for the first possible address; if there's already
1766 * something at this address, we have to start after it.
1767 */
1768
1769 vm_map_lock(map);
1770
1771 if (map->disable_vmentry_reuse == TRUE) {
1772 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1773 } else {
1774 if (map->holelistenabled) {
1775 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1776
1777 if (hole_entry == NULL) {
1778 /*
1779 * No more space in the map?
1780 */
1781 vm_map_entry_dispose(map, new_entry);
1782 vm_map_unlock(map);
1783 return KERN_NO_SPACE;
1784 }
1785
1786 entry = hole_entry;
1787 start = entry->vme_start;
1788 } else {
1789 assert(first_free_is_valid(map));
1790 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1791 start = map->min_offset;
1792 } else {
1793 start = entry->vme_end;
1794 }
1795 }
1796 }
1797
1798 /*
1799 * In any case, the "entry" always precedes
1800 * the proposed new region throughout the loop:
1801 */
1802
1803 while (TRUE) {
1804 vm_map_entry_t next;
1805
1806 /*
1807 * Find the end of the proposed new region.
1808 * Be sure we didn't go beyond the end, or
1809 * wrap around the address.
1810 */
1811
1812 if (vmk_flags.vmkf_guard_before) {
1813 /* reserve space for the front guard page */
1814 start += VM_MAP_PAGE_SIZE(map);
1815 }
1816 end = ((start + mask) & ~mask);
1817
1818 if (end < start) {
1819 vm_map_entry_dispose(map, new_entry);
1820 vm_map_unlock(map);
1821 return KERN_NO_SPACE;
1822 }
1823 start = end;
1824 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1825 end += size;
1826 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1827
1828 if ((end > map->max_offset) || (end < start)) {
1829 vm_map_entry_dispose(map, new_entry);
1830 vm_map_unlock(map);
1831 return KERN_NO_SPACE;
1832 }
1833
1834 next = entry->vme_next;
1835
1836 if (map->holelistenabled) {
1837 if (entry->vme_end >= end) {
1838 break;
1839 }
1840 } else {
1841 /*
1842 * If there are no more entries, we must win.
1843 *
1844 * OR
1845 *
1846 * If there is another entry, it must be
1847 * after the end of the potential new region.
1848 */
1849
1850 if (next == vm_map_to_entry(map)) {
1851 break;
1852 }
1853
1854 if (next->vme_start >= end) {
1855 break;
1856 }
1857 }
1858
1859 /*
1860 * Didn't fit -- move to the next entry.
1861 */
1862
1863 entry = next;
1864
1865 if (map->holelistenabled) {
1866 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1867 /*
1868 * Wrapped around
1869 */
1870 vm_map_entry_dispose(map, new_entry);
1871 vm_map_unlock(map);
1872 return KERN_NO_SPACE;
1873 }
1874 start = entry->vme_start;
1875 } else {
1876 start = entry->vme_end;
1877 }
1878 }
1879
1880 if (map->holelistenabled) {
1881 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1882 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1883 }
1884 }
1885
1886 /*
1887 * At this point,
1888 * "start" and "end" should define the endpoints of the
1889 * available new range, and
1890 * "entry" should refer to the region before the new
1891 * range, and
1892 *
1893 * the map should be locked.
1894 */
1895
1896 if (vmk_flags.vmkf_guard_before) {
1897 /* go back for the front guard page */
1898 start -= VM_MAP_PAGE_SIZE(map);
1899 }
1900 *address = start;
1901
1902 assert(start < end);
1903 new_entry->vme_start = start;
1904 new_entry->vme_end = end;
1905 assert(page_aligned(new_entry->vme_start));
1906 assert(page_aligned(new_entry->vme_end));
1907 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1908 VM_MAP_PAGE_MASK(map)));
1909 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1910 VM_MAP_PAGE_MASK(map)));
1911
1912 new_entry->is_shared = FALSE;
1913 new_entry->is_sub_map = FALSE;
1914 new_entry->use_pmap = TRUE;
1915 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1916 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1917
1918 new_entry->needs_copy = FALSE;
1919
1920 new_entry->inheritance = VM_INHERIT_DEFAULT;
1921 new_entry->protection = VM_PROT_DEFAULT;
1922 new_entry->max_protection = VM_PROT_ALL;
1923 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1924 new_entry->wired_count = 0;
1925 new_entry->user_wired_count = 0;
1926
1927 new_entry->in_transition = FALSE;
1928 new_entry->needs_wakeup = FALSE;
1929 new_entry->no_cache = FALSE;
1930 new_entry->permanent = FALSE;
1931 new_entry->superpage_size = FALSE;
1932 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1933 new_entry->map_aligned = TRUE;
1934 } else {
1935 new_entry->map_aligned = FALSE;
1936 }
1937
1938 new_entry->used_for_jit = FALSE;
1939 new_entry->pmap_cs_associated = FALSE;
1940 new_entry->zero_wired_pages = FALSE;
1941 new_entry->iokit_acct = FALSE;
1942 new_entry->vme_resilient_codesign = FALSE;
1943 new_entry->vme_resilient_media = FALSE;
1944 if (vmk_flags.vmkf_atomic_entry) {
1945 new_entry->vme_atomic = TRUE;
1946 } else {
1947 new_entry->vme_atomic = FALSE;
1948 }
1949
1950 VME_ALIAS_SET(new_entry, tag);
1951
1952 /*
1953 * Insert the new entry into the list
1954 */
1955
1956 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1957
1958 map->size += size;
1959
1960 /*
1961 * Update the lookup hint
1962 */
1963 SAVE_HINT_MAP_WRITE(map, new_entry);
1964
1965 *o_entry = new_entry;
1966 return KERN_SUCCESS;
1967 }
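/*
 * Illustrative sketch (not compiled): a kmem_alloc-style caller of
 * vm_map_find_space().  Per the comment above, the map must not be
 * locked on entry; on KERN_SUCCESS it comes back locked and the new
 * entry's object/offset are zero, so the caller installs a backing
 * object (elided here) and then unlocks.  Names are hypothetical.
 */
#if 0
static kern_return_t
example_reserve_range(
	vm_map_t        map,
	vm_map_size_t   size,
	vm_map_offset_t *addrp)
{
	vm_map_entry_t  entry;
	kern_return_t   kr;

	kr = vm_map_find_space(map, addrp, size,
	    (vm_map_offset_t) 0,        /* mask: no alignment constraint */
	    0,                          /* flags (unused) */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,        /* tag */
	    &entry);
	if (kr != KERN_SUCCESS) {
		return kr;              /* map is unlocked on failure */
	}
	/* ... set up the object/offset for "entry" here ... */
	vm_map_unlock(map);             /* map was returned locked */
	return KERN_SUCCESS;
}
#endif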
1968
1969 int vm_map_pmap_enter_print = FALSE;
1970 int vm_map_pmap_enter_enable = FALSE;
1971
1972 /*
1973 * Routine: vm_map_pmap_enter [internal only]
1974 *
1975 * Description:
1976 * Force pages from the specified object to be entered into
1977 * the pmap at the specified address if they are present.
1978 * As soon as a page not found in the object the scan ends.
1979 *
1980 * Returns:
1981 * Nothing.
1982 *
1983 * In/out conditions:
1984 * The source map should not be locked on entry.
1985 */
1986 __unused static void
1987 vm_map_pmap_enter(
1988 vm_map_t map,
1989 vm_map_offset_t addr,
1990 vm_map_offset_t end_addr,
1991 vm_object_t object,
1992 vm_object_offset_t offset,
1993 vm_prot_t protection)
1994 {
1995 int type_of_fault;
1996 kern_return_t kr;
1997 struct vm_object_fault_info fault_info = {};
1998
1999 if (map->pmap == 0) {
2000 return;
2001 }
2002
2003 while (addr < end_addr) {
2004 vm_page_t m;
2005
2006
2007 /*
2008 * TODO:
2009 * From vm_map_enter(), we come into this function without the map
2010 * lock held or the object lock held.
2011 * We haven't taken a reference on the object either.
2012 * We should do a proper lookup on the map to make sure
2013 * that things are sane before we go locking objects that
2014 * could have been deallocated from under us.
2015 */
2016
2017 vm_object_lock(object);
2018
2019 m = vm_page_lookup(object, offset);
2020
2021 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2022 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2023 vm_object_unlock(object);
2024 return;
2025 }
2026
2027 if (vm_map_pmap_enter_print) {
2028 printf("vm_map_pmap_enter:");
2029 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2030 map, (unsigned long long)addr, object, (unsigned long long)offset);
2031 }
2032 type_of_fault = DBG_CACHE_HIT_FAULT;
2033 kr = vm_fault_enter(m, map->pmap,
2034 addr, protection, protection,
2035 VM_PAGE_WIRED(m),
2036 FALSE, /* change_wiring */
2037 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2038 &fault_info,
2039 NULL, /* need_retry */
2040 &type_of_fault);
2041
2042 vm_object_unlock(object);
2043
2044 offset += PAGE_SIZE_64;
2045 addr += PAGE_SIZE;
2046 }
2047 }
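/*
 * Illustrative sketch (not compiled): how a caller could use
 * vm_map_pmap_enter() to pre-populate the pmap for a freshly
 * established mapping.  "start", "end", "object", "offset" and
 * "prot" are hypothetical; the map must not be locked, per the
 * in/out conditions above.
 */
#if 0
	vm_map_pmap_enter(map,
	    start,              /* first VA to populate */
	    end,                /* end of the VA range */
	    object,             /* backing VM object */
	    offset,             /* offset of "start" within the object */
	    prot);              /* protection for the pmap entries */
#endif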
2048
2049 boolean_t vm_map_pmap_is_empty(
2050 vm_map_t map,
2051 vm_map_offset_t start,
2052 vm_map_offset_t end);
2053 boolean_t
2054 vm_map_pmap_is_empty(
2055 vm_map_t map,
2056 vm_map_offset_t start,
2057 vm_map_offset_t end)
2058 {
2059 #ifdef MACHINE_PMAP_IS_EMPTY
2060 return pmap_is_empty(map->pmap, start, end);
2061 #else /* MACHINE_PMAP_IS_EMPTY */
2062 vm_map_offset_t offset;
2063 ppnum_t phys_page;
2064
2065 if (map->pmap == NULL) {
2066 return TRUE;
2067 }
2068
2069 for (offset = start;
2070 offset < end;
2071 offset += PAGE_SIZE) {
2072 phys_page = pmap_find_phys(map->pmap, offset);
2073 if (phys_page) {
2074 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2075 "page %d at 0x%llx\n",
2076 map, (long long)start, (long long)end,
2077 phys_page, (long long)offset);
2078 return FALSE;
2079 }
2080 }
2081 return TRUE;
2082 #endif /* MACHINE_PMAP_IS_EMPTY */
2083 }
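/*
 * Illustrative sketch (not compiled): vm_map_pmap_is_empty() is a
 * debug aid; callers assert that a range has no leftover physical
 * translations before handing it out, as the DEBUG check in
 * vm_map_enter() below does.
 */
#if 0
#if DEBUG
	assert(vm_map_pmap_is_empty(map, *address, *address + size));
#endif /* DEBUG */
#endif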
2084
2085 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2086 kern_return_t
2087 vm_map_random_address_for_size(
2088 vm_map_t map,
2089 vm_map_offset_t *address,
2090 vm_map_size_t size)
2091 {
2092 kern_return_t kr = KERN_SUCCESS;
2093 int tries = 0;
2094 vm_map_offset_t random_addr = 0;
2095 vm_map_offset_t hole_end;
2096
2097 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2098 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2099 vm_map_size_t vm_hole_size = 0;
2100 vm_map_size_t addr_space_size;
2101
2102 addr_space_size = vm_map_max(map) - vm_map_min(map);
2103
2104 assert(page_aligned(size));
2105
2106 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2107 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2108 random_addr = vm_map_trunc_page(
2109 vm_map_min(map) + (random_addr % addr_space_size),
2110 VM_MAP_PAGE_MASK(map));
2111
2112 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2113 if (prev_entry == vm_map_to_entry(map)) {
2114 next_entry = vm_map_first_entry(map);
2115 } else {
2116 next_entry = prev_entry->vme_next;
2117 }
2118 if (next_entry == vm_map_to_entry(map)) {
2119 hole_end = vm_map_max(map);
2120 } else {
2121 hole_end = next_entry->vme_start;
2122 }
2123 vm_hole_size = hole_end - random_addr;
2124 if (vm_hole_size >= size) {
2125 *address = random_addr;
2126 break;
2127 }
2128 }
2129 tries++;
2130 }
2131
2132 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2133 kr = KERN_NO_SPACE;
2134 }
2135 return kr;
2136 }
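/*
 * Illustrative sketch (not compiled): this is how vm_map_enter()
 * below uses vm_map_random_address_for_size() when the caller asked
 * for VM_FLAGS_RANDOM_ADDR; on success the randomized address becomes
 * the starting point for the normal fit check.
 */
#if 0
	result = vm_map_random_address_for_size(map, address, size);
	if (result != KERN_SUCCESS) {
		goto BailOut;   /* KERN_NO_SPACE after too many tries */
	}
	start = *address;
#endif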
2137
2138 static boolean_t
2139 vm_memory_malloc_no_cow(
2140 int alias)
2141 {
2142 uint64_t alias_mask;
2143
2144 if (alias > 63) {
2145 return FALSE;
2146 }
2147
2148 alias_mask = 1ULL << alias;
2149 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2150 return TRUE;
2151 }
2152 return FALSE;
2153 }
2154
2155 /*
2156 * Routine: vm_map_enter
2157 *
2158 * Description:
2159 * Allocate a range in the specified virtual address map.
2160 * The resulting range will refer to memory defined by
2161 * the given memory object and offset into that object.
2162 *
2163 * Arguments are as defined in the vm_map call.
2164 */
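/*
 * Illustrative sketch (not compiled): a minimal anonymous, zero-fill,
 * "anywhere" call to vm_map_enter().  "map" and "size" are assumed to
 * exist; real callers normally reach this routine through the
 * higher-level Mach VM entry points rather than calling it directly.
 */
#if 0
	vm_map_offset_t map_addr = 0;
	kern_return_t   kr;

	kr = vm_map_enter(map,
	    &map_addr,                  /* IN/OUT: chosen address */
	    size,
	    (vm_map_offset_t) 0,        /* mask: no alignment constraint */
	    VM_FLAGS_ANYWHERE,          /* let the kernel pick the address */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,        /* tag/alias */
	    VM_OBJECT_NULL,             /* anonymous: object set up lazily */
	    (vm_object_offset_t) 0,
	    FALSE,                      /* needs_copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
#endif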
2165 int _map_enter_debug = 0;
2166 static unsigned int vm_map_enter_restore_successes = 0;
2167 static unsigned int vm_map_enter_restore_failures = 0;
2168 kern_return_t
2169 vm_map_enter(
2170 vm_map_t map,
2171 vm_map_offset_t *address, /* IN/OUT */
2172 vm_map_size_t size,
2173 vm_map_offset_t mask,
2174 int flags,
2175 vm_map_kernel_flags_t vmk_flags,
2176 vm_tag_t alias,
2177 vm_object_t object,
2178 vm_object_offset_t offset,
2179 boolean_t needs_copy,
2180 vm_prot_t cur_protection,
2181 vm_prot_t max_protection,
2182 vm_inherit_t inheritance)
2183 {
2184 vm_map_entry_t entry, new_entry;
2185 vm_map_offset_t start, tmp_start, tmp_offset;
2186 vm_map_offset_t end, tmp_end;
2187 vm_map_offset_t tmp2_start, tmp2_end;
2188 vm_map_offset_t desired_empty_end;
2189 vm_map_offset_t step;
2190 kern_return_t result = KERN_SUCCESS;
2191 vm_map_t zap_old_map = VM_MAP_NULL;
2192 vm_map_t zap_new_map = VM_MAP_NULL;
2193 boolean_t map_locked = FALSE;
2194 boolean_t pmap_empty = TRUE;
2195 boolean_t new_mapping_established = FALSE;
2196 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2197 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2198 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2199 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2200 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2201 boolean_t is_submap = vmk_flags.vmkf_submap;
2202 boolean_t permanent = vmk_flags.vmkf_permanent;
2203 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2204 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2205 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2206 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2207 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2208 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2209 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2210 vm_tag_t user_alias;
2211 vm_map_offset_t effective_min_offset, effective_max_offset;
2212 kern_return_t kr;
2213 boolean_t clear_map_aligned = FALSE;
2214 vm_map_entry_t hole_entry;
2215 vm_map_size_t chunk_size = 0;
2216
2217 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2218
2219 if (flags & VM_FLAGS_4GB_CHUNK) {
2220 #if defined(__LP64__)
2221 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2222 #else /* __LP64__ */
2223 chunk_size = ANON_CHUNK_SIZE;
2224 #endif /* __LP64__ */
2225 } else {
2226 chunk_size = ANON_CHUNK_SIZE;
2227 }
2228
2229 if (superpage_size) {
2230 switch (superpage_size) {
2231 /*
2232 * Note that the current implementation only supports
2233 * a single size for superpages, SUPERPAGE_SIZE, per
2234 * architecture. Once more sizes are to be supported,
2235 * SUPERPAGE_SIZE has to be replaced
2236 * with a lookup of the size depending on superpage_size.
2237 */
2238 #ifdef __x86_64__
2239 case SUPERPAGE_SIZE_ANY:
2240 /* handle it like 2 MB and round up to page size */
2241 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
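/* FALLTHROUGH: then handled like SUPERPAGE_SIZE_2MB */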
2242 case SUPERPAGE_SIZE_2MB:
2243 break;
2244 #endif
2245 default:
2246 return KERN_INVALID_ARGUMENT;
2247 }
2248 mask = SUPERPAGE_SIZE - 1;
2249 if (size & (SUPERPAGE_SIZE - 1)) {
2250 return KERN_INVALID_ARGUMENT;
2251 }
2252 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2253 }
2254
2255
2256 if ((cur_protection & VM_PROT_WRITE) &&
2257 (cur_protection & VM_PROT_EXECUTE) &&
2258 #if !CONFIG_EMBEDDED
2259 map != kernel_map &&
2260 (cs_process_global_enforcement() ||
2261 (vmk_flags.vmkf_cs_enforcement_override
2262 ? vmk_flags.vmkf_cs_enforcement
2263 : cs_process_enforcement(NULL))) &&
2264 #endif /* !CONFIG_EMBEDDED */
2265 !entry_for_jit) {
2266 DTRACE_VM3(cs_wx,
2267 uint64_t, 0,
2268 uint64_t, 0,
2269 vm_prot_t, cur_protection);
2270 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2271 #if VM_PROTECT_WX_FAIL
2272 "failing\n",
2273 #else /* VM_PROTECT_WX_FAIL */
2274 "turning off execute\n",
2275 #endif /* VM_PROTECT_WX_FAIL */
2276 proc_selfpid(),
2277 (current_task()->bsd_info
2278 ? proc_name_address(current_task()->bsd_info)
2279 : "?"),
2280 __FUNCTION__);
2281 cur_protection &= ~VM_PROT_EXECUTE;
2282 #if VM_PROTECT_WX_FAIL
2283 return KERN_PROTECTION_FAILURE;
2284 #endif /* VM_PROTECT_WX_FAIL */
2285 }
2286
2287 /*
2288 * If the task has requested executable lockdown,
2289 * deny any new executable mapping.
2290 */
2291 if (map->map_disallow_new_exec == TRUE) {
2292 if (cur_protection & VM_PROT_EXECUTE) {
2293 return KERN_PROTECTION_FAILURE;
2294 }
2295 }
2296
2297 if (resilient_codesign) {
2298 assert(!is_submap);
2299 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2300 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2301 return KERN_PROTECTION_FAILURE;
2302 }
2303 }
2304
2305 if (resilient_media) {
2306 assert(!is_submap);
2307 // assert(!needs_copy);
2308 if (object != VM_OBJECT_NULL &&
2309 !object->internal) {
2310 /*
2311 * This mapping is directly backed by an external
2312 * memory manager (e.g. a vnode pager for a file):
2313 * we would not have any safe place to inject
2314 * a zero-filled page if an actual page is not
2315 * available, without possibly impacting the actual
2316 * contents of the mapped object (e.g. the file),
2317 * so we can't provide any media resiliency here.
2318 */
2319 return KERN_INVALID_ARGUMENT;
2320 }
2321 }
2322
2323 if (is_submap) {
2324 if (purgable) {
2325 /* submaps cannot be purgeable */
2326 return KERN_INVALID_ARGUMENT;
2327 }
2328 if (object == VM_OBJECT_NULL) {
2329 /* submaps cannot be created lazily */
2330 return KERN_INVALID_ARGUMENT;
2331 }
2332 }
2333 if (vmk_flags.vmkf_already) {
2334 /*
2335 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2336 * is already present. For it to be meaningful, the requested
2337 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2338 * we shouldn't try to remove what was mapped there first
2339 * (!VM_FLAGS_OVERWRITE).
2340 */
2341 if ((flags & VM_FLAGS_ANYWHERE) ||
2342 (flags & VM_FLAGS_OVERWRITE)) {
2343 return KERN_INVALID_ARGUMENT;
2344 }
2345 }
2346
2347 effective_min_offset = map->min_offset;
2348
2349 if (vmk_flags.vmkf_beyond_max) {
2350 /*
2351 * Allow an insertion beyond the map's max offset.
2352 */
2353 #if !defined(__arm__) && !defined(__arm64__)
2354 if (vm_map_is_64bit(map)) {
2355 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2356 } else
2357 #endif /* __arm__ */
2358 effective_max_offset = 0x00000000FFFFF000ULL;
2359 } else {
2360 #if !defined(CONFIG_EMBEDDED)
2361 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2362 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2363 } else {
2364 effective_max_offset = map->max_offset;
2365 }
2366 #else
2367 effective_max_offset = map->max_offset;
2368 #endif
2369 }
2370
2371 if (size == 0 ||
2372 (offset & PAGE_MASK_64) != 0) {
2373 *address = 0;
2374 return KERN_INVALID_ARGUMENT;
2375 }
2376
2377 if (map->pmap == kernel_pmap) {
2378 user_alias = VM_KERN_MEMORY_NONE;
2379 } else {
2380 user_alias = alias;
2381 }
2382
2383 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2384 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2385 }
2386
2387 #define RETURN(value) { result = value; goto BailOut; }
2388
2389 assert(page_aligned(*address));
2390 assert(page_aligned(size));
2391
2392 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2393 /*
2394 * In most cases, the caller rounds the size up to the
2395 * map's page size.
2396 * If we get a size that is explicitly not map-aligned here,
2397 * we'll have to respect the caller's wish and mark the
2398 * mapping as "not map-aligned" to avoid tripping the
2399 * map alignment checks later.
2400 */
2401 clear_map_aligned = TRUE;
2402 }
2403 if (!anywhere &&
2404 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2405 /*
2406 * We've been asked to map at a fixed address and that
2407 * address is not aligned to the map's specific alignment.
2408 * The caller should know what it's doing (i.e. most likely
2409 * mapping some fragmented copy map, transferring memory from
2410 * a VM map with a different alignment), so clear map_aligned
2411 * for this new VM map entry and proceed.
2412 */
2413 clear_map_aligned = TRUE;
2414 }
2415
2416 /*
2417 * Only zero-fill objects are allowed to be purgable.
2418 * LP64todo - limit purgable objects to 32-bits for now
2419 */
2420 if (purgable &&
2421 (offset != 0 ||
2422 (object != VM_OBJECT_NULL &&
2423 (object->vo_size != size ||
2424 object->purgable == VM_PURGABLE_DENY))
2425 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2426 return KERN_INVALID_ARGUMENT;
2427 }
2428
2429 if (!anywhere && overwrite) {
2430 /*
2431 * Create a temporary VM map to hold the old mappings in the
2432 * affected area while we create the new one.
2433 * This avoids releasing the VM map lock in
2434 * vm_map_entry_delete() and allows atomicity
2435 * when we want to replace some mappings with a new one.
2436 * It also allows us to restore the old VM mappings if the
2437 * new mapping fails.
2438 */
2439 zap_old_map = vm_map_create(PMAP_NULL,
2440 *address,
2441 *address + size,
2442 map->hdr.entries_pageable);
2443 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2444 vm_map_disable_hole_optimization(zap_old_map);
2445 }
2446
2447 StartAgain:;
2448
2449 start = *address;
2450
2451 if (anywhere) {
2452 vm_map_lock(map);
2453 map_locked = TRUE;
2454
2455 if (entry_for_jit) {
2456 #if CONFIG_EMBEDDED
2457 if (map->jit_entry_exists) {
2458 result = KERN_INVALID_ARGUMENT;
2459 goto BailOut;
2460 }
2461 random_address = TRUE;
2462 #endif /* CONFIG_EMBEDDED */
2463 }
2464
2465 if (random_address) {
2466 /*
2467 * Get a random start address.
2468 */
2469 result = vm_map_random_address_for_size(map, address, size);
2470 if (result != KERN_SUCCESS) {
2471 goto BailOut;
2472 }
2473 start = *address;
2474 }
2475 #if !CONFIG_EMBEDDED
2476 else if ((start == 0 || start == vm_map_min(map)) &&
2477 !map->disable_vmentry_reuse &&
2478 map->vmmap_high_start != 0) {
2479 start = map->vmmap_high_start;
2480 }
2481 #endif
2482
2483
2484 /*
2485 * Calculate the first possible address.
2486 */
2487
2488 if (start < effective_min_offset) {
2489 start = effective_min_offset;
2490 }
2491 if (start > effective_max_offset) {
2492 RETURN(KERN_NO_SPACE);
2493 }
2494
2495 /*
2496 * Look for the first possible address;
2497 * if there's already something at this
2498 * address, we have to start after it.
2499 */
2500
2501 if (map->disable_vmentry_reuse == TRUE) {
2502 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2503 } else {
2504 if (map->holelistenabled) {
2505 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2506
2507 if (hole_entry == NULL) {
2508 /*
2509 * No more space in the map?
2510 */
2511 result = KERN_NO_SPACE;
2512 goto BailOut;
2513 } else {
2514 boolean_t found_hole = FALSE;
2515
2516 do {
2517 if (hole_entry->vme_start >= start) {
2518 start = hole_entry->vme_start;
2519 found_hole = TRUE;
2520 break;
2521 }
2522
2523 if (hole_entry->vme_end > start) {
2524 found_hole = TRUE;
2525 break;
2526 }
2527 hole_entry = hole_entry->vme_next;
2528 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2529
2530 if (found_hole == FALSE) {
2531 result = KERN_NO_SPACE;
2532 goto BailOut;
2533 }
2534
2535 entry = hole_entry;
2536
2537 if (start == 0) {
2538 start += PAGE_SIZE_64;
2539 }
2540 }
2541 } else {
2542 assert(first_free_is_valid(map));
2543
2544 entry = map->first_free;
2545
2546 if (entry == vm_map_to_entry(map)) {
2547 entry = NULL;
2548 } else {
2549 if (entry->vme_next == vm_map_to_entry(map)) {
2550 /*
2551 * Hole at the end of the map.
2552 */
2553 entry = NULL;
2554 } else {
2555 if (start < (entry->vme_next)->vme_start) {
2556 start = entry->vme_end;
2557 start = vm_map_round_page(start,
2558 VM_MAP_PAGE_MASK(map));
2559 } else {
2560 /*
2561 * Need to do a lookup.
2562 */
2563 entry = NULL;
2564 }
2565 }
2566 }
2567
2568 if (entry == NULL) {
2569 vm_map_entry_t tmp_entry;
2570 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2571 assert(!entry_for_jit);
2572 start = tmp_entry->vme_end;
2573 start = vm_map_round_page(start,
2574 VM_MAP_PAGE_MASK(map));
2575 }
2576 entry = tmp_entry;
2577 }
2578 }
2579 }
2580
2581 /*
2582 * In any case, the "entry" always precedes
2583 * the proposed new region throughout the
2584 * loop:
2585 */
2586
2587 while (TRUE) {
2588 vm_map_entry_t next;
2589
2590 /*
2591 * Find the end of the proposed new region.
2592 * Be sure we didn't go beyond the end, or
2593 * wrap around the address.
2594 */
2595
2596 end = ((start + mask) & ~mask);
2597 end = vm_map_round_page(end,
2598 VM_MAP_PAGE_MASK(map));
2599 if (end < start) {
2600 RETURN(KERN_NO_SPACE);
2601 }
2602 start = end;
2603 assert(VM_MAP_PAGE_ALIGNED(start,
2604 VM_MAP_PAGE_MASK(map)));
2605 end += size;
2606
2607 /* We want an entire page of empty space, but don't increase the allocation size. */
2608 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2609
2610 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2611 if (map->wait_for_space) {
2612 assert(!keep_map_locked);
2613 if (size <= (effective_max_offset -
2614 effective_min_offset)) {
2615 assert_wait((event_t)map,
2616 THREAD_ABORTSAFE);
2617 vm_map_unlock(map);
2618 map_locked = FALSE;
2619 thread_block(THREAD_CONTINUE_NULL);
2620 goto StartAgain;
2621 }
2622 }
2623 RETURN(KERN_NO_SPACE);
2624 }
2625
2626 next = entry->vme_next;
2627
2628 if (map->holelistenabled) {
2629 if (entry->vme_end >= desired_empty_end) {
2630 break;
2631 }
2632 } else {
2633 /*
2634 * If there are no more entries, we must win.
2635 *
2636 * OR
2637 *
2638 * If there is another entry, it must be
2639 * after the end of the potential new region.
2640 */
2641
2642 if (next == vm_map_to_entry(map)) {
2643 break;
2644 }
2645
2646 if (next->vme_start >= desired_empty_end) {
2647 break;
2648 }
2649 }
2650
2651 /*
2652 * Didn't fit -- move to the next entry.
2653 */
2654
2655 entry = next;
2656
2657 if (map->holelistenabled) {
2658 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2659 /*
2660 * Wrapped around
2661 */
2662 result = KERN_NO_SPACE;
2663 goto BailOut;
2664 }
2665 start = entry->vme_start;
2666 } else {
2667 start = entry->vme_end;
2668 }
2669
2670 start = vm_map_round_page(start,
2671 VM_MAP_PAGE_MASK(map));
2672 }
2673
2674 if (map->holelistenabled) {
2675 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2676 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2677 }
2678 }
2679
2680 *address = start;
2681 assert(VM_MAP_PAGE_ALIGNED(*address,
2682 VM_MAP_PAGE_MASK(map)));
2683 } else {
2684 /*
2685 * Verify that:
2686 * the address doesn't itself violate
2687 * the mask requirement.
2688 */
2689
2690 vm_map_lock(map);
2691 map_locked = TRUE;
2692 if ((start & mask) != 0) {
2693 RETURN(KERN_NO_SPACE);
2694 }
2695
2696 /*
2697 * ... the address is within bounds
2698 */
2699
2700 end = start + size;
2701
2702 if ((start < effective_min_offset) ||
2703 (end > effective_max_offset) ||
2704 (start >= end)) {
2705 RETURN(KERN_INVALID_ADDRESS);
2706 }
2707
2708 if (overwrite && zap_old_map != VM_MAP_NULL) {
2709 int remove_flags;
2710 /*
2711 * Fixed mapping and "overwrite" flag: attempt to
2712 * remove all existing mappings in the specified
2713 * address range, saving them in our "zap_old_map".
2714 */
2715 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2716 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2717 if (vmk_flags.vmkf_overwrite_immutable) {
2718 /* we can overwrite immutable mappings */
2719 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2720 }
2721 (void) vm_map_delete(map, start, end,
2722 remove_flags,
2723 zap_old_map);
2724 }
2725
2726 /*
2727 * ... the starting address isn't allocated
2728 */
2729
2730 if (vm_map_lookup_entry(map, start, &entry)) {
2731 if (!(vmk_flags.vmkf_already)) {
2732 RETURN(KERN_NO_SPACE);
2733 }
2734 /*
2735 * Check if what's already there is what we want.
2736 */
2737 tmp_start = start;
2738 tmp_offset = offset;
2739 if (entry->vme_start < start) {
2740 tmp_start -= start - entry->vme_start;
2741 tmp_offset -= start - entry->vme_start;
2742 }
2743 for (; entry->vme_start < end;
2744 entry = entry->vme_next) {
2745 /*
2746 * Check if the mapping's attributes
2747 * match the existing map entry.
2748 */
2749 if (entry == vm_map_to_entry(map) ||
2750 entry->vme_start != tmp_start ||
2751 entry->is_sub_map != is_submap ||
2752 VME_OFFSET(entry) != tmp_offset ||
2753 entry->needs_copy != needs_copy ||
2754 entry->protection != cur_protection ||
2755 entry->max_protection != max_protection ||
2756 entry->inheritance != inheritance ||
2757 entry->iokit_acct != iokit_acct ||
2758 VME_ALIAS(entry) != alias) {
2759 /* not the same mapping ! */
2760 RETURN(KERN_NO_SPACE);
2761 }
2762 /*
2763 * Check if the same object is being mapped.
2764 */
2765 if (is_submap) {
2766 if (VME_SUBMAP(entry) !=
2767 (vm_map_t) object) {
2768 /* not the same submap */
2769 RETURN(KERN_NO_SPACE);
2770 }
2771 } else {
2772 if (VME_OBJECT(entry) != object) {
2773 /* not the same VM object... */
2774 vm_object_t obj2;
2775
2776 obj2 = VME_OBJECT(entry);
2777 if ((obj2 == VM_OBJECT_NULL ||
2778 obj2->internal) &&
2779 (object == VM_OBJECT_NULL ||
2780 object->internal)) {
2781 /*
2782 * ... but both are
2783 * anonymous memory,
2784 * so equivalent.
2785 */
2786 } else {
2787 RETURN(KERN_NO_SPACE);
2788 }
2789 }
2790 }
2791
2792 tmp_offset += entry->vme_end - entry->vme_start;
2793 tmp_start += entry->vme_end - entry->vme_start;
2794 if (entry->vme_end >= end) {
2795 /* reached the end of our mapping */
2796 break;
2797 }
2798 }
2799 /* it all matches: let's use what's already there ! */
2800 RETURN(KERN_MEMORY_PRESENT);
2801 }
2802
2803 /*
2804 * ... the next region doesn't overlap the
2805 * end point.
2806 */
2807
2808 if ((entry->vme_next != vm_map_to_entry(map)) &&
2809 (entry->vme_next->vme_start < end)) {
2810 RETURN(KERN_NO_SPACE);
2811 }
2812 }
2813
2814 /*
2815 * At this point,
2816 * "start" and "end" should define the endpoints of the
2817 * available new range, and
2818 * "entry" should refer to the region before the new
2819 * range, and
2820 *
2821 * the map should be locked.
2822 */
2823
2824 /*
2825 * See whether we can avoid creating a new entry (and object) by
2826 * extending one of our neighbors. [So far, we only attempt to
2827 * extend from below.] Note that we can never extend/join
2828 * purgable objects because they need to remain distinct
2829 * entities in order to implement their "volatile object"
2830 * semantics.
2831 */
2832
2833 if (purgable ||
2834 entry_for_jit ||
2835 vm_memory_malloc_no_cow(user_alias)) {
2836 if (object == VM_OBJECT_NULL) {
2837 object = vm_object_allocate(size);
2838 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2839 object->true_share = FALSE;
2840 if (purgable) {
2841 task_t owner;
2842 object->purgable = VM_PURGABLE_NONVOLATILE;
2843 if (map->pmap == kernel_pmap) {
2844 /*
2845 * Purgeable mappings made in a kernel
2846 * map are "owned" by the kernel itself
2847 * rather than the current user task
2848 * because they're likely to be used by
2849 * more than this user task (see
2850 * execargs_purgeable_allocate(), for
2851 * example).
2852 */
2853 owner = kernel_task;
2854 } else {
2855 owner = current_task();
2856 }
2857 assert(object->vo_owner == NULL);
2858 assert(object->resident_page_count == 0);
2859 assert(object->wired_page_count == 0);
2860 vm_object_lock(object);
2861 vm_purgeable_nonvolatile_enqueue(object, owner);
2862 vm_object_unlock(object);
2863 }
2864 offset = (vm_object_offset_t)0;
2865 }
2866 } else if ((is_submap == FALSE) &&
2867 (object == VM_OBJECT_NULL) &&
2868 (entry != vm_map_to_entry(map)) &&
2869 (entry->vme_end == start) &&
2870 (!entry->is_shared) &&
2871 (!entry->is_sub_map) &&
2872 (!entry->in_transition) &&
2873 (!entry->needs_wakeup) &&
2874 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2875 (entry->protection == cur_protection) &&
2876 (entry->max_protection == max_protection) &&
2877 (entry->inheritance == inheritance) &&
2878 ((user_alias == VM_MEMORY_REALLOC) ||
2879 (VME_ALIAS(entry) == alias)) &&
2880 (entry->no_cache == no_cache) &&
2881 (entry->permanent == permanent) &&
2882 /* no coalescing for immutable executable mappings */
2883 !((entry->protection & VM_PROT_EXECUTE) &&
2884 entry->permanent) &&
2885 (!entry->superpage_size && !superpage_size) &&
2886 /*
2887 * No coalescing if not map-aligned, to avoid propagating
2888 * that condition any further than needed:
2889 */
2890 (!entry->map_aligned || !clear_map_aligned) &&
2891 (!entry->zero_wired_pages) &&
2892 (!entry->used_for_jit && !entry_for_jit) &&
2893 (!entry->pmap_cs_associated) &&
2894 (entry->iokit_acct == iokit_acct) &&
2895 (!entry->vme_resilient_codesign) &&
2896 (!entry->vme_resilient_media) &&
2897 (!entry->vme_atomic) &&
2898 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2899
2900 ((entry->vme_end - entry->vme_start) + size <=
2901 (user_alias == VM_MEMORY_REALLOC ?
2902 ANON_CHUNK_SIZE :
2903 NO_COALESCE_LIMIT)) &&
2904
2905 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2906 if (vm_object_coalesce(VME_OBJECT(entry),
2907 VM_OBJECT_NULL,
2908 VME_OFFSET(entry),
2909 (vm_object_offset_t) 0,
2910 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2911 (vm_map_size_t)(end - entry->vme_end))) {
2912 /*
2913 * Coalesced the two objects - can extend
2914 * the previous map entry to include the
2915 * new range.
2916 */
2917 map->size += (end - entry->vme_end);
2918 assert(entry->vme_start < end);
2919 assert(VM_MAP_PAGE_ALIGNED(end,
2920 VM_MAP_PAGE_MASK(map)));
2921 if (__improbable(vm_debug_events)) {
2922 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2923 }
2924 entry->vme_end = end;
2925 if (map->holelistenabled) {
2926 vm_map_store_update_first_free(map, entry, TRUE);
2927 } else {
2928 vm_map_store_update_first_free(map, map->first_free, TRUE);
2929 }
2930 new_mapping_established = TRUE;
2931 RETURN(KERN_SUCCESS);
2932 }
2933 }
2934
2935 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2936 new_entry = NULL;
2937
2938 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2939 tmp2_end = tmp2_start + step;
2940 /*
2941 * Create a new entry
2942 *
2943 * XXX FBDP
2944 * The reserved "page zero" in each process's address space can
2945 * be arbitrarily large. Splitting it into separate objects and
2946 * therefore different VM map entries serves no purpose and just
2947 * slows down operations on the VM map, so let's not split the
2948 * allocation into chunks if the max protection is NONE. That
2949 * memory should never be accessible, so it will never get to the
2950 * default pager.
2951 */
2952 tmp_start = tmp2_start;
2953 if (object == VM_OBJECT_NULL &&
2954 size > chunk_size &&
2955 max_protection != VM_PROT_NONE &&
2956 superpage_size == 0) {
2957 tmp_end = tmp_start + chunk_size;
2958 } else {
2959 tmp_end = tmp2_end;
2960 }
2961 do {
2962 new_entry = vm_map_entry_insert(
2963 map, entry, tmp_start, tmp_end,
2964 object, offset, needs_copy,
2965 FALSE, FALSE,
2966 cur_protection, max_protection,
2967 VM_BEHAVIOR_DEFAULT,
2968 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2969 0,
2970 no_cache,
2971 permanent,
2972 no_copy_on_read,
2973 superpage_size,
2974 clear_map_aligned,
2975 is_submap,
2976 entry_for_jit,
2977 alias);
2978
2979 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2980
2981 if (resilient_codesign &&
2982 !((cur_protection | max_protection) &
2983 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2984 new_entry->vme_resilient_codesign = TRUE;
2985 }
2986
2987 if (resilient_media &&
2988 (object == VM_OBJECT_NULL ||
2989 object->internal)) {
2990 new_entry->vme_resilient_media = TRUE;
2991 }
2992
2993 assert(!new_entry->iokit_acct);
2994 if (!is_submap &&
2995 object != VM_OBJECT_NULL &&
2996 (object->purgable != VM_PURGABLE_DENY ||
2997 object->vo_ledger_tag)) {
2998 assert(new_entry->use_pmap);
2999 assert(!new_entry->iokit_acct);
3000 /*
3001 * Turn off pmap accounting since
3002 * purgeable (or tagged) objects have their
3003 * own ledgers.
3004 */
3005 new_entry->use_pmap = FALSE;
3006 } else if (!is_submap &&
3007 iokit_acct &&
3008 object != VM_OBJECT_NULL &&
3009 object->internal) {
3010 /* alternate accounting */
3011 assert(!new_entry->iokit_acct);
3012 assert(new_entry->use_pmap);
3013 new_entry->iokit_acct = TRUE;
3014 new_entry->use_pmap = FALSE;
3015 DTRACE_VM4(
3016 vm_map_iokit_mapped_region,
3017 vm_map_t, map,
3018 vm_map_offset_t, new_entry->vme_start,
3019 vm_map_offset_t, new_entry->vme_end,
3020 int, VME_ALIAS(new_entry));
3021 vm_map_iokit_mapped_region(
3022 map,
3023 (new_entry->vme_end -
3024 new_entry->vme_start));
3025 } else if (!is_submap) {
3026 assert(!new_entry->iokit_acct);
3027 assert(new_entry->use_pmap);
3028 }
3029
3030 if (is_submap) {
3031 vm_map_t submap;
3032 boolean_t submap_is_64bit;
3033 boolean_t use_pmap;
3034
3035 assert(new_entry->is_sub_map);
3036 assert(!new_entry->use_pmap);
3037 assert(!new_entry->iokit_acct);
3038 submap = (vm_map_t) object;
3039 submap_is_64bit = vm_map_is_64bit(submap);
3040 use_pmap = vmk_flags.vmkf_nested_pmap;
3041 #ifndef NO_NESTED_PMAP
3042 if (use_pmap && submap->pmap == NULL) {
3043 ledger_t ledger = map->pmap->ledger;
3044 /* we need a sub pmap to nest... */
3045 submap->pmap = pmap_create_options(ledger, 0,
3046 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3047 if (submap->pmap == NULL) {
3048 /* let's proceed without nesting... */
3049 }
3050 #if defined(__arm__) || defined(__arm64__)
3051 else {
3052 pmap_set_nested(submap->pmap);
3053 }
3054 #endif
3055 }
3056 if (use_pmap && submap->pmap != NULL) {
3057 kr = pmap_nest(map->pmap,
3058 submap->pmap,
3059 tmp_start,
3060 tmp_start,
3061 tmp_end - tmp_start);
3062 if (kr != KERN_SUCCESS) {
3063 printf("vm_map_enter: "
3064 "pmap_nest(0x%llx,0x%llx) "
3065 "error 0x%x\n",
3066 (long long)tmp_start,
3067 (long long)tmp_end,
3068 kr);
3069 } else {
3070 /* we're now nested ! */
3071 new_entry->use_pmap = TRUE;
3072 pmap_empty = FALSE;
3073 }
3074 }
3075 #endif /* NO_NESTED_PMAP */
3076 }
3077 entry = new_entry;
3078
3079 if (superpage_size) {
3080 vm_page_t pages, m;
3081 vm_object_t sp_object;
3082 vm_object_offset_t sp_offset;
3083
3084 VME_OFFSET_SET(entry, 0);
3085
3086 /* allocate one superpage */
3087 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3088 if (kr != KERN_SUCCESS) {
3089 /* deallocate whole range... */
3090 new_mapping_established = TRUE;
3091 /* ... but only up to "tmp_end" */
3092 size -= end - tmp_end;
3093 RETURN(kr);
3094 }
3095
3096 /* create one vm_object per superpage */
3097 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3098 sp_object->phys_contiguous = TRUE;
3099 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3100 VME_OBJECT_SET(entry, sp_object);
3101 assert(entry->use_pmap);
3102
3103 /* enter the base pages into the object */
3104 vm_object_lock(sp_object);
3105 for (sp_offset = 0;
3106 sp_offset < SUPERPAGE_SIZE;
3107 sp_offset += PAGE_SIZE) {
3108 m = pages;
3109 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3110 pages = NEXT_PAGE(m);
3111 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3112 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3113 }
3114 vm_object_unlock(sp_object);
3115 }
3116 } while (tmp_end != tmp2_end &&
3117 (tmp_start = tmp_end) &&
3118 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3119 tmp_end + chunk_size : tmp2_end));
3120 }
3121
3122 new_mapping_established = TRUE;
3123
3124 BailOut:
3125 assert(map_locked == TRUE);
3126
3127 if (result == KERN_SUCCESS) {
3128 vm_prot_t pager_prot;
3129 memory_object_t pager;
3130
3131 #if DEBUG
3132 if (pmap_empty &&
3133 !(vmk_flags.vmkf_no_pmap_check)) {
3134 assert(vm_map_pmap_is_empty(map,
3135 *address,
3136 *address + size));
3137 }
3138 #endif /* DEBUG */
3139
3140 /*
3141 * For "named" VM objects, let the pager know that the
3142 * memory object is being mapped. Some pagers need to keep
3143 * track of this, to know when they can reclaim the memory
3144 * object, for example.
3145 * VM calls memory_object_map() for each mapping (specifying
3146 * the protection of each mapping) and calls
3147 * memory_object_last_unmap() when all the mappings are gone.
3148 */
3149 pager_prot = max_protection;
3150 if (needs_copy) {
3151 /*
3152 * Copy-On-Write mapping: won't modify
3153 * the memory object.
3154 */
3155 pager_prot &= ~VM_PROT_WRITE;
3156 }
3157 if (!is_submap &&
3158 object != VM_OBJECT_NULL &&
3159 object->named &&
3160 object->pager != MEMORY_OBJECT_NULL) {
3161 vm_object_lock(object);
3162 pager = object->pager;
3163 if (object->named &&
3164 pager != MEMORY_OBJECT_NULL) {
3165 assert(object->pager_ready);
3166 vm_object_mapping_wait(object, THREAD_UNINT);
3167 vm_object_mapping_begin(object);
3168 vm_object_unlock(object);
3169
3170 kr = memory_object_map(pager, pager_prot);
3171 assert(kr == KERN_SUCCESS);
3172
3173 vm_object_lock(object);
3174 vm_object_mapping_end(object);
3175 }
3176 vm_object_unlock(object);
3177 }
3178 }
3179
3180 assert(map_locked == TRUE);
3181
3182 if (!keep_map_locked) {
3183 vm_map_unlock(map);
3184 map_locked = FALSE;
3185 }
3186
3187 /*
3188 * We can't hold the map lock if we enter this block.
3189 */
3190
3191 if (result == KERN_SUCCESS) {
3192 /* Wire down the new entry if the user
3193 * requested all new map entries be wired.
3194 */
3195 if ((map->wiring_required) || (superpage_size)) {
3196 assert(!keep_map_locked);
3197 pmap_empty = FALSE; /* pmap won't be empty */
3198 kr = vm_map_wire_kernel(map, start, end,
3199 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3200 TRUE);
3201 result = kr;
3202 }
3203
3204 }
3205
3206 if (result != KERN_SUCCESS) {
3207 if (new_mapping_established) {
3208 /*
3209 * We have to get rid of the new mappings since we
3210 * won't make them available to the user.
3211 * Try to do that atomically, to minimize the risk
3212 * that someone else creates new mappings in that range.
3213 */
3214 zap_new_map = vm_map_create(PMAP_NULL,
3215 *address,
3216 *address + size,
3217 map->hdr.entries_pageable);
3218 vm_map_set_page_shift(zap_new_map,
3219 VM_MAP_PAGE_SHIFT(map));
3220 vm_map_disable_hole_optimization(zap_new_map);
3221
3222 if (!map_locked) {
3223 vm_map_lock(map);
3224 map_locked = TRUE;
3225 }
3226 (void) vm_map_delete(map, *address, *address + size,
3227 (VM_MAP_REMOVE_SAVE_ENTRIES |
3228 VM_MAP_REMOVE_NO_MAP_ALIGN),
3229 zap_new_map);
3230 }
3231 if (zap_old_map != VM_MAP_NULL &&
3232 zap_old_map->hdr.nentries != 0) {
3233 vm_map_entry_t entry1, entry2;
3234
3235 /*
3236 * The new mapping failed. Attempt to restore
3237 * the old mappings, saved in the "zap_old_map".
3238 */
3239 if (!map_locked) {
3240 vm_map_lock(map);
3241 map_locked = TRUE;
3242 }
3243
3244 /* first check if the coast is still clear */
3245 start = vm_map_first_entry(zap_old_map)->vme_start;
3246 end = vm_map_last_entry(zap_old_map)->vme_end;
3247 if (vm_map_lookup_entry(map, start, &entry1) ||
3248 vm_map_lookup_entry(map, end, &entry2) ||
3249 entry1 != entry2) {
3250 /*
3251 * Part of that range has already been
3252 * re-mapped: we can't restore the old
3253 * mappings...
3254 */
3255 vm_map_enter_restore_failures++;
3256 } else {
3257 /*
3258 * Transfer the saved map entries from
3259 * "zap_old_map" to the original "map",
3260 * inserting them all after "entry1".
3261 */
3262 for (entry2 = vm_map_first_entry(zap_old_map);
3263 entry2 != vm_map_to_entry(zap_old_map);
3264 entry2 = vm_map_first_entry(zap_old_map)) {
3265 vm_map_size_t entry_size;
3266
3267 entry_size = (entry2->vme_end -
3268 entry2->vme_start);
3269 vm_map_store_entry_unlink(zap_old_map,
3270 entry2);
3271 zap_old_map->size -= entry_size;
3272 vm_map_store_entry_link(map, entry1, entry2,
3273 VM_MAP_KERNEL_FLAGS_NONE);
3274 map->size += entry_size;
3275 entry1 = entry2;
3276 }
3277 if (map->wiring_required) {
3278 /*
3279 * XXX TODO: we should rewire the
3280 * old pages here...
3281 */
3282 }
3283 vm_map_enter_restore_successes++;
3284 }
3285 }
3286 }
3287
3288 /*
3289 * The caller is responsible for releasing the lock if it requested to
3290 * keep the map locked.
3291 */
3292 if (map_locked && !keep_map_locked) {
3293 vm_map_unlock(map);
3294 }
3295
3296 /*
3297 * Get rid of the "zap_maps" and all the map entries that
3298 * they may still contain.
3299 */
3300 if (zap_old_map != VM_MAP_NULL) {
3301 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3302 zap_old_map = VM_MAP_NULL;
3303 }
3304 if (zap_new_map != VM_MAP_NULL) {
3305 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3306 zap_new_map = VM_MAP_NULL;
3307 }
3308
3309 return result;
3310
3311 #undef RETURN
3312 }
3313
3314 #if __arm64__
3315 extern const struct memory_object_pager_ops fourk_pager_ops;
3316 kern_return_t
3317 vm_map_enter_fourk(
3318 vm_map_t map,
3319 vm_map_offset_t *address, /* IN/OUT */
3320 vm_map_size_t size,
3321 vm_map_offset_t mask,
3322 int flags,
3323 vm_map_kernel_flags_t vmk_flags,
3324 vm_tag_t alias,
3325 vm_object_t object,
3326 vm_object_offset_t offset,
3327 boolean_t needs_copy,
3328 vm_prot_t cur_protection,
3329 vm_prot_t max_protection,
3330 vm_inherit_t inheritance)
3331 {
3332 vm_map_entry_t entry, new_entry;
3333 vm_map_offset_t start, fourk_start;
3334 vm_map_offset_t end, fourk_end;
3335 vm_map_size_t fourk_size;
3336 kern_return_t result = KERN_SUCCESS;
3337 vm_map_t zap_old_map = VM_MAP_NULL;
3338 vm_map_t zap_new_map = VM_MAP_NULL;
3339 boolean_t map_locked = FALSE;
3340 boolean_t pmap_empty = TRUE;
3341 boolean_t new_mapping_established = FALSE;
3342 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3343 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3344 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3345 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3346 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3347 boolean_t is_submap = vmk_flags.vmkf_submap;
3348 boolean_t permanent = vmk_flags.vmkf_permanent;
3349 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3350 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3351 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3352 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3353 vm_map_offset_t effective_min_offset, effective_max_offset;
3354 kern_return_t kr;
3355 boolean_t clear_map_aligned = FALSE;
3356 memory_object_t fourk_mem_obj;
3357 vm_object_t fourk_object;
3358 vm_map_offset_t fourk_pager_offset;
3359 int fourk_pager_index_start, fourk_pager_index_num;
3360 int cur_idx;
3361 boolean_t fourk_copy;
3362 vm_object_t copy_object;
3363 vm_object_offset_t copy_offset;
3364
3365 fourk_mem_obj = MEMORY_OBJECT_NULL;
3366 fourk_object = VM_OBJECT_NULL;
3367
3368 if (superpage_size) {
3369 return KERN_NOT_SUPPORTED;
3370 }
3371
3372 if ((cur_protection & VM_PROT_WRITE) &&
3373 (cur_protection & VM_PROT_EXECUTE) &&
3374 #if !CONFIG_EMBEDDED
3375 map != kernel_map &&
3376 cs_process_enforcement(NULL) &&
3377 #endif /* !CONFIG_EMBEDDED */
3378 !entry_for_jit) {
3379 DTRACE_VM3(cs_wx,
3380 uint64_t, 0,
3381 uint64_t, 0,
3382 vm_prot_t, cur_protection);
3383 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3384 "turning off execute\n",
3385 proc_selfpid(),
3386 (current_task()->bsd_info
3387 ? proc_name_address(current_task()->bsd_info)
3388 : "?"),
3389 __FUNCTION__);
3390 cur_protection &= ~VM_PROT_EXECUTE;
3391 }
3392
3393 /*
3394 * If the task has requested executable lockdown,
3395 * deny any new executable mapping.
3396 */
3397 if (map->map_disallow_new_exec == TRUE) {
3398 if (cur_protection & VM_PROT_EXECUTE) {
3399 return KERN_PROTECTION_FAILURE;
3400 }
3401 }
3402
3403 if (is_submap) {
3404 return KERN_NOT_SUPPORTED;
3405 }
3406 if (vmk_flags.vmkf_already) {
3407 return KERN_NOT_SUPPORTED;
3408 }
3409 if (purgable || entry_for_jit) {
3410 return KERN_NOT_SUPPORTED;
3411 }
3412
3413 effective_min_offset = map->min_offset;
3414
3415 if (vmk_flags.vmkf_beyond_max) {
3416 return KERN_NOT_SUPPORTED;
3417 } else {
3418 effective_max_offset = map->max_offset;
3419 }
3420
3421 if (size == 0 ||
3422 (offset & FOURK_PAGE_MASK) != 0) {
3423 *address = 0;
3424 return KERN_INVALID_ARGUMENT;
3425 }
3426
3427 #define RETURN(value) { result = value; goto BailOut; }
3428
3429 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3430 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3431
3432 if (!anywhere && overwrite) {
3433 return KERN_NOT_SUPPORTED;
3434 }
3435 if (!anywhere && overwrite) {
3436 /*
3437 * Create a temporary VM map to hold the old mappings in the
3438 * affected area while we create the new one.
3439 * This avoids releasing the VM map lock in
3440 * vm_map_entry_delete() and allows atomicity
3441 * when we want to replace some mappings with a new one.
3442 * It also allows us to restore the old VM mappings if the
3443 * new mapping fails.
3444 */
3445 zap_old_map = vm_map_create(PMAP_NULL,
3446 *address,
3447 *address + size,
3448 map->hdr.entries_pageable);
3449 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3450 vm_map_disable_hole_optimization(zap_old_map);
3451 }
3452
3453 fourk_start = *address;
3454 fourk_size = size;
3455 fourk_end = fourk_start + fourk_size;
3456
3457 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3458 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3459 size = end - start;
3460
3461 if (anywhere) {
3462 return KERN_NOT_SUPPORTED;
3463 } else {
3464 /*
3465 * Verify that:
3466 * the address doesn't itself violate
3467 * the mask requirement.
3468 */
3469
3470 vm_map_lock(map);
3471 map_locked = TRUE;
3472 if ((start & mask) != 0) {
3473 RETURN(KERN_NO_SPACE);
3474 }
3475
3476 /*
3477 * ... the address is within bounds
3478 */
3479
3480 end = start + size;
3481
3482 if ((start < effective_min_offset) ||
3483 (end > effective_max_offset) ||
3484 (start >= end)) {
3485 RETURN(KERN_INVALID_ADDRESS);
3486 }
3487
3488 if (overwrite && zap_old_map != VM_MAP_NULL) {
3489 /*
3490 * Fixed mapping and "overwrite" flag: attempt to
3491 * remove all existing mappings in the specified
3492 * address range, saving them in our "zap_old_map".
3493 */
3494 (void) vm_map_delete(map, start, end,
3495 (VM_MAP_REMOVE_SAVE_ENTRIES |
3496 VM_MAP_REMOVE_NO_MAP_ALIGN),
3497 zap_old_map);
3498 }
3499
3500 /*
3501 * ... the starting address isn't allocated
3502 */
3503 if (vm_map_lookup_entry(map, start, &entry)) {
3504 vm_object_t cur_object, shadow_object;
3505
3506 /*
3507 * We might already have some 4K mappings
3508 * in a 16K page here.
3509 */
3510
3511 if (entry->vme_end - entry->vme_start
3512 != SIXTEENK_PAGE_SIZE) {
3513 RETURN(KERN_NO_SPACE);
3514 }
3515 if (entry->is_sub_map) {
3516 RETURN(KERN_NO_SPACE);
3517 }
3518 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3519 RETURN(KERN_NO_SPACE);
3520 }
3521
3522 /* go all the way down the shadow chain */
3523 cur_object = VME_OBJECT(entry);
3524 vm_object_lock(cur_object);
3525 while (cur_object->shadow != VM_OBJECT_NULL) {
3526 shadow_object = cur_object->shadow;
3527 vm_object_lock(shadow_object);
3528 vm_object_unlock(cur_object);
3529 cur_object = shadow_object;
3530 shadow_object = VM_OBJECT_NULL;
3531 }
3532 if (cur_object->internal ||
3533 cur_object->pager == NULL) {
3534 vm_object_unlock(cur_object);
3535 RETURN(KERN_NO_SPACE);
3536 }
3537 if (cur_object->pager->mo_pager_ops
3538 != &fourk_pager_ops) {
3539 vm_object_unlock(cur_object);
3540 RETURN(KERN_NO_SPACE);
3541 }
3542 fourk_object = cur_object;
3543 fourk_mem_obj = fourk_object->pager;
3544
3545 /* keep the "4K" object alive */
3546 vm_object_reference_locked(fourk_object);
3547 vm_object_unlock(fourk_object);
3548
3549 /* merge permissions */
3550 entry->protection |= cur_protection;
3551 entry->max_protection |= max_protection;
3552 if ((entry->protection & (VM_PROT_WRITE |
3553 VM_PROT_EXECUTE)) ==
3554 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3555 fourk_binary_compatibility_unsafe &&
3556 fourk_binary_compatibility_allow_wx) {
3557 /* write+execute: need to be "jit" */
3558 entry->used_for_jit = TRUE;
3559 }
3560
3561 goto map_in_fourk_pager;
3562 }
3563
3564 /*
3565 * ... the next region doesn't overlap the
3566 * end point.
3567 */
3568
3569 if ((entry->vme_next != vm_map_to_entry(map)) &&
3570 (entry->vme_next->vme_start < end)) {
3571 RETURN(KERN_NO_SPACE);
3572 }
3573 }
3574
3575 /*
3576 * At this point,
3577 * "start" and "end" should define the endpoints of the
3578 * available new range, and
3579 * "entry" should refer to the region before the new
3580 * range, and
3581 *
3582 * the map should be locked.
3583 */
3584
3585 /* create a new "4K" pager */
3586 fourk_mem_obj = fourk_pager_create();
3587 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3588 assert(fourk_object);
3589
3590 /* keep the "4K" object alive */
3591 vm_object_reference(fourk_object);
3592
3593 /* create a "copy" object, to map the "4K" object copy-on-write */
3594 fourk_copy = TRUE;
3595 result = vm_object_copy_strategically(fourk_object,
3596 0,
3597 end - start,
3598 &copy_object,
3599 &copy_offset,
3600 &fourk_copy);
3601 assert(result == KERN_SUCCESS);
3602 assert(copy_object != VM_OBJECT_NULL);
3603 assert(copy_offset == 0);
3604
3605 /* take a reference on the copy object, for this mapping */
3606 vm_object_reference(copy_object);
3607
3608 /* map the "4K" pager's copy object */
3609 new_entry =
3610 vm_map_entry_insert(map, entry,
3611 vm_map_trunc_page(start,
3612 VM_MAP_PAGE_MASK(map)),
3613 vm_map_round_page(end,
3614 VM_MAP_PAGE_MASK(map)),
3615 copy_object,
3616 0, /* offset */
3617 FALSE, /* needs_copy */
3618 FALSE,
3619 FALSE,
3620 cur_protection, max_protection,
3621 VM_BEHAVIOR_DEFAULT,
3622 ((entry_for_jit)
3623 ? VM_INHERIT_NONE
3624 : inheritance),
3625 0,
3626 no_cache,
3627 permanent,
3628 no_copy_on_read,
3629 superpage_size,
3630 clear_map_aligned,
3631 is_submap,
3632 FALSE, /* jit */
3633 alias);
3634 entry = new_entry;
3635
3636 #if VM_MAP_DEBUG_FOURK
3637 if (vm_map_debug_fourk) {
3638 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3639 map,
3640 (uint64_t) entry->vme_start,
3641 (uint64_t) entry->vme_end,
3642 fourk_mem_obj);
3643 }
3644 #endif /* VM_MAP_DEBUG_FOURK */
3645
3646 new_mapping_established = TRUE;
3647
3648 map_in_fourk_pager:
3649 /* "map" the original "object" where it belongs in the "4K" pager */
3650 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3651 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3652 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3653 fourk_pager_index_num = 4;
3654 } else {
3655 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3656 }
3657 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3658 fourk_pager_index_num = 4 - fourk_pager_index_start;
3659 }
3660 for (cur_idx = 0;
3661 cur_idx < fourk_pager_index_num;
3662 cur_idx++) {
3663 vm_object_t old_object;
3664 vm_object_offset_t old_offset;
3665
3666 kr = fourk_pager_populate(fourk_mem_obj,
3667 TRUE, /* overwrite */
3668 fourk_pager_index_start + cur_idx,
3669 object,
3670 (object
3671 ? (offset +
3672 (cur_idx * FOURK_PAGE_SIZE))
3673 : 0),
3674 &old_object,
3675 &old_offset);
3676 #if VM_MAP_DEBUG_FOURK
3677 if (vm_map_debug_fourk) {
3678 if (old_object == (vm_object_t) -1 &&
3679 old_offset == (vm_object_offset_t) -1) {
3680 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3681 "pager [%p:0x%llx] "
3682 "populate[%d] "
3683 "[object:%p,offset:0x%llx]\n",
3684 map,
3685 (uint64_t) entry->vme_start,
3686 (uint64_t) entry->vme_end,
3687 fourk_mem_obj,
3688 VME_OFFSET(entry),
3689 fourk_pager_index_start + cur_idx,
3690 object,
3691 (object
3692 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3693 : 0));
3694 } else {
3695 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3696 "pager [%p:0x%llx] "
3697 "populate[%d] [object:%p,offset:0x%llx] "
3698 "old [%p:0x%llx]\n",
3699 map,
3700 (uint64_t) entry->vme_start,
3701 (uint64_t) entry->vme_end,
3702 fourk_mem_obj,
3703 VME_OFFSET(entry),
3704 fourk_pager_index_start + cur_idx,
3705 object,
3706 (object
3707 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3708 : 0),
3709 old_object,
3710 old_offset);
3711 }
3712 }
3713 #endif /* VM_MAP_DEBUG_FOURK */
3714
3715 assert(kr == KERN_SUCCESS);
3716 if (object != old_object &&
3717 object != VM_OBJECT_NULL &&
3718 object != (vm_object_t) -1) {
3719 vm_object_reference(object);
3720 }
3721 if (object != old_object &&
3722 old_object != VM_OBJECT_NULL &&
3723 old_object != (vm_object_t) -1) {
3724 vm_object_deallocate(old_object);
3725 }
3726 }
3727
3728 BailOut:
3729 assert(map_locked == TRUE);
3730
3731 if (fourk_object != VM_OBJECT_NULL) {
3732 vm_object_deallocate(fourk_object);
3733 fourk_object = VM_OBJECT_NULL;
3734 fourk_mem_obj = MEMORY_OBJECT_NULL;
3735 }
3736
3737 if (result == KERN_SUCCESS) {
3738 vm_prot_t pager_prot;
3739 memory_object_t pager;
3740
3741 #if DEBUG
3742 if (pmap_empty &&
3743 !(vmk_flags.vmkf_no_pmap_check)) {
3744 assert(vm_map_pmap_is_empty(map,
3745 *address,
3746 *address + size));
3747 }
3748 #endif /* DEBUG */
3749
3750 /*
3751 * For "named" VM objects, let the pager know that the
3752 * memory object is being mapped. Some pagers need to keep
3753 * track of this, to know when they can reclaim the memory
3754 * object, for example.
3755 * VM calls memory_object_map() for each mapping (specifying
3756 * the protection of each mapping) and calls
3757 * memory_object_last_unmap() when all the mappings are gone.
3758 */
3759 pager_prot = max_protection;
3760 if (needs_copy) {
3761 /*
3762 * Copy-On-Write mapping: won't modify
3763 * the memory object.
3764 */
3765 pager_prot &= ~VM_PROT_WRITE;
3766 }
3767 if (!is_submap &&
3768 object != VM_OBJECT_NULL &&
3769 object->named &&
3770 object->pager != MEMORY_OBJECT_NULL) {
3771 vm_object_lock(object);
3772 pager = object->pager;
3773 if (object->named &&
3774 pager != MEMORY_OBJECT_NULL) {
3775 assert(object->pager_ready);
3776 vm_object_mapping_wait(object, THREAD_UNINT);
3777 vm_object_mapping_begin(object);
3778 vm_object_unlock(object);
3779
3780 kr = memory_object_map(pager, pager_prot);
3781 assert(kr == KERN_SUCCESS);
3782
3783 vm_object_lock(object);
3784 vm_object_mapping_end(object);
3785 }
3786 vm_object_unlock(object);
3787 }
3788 if (!is_submap &&
3789 fourk_object != VM_OBJECT_NULL &&
3790 fourk_object->named &&
3791 fourk_object->pager != MEMORY_OBJECT_NULL) {
3792 vm_object_lock(fourk_object);
3793 pager = fourk_object->pager;
3794 if (fourk_object->named &&
3795 pager != MEMORY_OBJECT_NULL) {
3796 assert(fourk_object->pager_ready);
3797 vm_object_mapping_wait(fourk_object,
3798 THREAD_UNINT);
3799 vm_object_mapping_begin(fourk_object);
3800 vm_object_unlock(fourk_object);
3801
3802 kr = memory_object_map(pager, VM_PROT_READ);
3803 assert(kr == KERN_SUCCESS);
3804
3805 vm_object_lock(fourk_object);
3806 vm_object_mapping_end(fourk_object);
3807 }
3808 vm_object_unlock(fourk_object);
3809 }
3810 }
3811
3812 assert(map_locked == TRUE);
3813
3814 if (!keep_map_locked) {
3815 vm_map_unlock(map);
3816 map_locked = FALSE;
3817 }
3818
3819 /*
3820 * We can't hold the map lock if we enter this block.
3821 */
3822
3823 if (result == KERN_SUCCESS) {
3824 /* Wire down the new entry if the user
3825 * requested all new map entries be wired.
3826 */
3827 if ((map->wiring_required) || (superpage_size)) {
3828 assert(!keep_map_locked);
3829 pmap_empty = FALSE; /* pmap won't be empty */
3830 kr = vm_map_wire_kernel(map, start, end,
3831 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3832 TRUE);
3833 result = kr;
3834 }
3835
3836 }
3837
3838 if (result != KERN_SUCCESS) {
3839 if (new_mapping_established) {
3840 /*
3841 * We have to get rid of the new mappings since we
3842 * won't make them available to the user.
3843 * Try to do that atomically, to minimize the risk
3844 * that someone else creates new mappings in that range.
3845 */
3846 zap_new_map = vm_map_create(PMAP_NULL,
3847 *address,
3848 *address + size,
3849 map->hdr.entries_pageable);
3850 vm_map_set_page_shift(zap_new_map,
3851 VM_MAP_PAGE_SHIFT(map));
3852 vm_map_disable_hole_optimization(zap_new_map);
3853
3854 if (!map_locked) {
3855 vm_map_lock(map);
3856 map_locked = TRUE;
3857 }
3858 (void) vm_map_delete(map, *address, *address + size,
3859 (VM_MAP_REMOVE_SAVE_ENTRIES |
3860 VM_MAP_REMOVE_NO_MAP_ALIGN),
3861 zap_new_map);
3862 }
3863 if (zap_old_map != VM_MAP_NULL &&
3864 zap_old_map->hdr.nentries != 0) {
3865 vm_map_entry_t entry1, entry2;
3866
3867 /*
3868 * The new mapping failed. Attempt to restore
3869 * the old mappings, saved in the "zap_old_map".
3870 */
3871 if (!map_locked) {
3872 vm_map_lock(map);
3873 map_locked = TRUE;
3874 }
3875
3876 /* first check if the coast is still clear */
3877 start = vm_map_first_entry(zap_old_map)->vme_start;
3878 end = vm_map_last_entry(zap_old_map)->vme_end;
3879 if (vm_map_lookup_entry(map, start, &entry1) ||
3880 vm_map_lookup_entry(map, end, &entry2) ||
3881 entry1 != entry2) {
3882 /*
3883 * Part of that range has already been
3884 * re-mapped: we can't restore the old
3885 * mappings...
3886 */
3887 vm_map_enter_restore_failures++;
3888 } else {
3889 /*
3890 * Transfer the saved map entries from
3891 * "zap_old_map" to the original "map",
3892 * inserting them all after "entry1".
3893 */
3894 for (entry2 = vm_map_first_entry(zap_old_map);
3895 entry2 != vm_map_to_entry(zap_old_map);
3896 entry2 = vm_map_first_entry(zap_old_map)) {
3897 vm_map_size_t entry_size;
3898
3899 entry_size = (entry2->vme_end -
3900 entry2->vme_start);
3901 vm_map_store_entry_unlink(zap_old_map,
3902 entry2);
3903 zap_old_map->size -= entry_size;
3904 vm_map_store_entry_link(map, entry1, entry2,
3905 VM_MAP_KERNEL_FLAGS_NONE);
3906 map->size += entry_size;
3907 entry1 = entry2;
3908 }
3909 if (map->wiring_required) {
3910 /*
3911 * XXX TODO: we should rewire the
3912 * old pages here...
3913 */
3914 }
3915 vm_map_enter_restore_successes++;
3916 }
3917 }
3918 }
3919
3920 /*
3921 * The caller is responsible for releasing the lock if it requested to
3922 * keep the map locked.
3923 */
3924 if (map_locked && !keep_map_locked) {
3925 vm_map_unlock(map);
3926 }
3927
3928 /*
3929 * Get rid of the "zap_maps" and all the map entries that
3930 * they may still contain.
3931 */
3932 if (zap_old_map != VM_MAP_NULL) {
3933 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3934 zap_old_map = VM_MAP_NULL;
3935 }
3936 if (zap_new_map != VM_MAP_NULL) {
3937 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3938 zap_new_map = VM_MAP_NULL;
3939 }
3940
3941 return result;
3942
3943 #undef RETURN
3944 }
3945 #endif /* __arm64__ */
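
/*
 * Illustrative sketch (not part of the original source) of the "zap map"
 * rollback pattern used on failure in the function above: mappings that
 * will not be published to the caller are moved into a throw-away map and
 * destroyed with it.  "start" and "end" below are placeholders for the
 * range being rolled back.
 *
 *	zap_map = vm_map_create(PMAP_NULL, start, end,
 *	    map->hdr.entries_pageable);
 *	vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
 *	vm_map_disable_hole_optimization(zap_map);
 *	(void) vm_map_delete(map, start, end,
 *	    (VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN),
 *	    zap_map);
 *	...
 *	vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
 */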
3946
3947 /*
3948 * Counters for the prefault optimization.
3949 */
3950 int64_t vm_prefault_nb_pages = 0;
3951 int64_t vm_prefault_nb_bailout = 0;
3952
3953 static kern_return_t
3954 vm_map_enter_mem_object_helper(
3955 vm_map_t target_map,
3956 vm_map_offset_t *address,
3957 vm_map_size_t initial_size,
3958 vm_map_offset_t mask,
3959 int flags,
3960 vm_map_kernel_flags_t vmk_flags,
3961 vm_tag_t tag,
3962 ipc_port_t port,
3963 vm_object_offset_t offset,
3964 boolean_t copy,
3965 vm_prot_t cur_protection,
3966 vm_prot_t max_protection,
3967 vm_inherit_t inheritance,
3968 upl_page_list_ptr_t page_list,
3969 unsigned int page_list_count)
3970 {
3971 vm_map_address_t map_addr;
3972 vm_map_size_t map_size;
3973 vm_object_t object;
3974 vm_object_size_t size;
3975 kern_return_t result;
3976 boolean_t mask_cur_protection, mask_max_protection;
3977 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3978 vm_map_offset_t offset_in_mapping = 0;
3979 #if __arm64__
3980 boolean_t fourk = vmk_flags.vmkf_fourk;
3981 #endif /* __arm64__ */
3982
3983 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3984
3985 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3986 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3987 cur_protection &= ~VM_PROT_IS_MASK;
3988 max_protection &= ~VM_PROT_IS_MASK;
3989
3990 /*
3991 * Check arguments for validity
3992 */
3993 if ((target_map == VM_MAP_NULL) ||
3994 (cur_protection & ~VM_PROT_ALL) ||
3995 (max_protection & ~VM_PROT_ALL) ||
3996 (inheritance > VM_INHERIT_LAST_VALID) ||
3997 (try_prefault && (copy || !page_list)) ||
3998 initial_size == 0) {
3999 return KERN_INVALID_ARGUMENT;
4000 }
4001
4002 #if __arm64__
4003 if (fourk) {
4004 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4005 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4006 } else
4007 #endif /* __arm64__ */
4008 {
4009 map_addr = vm_map_trunc_page(*address,
4010 VM_MAP_PAGE_MASK(target_map));
4011 map_size = vm_map_round_page(initial_size,
4012 VM_MAP_PAGE_MASK(target_map));
4013 }
4014 size = vm_object_round_page(initial_size);
4015
4016 /*
4017 * Find the vm object (if any) corresponding to this port.
4018 */
4019 if (!IP_VALID(port)) {
4020 object = VM_OBJECT_NULL;
4021 offset = 0;
4022 copy = FALSE;
4023 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4024 vm_named_entry_t named_entry;
4025
4026 named_entry = (vm_named_entry_t) port->ip_kobject;
4027
4028 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4029 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4030 offset += named_entry->data_offset;
4031 }
4032
4033 /* a few checks to make sure the user is obeying the rules */
4034 if (size == 0) {
4035 if (offset >= named_entry->size) {
4036 return KERN_INVALID_RIGHT;
4037 }
4038 size = named_entry->size - offset;
4039 }
4040 if (mask_max_protection) {
4041 max_protection &= named_entry->protection;
4042 }
4043 if (mask_cur_protection) {
4044 cur_protection &= named_entry->protection;
4045 }
4046 if ((named_entry->protection & max_protection) !=
4047 max_protection) {
4048 return KERN_INVALID_RIGHT;
4049 }
4050 if ((named_entry->protection & cur_protection) !=
4051 cur_protection) {
4052 return KERN_INVALID_RIGHT;
4053 }
4054 if (offset + size < offset) {
4055 /* overflow */
4056 return KERN_INVALID_ARGUMENT;
4057 }
4058 if (named_entry->size < (offset + initial_size)) {
4059 return KERN_INVALID_ARGUMENT;
4060 }
4061
4062 if (named_entry->is_copy) {
4063 /* for a vm_map_copy, we can only map it whole */
4064 if ((size != named_entry->size) &&
4065 (vm_map_round_page(size,
4066 VM_MAP_PAGE_MASK(target_map)) ==
4067 named_entry->size)) {
4068 /* XXX FBDP use the rounded size... */
4069 size = vm_map_round_page(
4070 size,
4071 VM_MAP_PAGE_MASK(target_map));
4072 }
4073
4074 if (!(flags & VM_FLAGS_ANYWHERE) &&
4075 (offset != 0 ||
4076 size != named_entry->size)) {
4077 /*
4078 * XXX for a mapping at a "fixed" address,
4079 * we can't trim after mapping the whole
4080 * memory entry, so reject a request for a
4081 * partial mapping.
4082 */
4083 return KERN_INVALID_ARGUMENT;
4084 }
4085 }
4086
4087 /* the caller's "offset" parameter is relative to the start of */
4088 /* the named entry; convert it to an offset within the object */
4089 offset = offset + named_entry->offset;
4090
4091 if (!VM_MAP_PAGE_ALIGNED(size,
4092 VM_MAP_PAGE_MASK(target_map))) {
4093 /*
4094 * Let's not map more than requested;
4095 * vm_map_enter() will handle this "not map-aligned"
4096 * case.
4097 */
4098 map_size = size;
4099 }
4100
4101 named_entry_lock(named_entry);
4102 if (named_entry->is_sub_map) {
4103 vm_map_t submap;
4104
4105 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4106 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4107 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4108 }
4109
4110 submap = named_entry->backing.map;
4111 vm_map_lock(submap);
4112 vm_map_reference(submap);
4113 vm_map_unlock(submap);
4114 named_entry_unlock(named_entry);
4115
4116 vmk_flags.vmkf_submap = TRUE;
4117
4118 result = vm_map_enter(target_map,
4119 &map_addr,
4120 map_size,
4121 mask,
4122 flags,
4123 vmk_flags,
4124 tag,
4125 (vm_object_t)(uintptr_t) submap,
4126 offset,
4127 copy,
4128 cur_protection,
4129 max_protection,
4130 inheritance);
4131 if (result != KERN_SUCCESS) {
4132 vm_map_deallocate(submap);
4133 } else {
4134 /*
4135 * No need to lock "submap" just to check its
4136 * "mapped" flag: that flag is never reset
4137 * once it's been set and if we race, we'll
4138 * just end up setting it twice, which is OK.
4139 */
4140 if (submap->mapped_in_other_pmaps == FALSE &&
4141 vm_map_pmap(submap) != PMAP_NULL &&
4142 vm_map_pmap(submap) !=
4143 vm_map_pmap(target_map)) {
4144 /*
4145 * This submap is being mapped in a map
4146 * that uses a different pmap.
4147 * Set its "mapped_in_other_pmaps" flag
4148 * to indicate that we now need to
4149 * remove mappings from all pmaps rather
4150 * than just the submap's pmap.
4151 */
4152 vm_map_lock(submap);
4153 submap->mapped_in_other_pmaps = TRUE;
4154 vm_map_unlock(submap);
4155 }
4156 *address = map_addr;
4157 }
4158 return result;
4159 } else if (named_entry->is_copy) {
4160 kern_return_t kr;
4161 vm_map_copy_t copy_map;
4162 vm_map_entry_t copy_entry;
4163 vm_map_offset_t copy_addr;
4164
4165 if (flags & ~(VM_FLAGS_FIXED |
4166 VM_FLAGS_ANYWHERE |
4167 VM_FLAGS_OVERWRITE |
4168 VM_FLAGS_RETURN_4K_DATA_ADDR |
4169 VM_FLAGS_RETURN_DATA_ADDR |
4170 VM_FLAGS_ALIAS_MASK)) {
4171 named_entry_unlock(named_entry);
4172 return KERN_INVALID_ARGUMENT;
4173 }
4174
4175 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4176 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4177 offset_in_mapping = offset - vm_object_trunc_page(offset);
4178 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4179 offset_in_mapping &= ~((signed)(0xFFF));
4180 }
4181 offset = vm_object_trunc_page(offset);
4182 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4183 }
4184
4185 copy_map = named_entry->backing.copy;
4186 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4187 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4188 /* unsupported type; should not happen */
4189 printf("vm_map_enter_mem_object: "
4190 "memory_entry->backing.copy "
4191 "unsupported type 0x%x\n",
4192 copy_map->type);
4193 named_entry_unlock(named_entry);
4194 return KERN_INVALID_ARGUMENT;
4195 }
4196
4197 /* reserve a contiguous range */
4198 kr = vm_map_enter(target_map,
4199 &map_addr,
4200 /* map whole mem entry, trim later: */
4201 named_entry->size,
4202 mask,
4203 flags & (VM_FLAGS_ANYWHERE |
4204 VM_FLAGS_OVERWRITE |
4205 VM_FLAGS_RETURN_4K_DATA_ADDR |
4206 VM_FLAGS_RETURN_DATA_ADDR),
4207 vmk_flags,
4208 tag,
4209 VM_OBJECT_NULL,
4210 0,
4211 FALSE, /* copy */
4212 cur_protection,
4213 max_protection,
4214 inheritance);
4215 if (kr != KERN_SUCCESS) {
4216 named_entry_unlock(named_entry);
4217 return kr;
4218 }
4219
4220 copy_addr = map_addr;
4221
4222 for (copy_entry = vm_map_copy_first_entry(copy_map);
4223 copy_entry != vm_map_copy_to_entry(copy_map);
4224 copy_entry = copy_entry->vme_next) {
4225 int remap_flags;
4226 vm_map_kernel_flags_t vmk_remap_flags;
4227 vm_map_t copy_submap;
4228 vm_object_t copy_object;
4229 vm_map_size_t copy_size;
4230 vm_object_offset_t copy_offset;
4231 int copy_vm_alias;
4232
4233 remap_flags = 0;
4234 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4235
4236 copy_object = VME_OBJECT(copy_entry);
4237 copy_offset = VME_OFFSET(copy_entry);
4238 copy_size = (copy_entry->vme_end -
4239 copy_entry->vme_start);
4240 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4241 if (copy_vm_alias == 0) {
4242 /*
4243 * Caller does not want a specific
4244 * alias for this new mapping: use
4245 * the alias of the original mapping.
4246 */
4247 copy_vm_alias = VME_ALIAS(copy_entry);
4248 }
4249
4250 /* sanity check */
4251 if ((copy_addr + copy_size) >
4252 (map_addr +
4253 named_entry->size /* XXX full size */)) {
4254 /* over-mapping too much !? */
4255 kr = KERN_INVALID_ARGUMENT;
4256 /* abort */
4257 break;
4258 }
4259
4260 /* take a reference on the object */
4261 if (copy_entry->is_sub_map) {
4262 vmk_remap_flags.vmkf_submap = TRUE;
4263 copy_submap = VME_SUBMAP(copy_entry);
4264 vm_map_lock(copy_submap);
4265 vm_map_reference(copy_submap);
4266 vm_map_unlock(copy_submap);
4267 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4268 } else if (!copy &&
4269 copy_object != VM_OBJECT_NULL &&
4270 (copy_entry->needs_copy ||
4271 copy_object->shadowed ||
4272 (!copy_object->true_share &&
4273 !copy_entry->is_shared &&
4274 copy_object->vo_size > copy_size))) {
4275 /*
4276 * We need to resolve our side of this
4277 * "symmetric" copy-on-write now; we
4278 * need a new object to map and share,
4279 * instead of the current one which
4280 * might still be shared with the
4281 * original mapping.
4282 *
4283 * Note: A "vm_map_copy_t" does not
4284 * have a lock but we're protected by
4285 * the named entry's lock here.
4286 */
4287 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4288 VME_OBJECT_SHADOW(copy_entry, copy_size);
4289 if (!copy_entry->needs_copy &&
4290 copy_entry->protection & VM_PROT_WRITE) {
4291 vm_prot_t prot;
4292
4293 prot = copy_entry->protection & ~VM_PROT_WRITE;
4294 vm_object_pmap_protect(copy_object,
4295 copy_offset,
4296 copy_size,
4297 PMAP_NULL,
4298 0,
4299 prot);
4300 }
4301
4302 copy_entry->needs_copy = FALSE;
4303 copy_entry->is_shared = TRUE;
4304 copy_object = VME_OBJECT(copy_entry);
4305 copy_offset = VME_OFFSET(copy_entry);
4306 vm_object_lock(copy_object);
4307 vm_object_reference_locked(copy_object);
4308 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4309 /* we're about to make a shared mapping of this object */
4310 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4311 copy_object->true_share = TRUE;
4312 }
4313 vm_object_unlock(copy_object);
4314 } else {
4315 /*
4316 * We already have the right object
4317 * to map.
4318 */
4319 copy_object = VME_OBJECT(copy_entry);
4320 vm_object_reference(copy_object);
4321 }
4322
4323 /* over-map the object into destination */
4324 remap_flags |= flags;
4325 remap_flags |= VM_FLAGS_FIXED;
4326 remap_flags |= VM_FLAGS_OVERWRITE;
4327 remap_flags &= ~VM_FLAGS_ANYWHERE;
4328 if (!copy && !copy_entry->is_sub_map) {
4329 /*
4330 * copy-on-write should have been
4331 * resolved at this point, or we would
4332 * end up sharing instead of copying.
4333 */
4334 assert(!copy_entry->needs_copy);
4335 }
4336 #if !CONFIG_EMBEDDED
4337 if (copy_entry->used_for_jit) {
4338 vmk_remap_flags.vmkf_map_jit = TRUE;
4339 }
4340 #endif /* !CONFIG_EMBEDDED */
4341 kr = vm_map_enter(target_map,
4342 &copy_addr,
4343 copy_size,
4344 (vm_map_offset_t) 0,
4345 remap_flags,
4346 vmk_remap_flags,
4347 copy_vm_alias,
4348 copy_object,
4349 copy_offset,
4350 ((copy_object == NULL) ? FALSE : copy),
4351 cur_protection,
4352 max_protection,
4353 inheritance);
4354 if (kr != KERN_SUCCESS) {
4355 if (copy_entry->is_sub_map) {
4356 vm_map_deallocate(copy_submap);
4357 } else {
4358 vm_object_deallocate(copy_object);
4359 }
4360 /* abort */
4361 break;
4362 }
4363
4364 /* next mapping */
4365 copy_addr += copy_size;
4366 }
4367
4368 if (kr == KERN_SUCCESS) {
4369 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4370 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4371 *address = map_addr + offset_in_mapping;
4372 } else {
4373 *address = map_addr;
4374 }
4375
4376 if (offset) {
4377 /*
4378 * Trim in front, from 0 to "offset".
4379 */
4380 vm_map_remove(target_map,
4381 map_addr,
4382 map_addr + offset,
4383 VM_MAP_REMOVE_NO_FLAGS);
4384 *address += offset;
4385 }
4386 if (offset + map_size < named_entry->size) {
4387 /*
4388 * Trim in back, from
4389 * "offset + map_size" to
4390 * "named_entry->size".
4391 */
4392 vm_map_remove(target_map,
4393 (map_addr +
4394 offset + map_size),
4395 (map_addr +
4396 named_entry->size),
4397 VM_MAP_REMOVE_NO_FLAGS);
4398 }
4399 }
4400 named_entry_unlock(named_entry);
4401
4402 if (kr != KERN_SUCCESS) {
4403 if (!(flags & VM_FLAGS_OVERWRITE)) {
4404 /* deallocate the contiguous range */
4405 (void) vm_deallocate(target_map,
4406 map_addr,
4407 map_size);
4408 }
4409 }
4410
4411 return kr;
4412 } else {
4413 unsigned int access;
4414 vm_prot_t protections;
4415 unsigned int wimg_mode;
4416
4417 /* we are mapping a VM object */
4418
4419 protections = named_entry->protection & VM_PROT_ALL;
4420 access = GET_MAP_MEM(named_entry->protection);
4421
4422 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4423 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4424 offset_in_mapping = offset - vm_object_trunc_page(offset);
4425 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4426 offset_in_mapping &= ~((signed)(0xFFF));
4427 }
4428 offset = vm_object_trunc_page(offset);
4429 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4430 }
4431
4432 object = named_entry->backing.object;
4433 assert(object != VM_OBJECT_NULL);
4434 vm_object_lock(object);
4435 named_entry_unlock(named_entry);
4436
4437 vm_object_reference_locked(object);
4438
4439 wimg_mode = object->wimg_bits;
4440 vm_prot_to_wimg(access, &wimg_mode);
4441 if (object->wimg_bits != wimg_mode) {
4442 vm_object_change_wimg_mode(object, wimg_mode);
4443 }
4444
4445 vm_object_unlock(object);
4446 }
4447 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4448 /*
4449 * JMM - This is temporary until we unify named entries
4450 * and raw memory objects.
4451 *
4452 * Detected fake ip_kotype for a memory object. In
4453 * this case, the port isn't really a port at all, but
4454 * instead is just a raw memory object.
4455 */
4456 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4457 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4458 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4459 }
4460
4461 object = memory_object_to_vm_object((memory_object_t)port);
4462 if (object == VM_OBJECT_NULL) {
4463 return KERN_INVALID_OBJECT;
4464 }
4465 vm_object_reference(object);
4466
4467 /* wait for object (if any) to be ready */
4468 if (object != VM_OBJECT_NULL) {
4469 if (object == kernel_object) {
4470 printf("Warning: Attempt to map kernel object"
4471 " by a non-private kernel entity\n");
4472 return KERN_INVALID_OBJECT;
4473 }
4474 if (!object->pager_ready) {
4475 vm_object_lock(object);
4476
4477 while (!object->pager_ready) {
4478 vm_object_wait(object,
4479 VM_OBJECT_EVENT_PAGER_READY,
4480 THREAD_UNINT);
4481 vm_object_lock(object);
4482 }
4483 vm_object_unlock(object);
4484 }
4485 }
4486 } else {
4487 return KERN_INVALID_OBJECT;
4488 }
4489
4490 if (object != VM_OBJECT_NULL &&
4491 object->named &&
4492 object->pager != MEMORY_OBJECT_NULL &&
4493 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4494 memory_object_t pager;
4495 vm_prot_t pager_prot;
4496 kern_return_t kr;
4497
4498 /*
4499 * For "named" VM objects, let the pager know that the
4500 * memory object is being mapped. Some pagers need to keep
4501 * track of this, to know when they can reclaim the memory
4502 * object, for example.
4503 * VM calls memory_object_map() for each mapping (specifying
4504 * the protection of each mapping) and calls
4505 * memory_object_last_unmap() when all the mappings are gone.
4506 */
4507 pager_prot = max_protection;
4508 if (copy) {
4509 /*
4510 * Copy-On-Write mapping: won't modify the
4511 * memory object.
4512 */
4513 pager_prot &= ~VM_PROT_WRITE;
4514 }
4515 vm_object_lock(object);
4516 pager = object->pager;
4517 if (object->named &&
4518 pager != MEMORY_OBJECT_NULL &&
4519 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4520 assert(object->pager_ready);
4521 vm_object_mapping_wait(object, THREAD_UNINT);
4522 vm_object_mapping_begin(object);
4523 vm_object_unlock(object);
4524
4525 kr = memory_object_map(pager, pager_prot);
4526 assert(kr == KERN_SUCCESS);
4527
4528 vm_object_lock(object);
4529 vm_object_mapping_end(object);
4530 }
4531 vm_object_unlock(object);
4532 }
4533
4534 /*
4535 * Perform the copy if requested
4536 */
4537
4538 if (copy) {
4539 vm_object_t new_object;
4540 vm_object_offset_t new_offset;
4541
4542 result = vm_object_copy_strategically(object, offset,
4543 map_size,
4544 &new_object, &new_offset,
4545 &copy);
4546
4547
4548 if (result == KERN_MEMORY_RESTART_COPY) {
4549 boolean_t success;
4550 boolean_t src_needs_copy;
4551
4552 /*
4553 * XXX
4554 * We currently ignore src_needs_copy.
4555 * This really is the issue of how to make
4556 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4557 * non-kernel users to use. Solution forthcoming.
4558 * In the meantime, since we don't allow non-kernel
4559 * memory managers to specify symmetric copy,
4560 * we won't run into problems here.
4561 */
4562 new_object = object;
4563 new_offset = offset;
4564 success = vm_object_copy_quickly(&new_object,
4565 new_offset,
4566 map_size,
4567 &src_needs_copy,
4568 &copy);
4569 assert(success);
4570 result = KERN_SUCCESS;
4571 }
4572 /*
4573 * Throw away the reference to the
4574 * original object, as it won't be mapped.
4575 */
4576
4577 vm_object_deallocate(object);
4578
4579 if (result != KERN_SUCCESS) {
4580 return result;
4581 }
4582
4583 object = new_object;
4584 offset = new_offset;
4585 }
4586
4587 /*
4588 * If non-kernel users want to try to prefault pages, the mapping and prefault
4589 * need to be atomic.
4590 */
4591 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4592 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4593
4594 #if __arm64__
4595 if (fourk) {
4596 /* map this object in a "4K" pager */
4597 result = vm_map_enter_fourk(target_map,
4598 &map_addr,
4599 map_size,
4600 (vm_map_offset_t) mask,
4601 flags,
4602 vmk_flags,
4603 tag,
4604 object,
4605 offset,
4606 copy,
4607 cur_protection,
4608 max_protection,
4609 inheritance);
4610 } else
4611 #endif /* __arm64__ */
4612 {
4613 result = vm_map_enter(target_map,
4614 &map_addr, map_size,
4615 (vm_map_offset_t)mask,
4616 flags,
4617 vmk_flags,
4618 tag,
4619 object, offset,
4620 copy,
4621 cur_protection, max_protection,
4622 inheritance);
4623 }
4624 if (result != KERN_SUCCESS) {
4625 vm_object_deallocate(object);
4626 }
4627
4628 /*
4629 * Try to prefault, and do not forget to release the vm map lock.
4630 */
4631 if (result == KERN_SUCCESS && try_prefault) {
4632 mach_vm_address_t va = map_addr;
4633 kern_return_t kr = KERN_SUCCESS;
4634 unsigned int i = 0;
4635 int pmap_options;
4636
4637 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4638 if (object->internal) {
4639 pmap_options |= PMAP_OPTIONS_INTERNAL;
4640 }
4641
4642 for (i = 0; i < page_list_count; ++i) {
4643 if (!UPL_VALID_PAGE(page_list, i)) {
4644 if (kernel_prefault) {
4645 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4646 result = KERN_MEMORY_ERROR;
4647 break;
4648 }
4649 } else {
4650 /*
4651 * If this call fails, we should stop trying
4652 * to optimize: other calls are likely to
4653 * fail too.
4654 *
4655 * We won't report an error for such a failure,
4656 * though; prefaulting is an optimization, not
4657 * something critical.
4658 */
4659 kr = pmap_enter_options(target_map->pmap,
4660 va, UPL_PHYS_PAGE(page_list, i),
4661 cur_protection, VM_PROT_NONE,
4662 0, TRUE, pmap_options, NULL);
4663 if (kr != KERN_SUCCESS) {
4664 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4665 if (kernel_prefault) {
4666 result = kr;
4667 }
4668 break;
4669 }
4670 OSIncrementAtomic64(&vm_prefault_nb_pages);
4671 }
4672
4673 /* Next virtual address */
4674 va += PAGE_SIZE;
4675 }
4676 if (vmk_flags.vmkf_keep_map_locked) {
4677 vm_map_unlock(target_map);
4678 }
4679 }
4680
4681 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4682 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4683 *address = map_addr + offset_in_mapping;
4684 } else {
4685 *address = map_addr;
4686 }
4687 return result;
4688 }
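
/*
 * Worked example (added for clarity, not in the original source) of the
 * VM_FLAGS_RETURN_DATA_ADDR adjustment performed by the helper above,
 * assuming a 4 KB VM object page size and a caller-supplied offset of
 * 0x1234:
 *
 *	offset_in_mapping = 0x1234 - vm_object_trunc_page(0x1234) = 0x234
 *	offset            = vm_object_trunc_page(0x1234)          = 0x1000
 *
 * The mapping itself is established on a page boundary, and the helper
 * returns "map_addr + 0x234": the address of the caller's data rather
 * than the address of the containing page.
 */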
4689
4690 kern_return_t
4691 vm_map_enter_mem_object(
4692 vm_map_t target_map,
4693 vm_map_offset_t *address,
4694 vm_map_size_t initial_size,
4695 vm_map_offset_t mask,
4696 int flags,
4697 vm_map_kernel_flags_t vmk_flags,
4698 vm_tag_t tag,
4699 ipc_port_t port,
4700 vm_object_offset_t offset,
4701 boolean_t copy,
4702 vm_prot_t cur_protection,
4703 vm_prot_t max_protection,
4704 vm_inherit_t inheritance)
4705 {
4706 kern_return_t ret;
4707
4708 ret = vm_map_enter_mem_object_helper(target_map,
4709 address,
4710 initial_size,
4711 mask,
4712 flags,
4713 vmk_flags,
4714 tag,
4715 port,
4716 offset,
4717 copy,
4718 cur_protection,
4719 max_protection,
4720 inheritance,
4721 NULL,
4722 0);
4723
4724 #if KASAN
4725 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4726 kasan_notify_address(*address, initial_size);
4727 }
4728 #endif
4729
4730 return ret;
4731 }
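
/*
 * Minimal usage sketch (hypothetical, not from the original source):
 * mapping a named memory entry port into a task's map.  "task_map",
 * "mem_port" and "len" are placeholders supplied by the caller; the
 * zero arguments are the mask and the offset within the entry, and
 * "copy" is FALSE so the memory is mapped shared rather than
 * copy-on-write.
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t   kr;
 *
 *	kr = vm_map_enter_mem_object(task_map, &addr, len,
 *	    (vm_map_offset_t) 0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    mem_port,
 *	    0,
 *	    FALSE,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_INHERIT_DEFAULT);
 */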
4732
4733 kern_return_t
4734 vm_map_enter_mem_object_prefault(
4735 vm_map_t target_map,
4736 vm_map_offset_t *address,
4737 vm_map_size_t initial_size,
4738 vm_map_offset_t mask,
4739 int flags,
4740 vm_map_kernel_flags_t vmk_flags,
4741 vm_tag_t tag,
4742 ipc_port_t port,
4743 vm_object_offset_t offset,
4744 vm_prot_t cur_protection,
4745 vm_prot_t max_protection,
4746 upl_page_list_ptr_t page_list,
4747 unsigned int page_list_count)
4748 {
4749 kern_return_t ret;
4750
4751 ret = vm_map_enter_mem_object_helper(target_map,
4752 address,
4753 initial_size,
4754 mask,
4755 flags,
4756 vmk_flags,
4757 tag,
4758 port,
4759 offset,
4760 FALSE,
4761 cur_protection,
4762 max_protection,
4763 VM_INHERIT_DEFAULT,
4764 page_list,
4765 page_list_count);
4766
4767 #if KASAN
4768 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4769 kasan_notify_address(*address, initial_size);
4770 }
4771 #endif
4772
4773 return ret;
4774 }
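
/*
 * Minimal usage sketch (hypothetical, not from the original source):
 * the prefault variant takes a UPL page list so the pmap can be
 * pre-populated and the first accesses don't fault.  "page_list" and
 * "page_count" would come from the UPL describing the resident pages;
 * this wrapper always maps shared (copy == FALSE) with
 * VM_INHERIT_DEFAULT, as hard-coded above.
 *
 *	kr = vm_map_enter_mem_object_prefault(task_map, &addr, len,
 *	    (vm_map_offset_t) 0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    mem_port,
 *	    0,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    page_list,
 *	    page_count);
 */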
4775
4776
4777 kern_return_t
4778 vm_map_enter_mem_object_control(
4779 vm_map_t target_map,
4780 vm_map_offset_t *address,
4781 vm_map_size_t initial_size,
4782 vm_map_offset_t mask,
4783 int flags,
4784 vm_map_kernel_flags_t vmk_flags,
4785 vm_tag_t tag,
4786 memory_object_control_t control,
4787 vm_object_offset_t offset,
4788 boolean_t copy,
4789 vm_prot_t cur_protection,
4790 vm_prot_t max_protection,
4791 vm_inherit_t inheritance)
4792 {
4793 vm_map_address_t map_addr;
4794 vm_map_size_t map_size;
4795 vm_object_t object;
4796 vm_object_size_t size;
4797 kern_return_t result;
4798 memory_object_t pager;
4799 vm_prot_t pager_prot;
4800 kern_return_t kr;
4801 #if __arm64__
4802 boolean_t fourk = vmk_flags.vmkf_fourk;
4803 #endif /* __arm64__ */
4804
4805 /*
4806 * Check arguments for validity
4807 */
4808 if ((target_map == VM_MAP_NULL) ||
4809 (cur_protection & ~VM_PROT_ALL) ||
4810 (max_protection & ~VM_PROT_ALL) ||
4811 (inheritance > VM_INHERIT_LAST_VALID) ||
4812 initial_size == 0) {
4813 return KERN_INVALID_ARGUMENT;
4814 }
4815
4816 #if __arm64__
4817 if (fourk) {
4818 map_addr = vm_map_trunc_page(*address,
4819 FOURK_PAGE_MASK);
4820 map_size = vm_map_round_page(initial_size,
4821 FOURK_PAGE_MASK);
4822 } else
4823 #endif /* __arm64__ */
4824 {
4825 map_addr = vm_map_trunc_page(*address,
4826 VM_MAP_PAGE_MASK(target_map));
4827 map_size = vm_map_round_page(initial_size,
4828 VM_MAP_PAGE_MASK(target_map));
4829 }
4830 size = vm_object_round_page(initial_size);
4831
4832 object = memory_object_control_to_vm_object(control);
4833
4834 if (object == VM_OBJECT_NULL) {
4835 return KERN_INVALID_OBJECT;
4836 }
4837
4838 if (object == kernel_object) {
4839 printf("Warning: Attempt to map kernel object"
4840 " by a non-private kernel entity\n");
4841 return KERN_INVALID_OBJECT;
4842 }
4843
4844 vm_object_lock(object);
4845 object->ref_count++;
4846 vm_object_res_reference(object);
4847
4848 /*
4849 * For "named" VM objects, let the pager know that the
4850 * memory object is being mapped. Some pagers need to keep
4851 * track of this, to know when they can reclaim the memory
4852 * object, for example.
4853 * VM calls memory_object_map() for each mapping (specifying
4854 * the protection of each mapping) and calls
4855 * memory_object_last_unmap() when all the mappings are gone.
4856 */
4857 pager_prot = max_protection;
4858 if (copy) {
4859 pager_prot &= ~VM_PROT_WRITE;
4860 }
4861 pager = object->pager;
4862 if (object->named &&
4863 pager != MEMORY_OBJECT_NULL &&
4864 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4865 assert(object->pager_ready);
4866 vm_object_mapping_wait(object, THREAD_UNINT);
4867 vm_object_mapping_begin(object);
4868 vm_object_unlock(object);
4869
4870 kr = memory_object_map(pager, pager_prot);
4871 assert(kr == KERN_SUCCESS);
4872
4873 vm_object_lock(object);
4874 vm_object_mapping_end(object);
4875 }
4876 vm_object_unlock(object);
4877
4878 /*
4879 * Perform the copy if requested
4880 */
4881
4882 if (copy) {
4883 vm_object_t new_object;
4884 vm_object_offset_t new_offset;
4885
4886 result = vm_object_copy_strategically(object, offset, size,
4887 &new_object, &new_offset,
4888 &copy);
4889
4890
4891 if (result == KERN_MEMORY_RESTART_COPY) {
4892 boolean_t success;
4893 boolean_t src_needs_copy;
4894
4895 /*
4896 * XXX
4897 * We currently ignore src_needs_copy.
4898 * This really is the issue of how to make
4899 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4900 * non-kernel users to use. Solution forthcoming.
4901 * In the meantime, since we don't allow non-kernel
4902 * memory managers to specify symmetric copy,
4903 * we won't run into problems here.
4904 */
4905 new_object = object;
4906 new_offset = offset;
4907 success = vm_object_copy_quickly(&new_object,
4908 new_offset, size,
4909 &src_needs_copy,
4910 &copy);
4911 assert(success);
4912 result = KERN_SUCCESS;
4913 }
4914 /*
4915 * Throw away the reference to the
4916 * original object, as it won't be mapped.
4917 */
4918
4919 vm_object_deallocate(object);
4920
4921 if (result != KERN_SUCCESS) {
4922 return result;
4923 }
4924
4925 object = new_object;
4926 offset = new_offset;
4927 }
4928
4929 #if __arm64__
4930 if (fourk) {
4931 result = vm_map_enter_fourk(target_map,
4932 &map_addr,
4933 map_size,
4934 (vm_map_offset_t)mask,
4935 flags,
4936 vmk_flags,
4937 tag,
4938 object, offset,
4939 copy,
4940 cur_protection, max_protection,
4941 inheritance);
4942 } else
4943 #endif /* __arm64__ */
4944 {
4945 result = vm_map_enter(target_map,
4946 &map_addr, map_size,
4947 (vm_map_offset_t)mask,
4948 flags,
4949 vmk_flags,
4950 tag,
4951 object, offset,
4952 copy,
4953 cur_protection, max_protection,
4954 inheritance);
4955 }
4956 if (result != KERN_SUCCESS) {
4957 vm_object_deallocate(object);
4958 }
4959 *address = map_addr;
4960
4961 return result;
4962 }
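
/*
 * Minimal usage sketch (hypothetical, not from the original source):
 * mapping by memory object control rather than by port.  "control" is
 * a memory_object_control_t obtained elsewhere (e.g. by a pager).
 *
 *	kr = vm_map_enter_mem_object_control(task_map, &addr, len,
 *	    (vm_map_offset_t) 0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    control,
 *	    0,
 *	    FALSE,
 *	    VM_PROT_READ,
 *	    VM_PROT_READ,
 *	    VM_INHERIT_DEFAULT);
 */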
4963
4964
4965 #if VM_CPM
4966
4967 #ifdef MACH_ASSERT
4968 extern pmap_paddr_t avail_start, avail_end;
4969 #endif
4970
4971 /*
4972 * Allocate memory in the specified map, with the caveat that
4973 * the memory is physically contiguous. This call may fail
4974 * if the system can't find sufficient contiguous memory.
4975 * This call may cause or lead to heart-stopping amounts of
4976 * paging activity.
4977 *
4978 * Memory obtained from this call should be freed in the
4979 * normal way, viz., via vm_deallocate.
4980 */
4981 kern_return_t
4982 vm_map_enter_cpm(
4983 vm_map_t map,
4984 vm_map_offset_t *addr,
4985 vm_map_size_t size,
4986 int flags)
4987 {
4988 vm_object_t cpm_obj;
4989 pmap_t pmap;
4990 vm_page_t m, pages;
4991 kern_return_t kr;
4992 vm_map_offset_t va, start, end, offset;
4993 #if MACH_ASSERT
4994 vm_map_offset_t prev_addr = 0;
4995 #endif /* MACH_ASSERT */
4996
4997 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4998 vm_tag_t tag;
4999
5000 VM_GET_FLAGS_ALIAS(flags, tag);
5001
5002 if (size == 0) {
5003 *addr = 0;
5004 return KERN_SUCCESS;
5005 }
5006 if (anywhere) {
5007 *addr = vm_map_min(map);
5008 } else {
5009 *addr = vm_map_trunc_page(*addr,
5010 VM_MAP_PAGE_MASK(map));
5011 }
5012 size = vm_map_round_page(size,
5013 VM_MAP_PAGE_MASK(map));
5014
5015 /*
5016 * LP64todo - cpm_allocate should probably allow
5017 * allocations of >4GB, but not with the current
5018 * algorithm, so just cast down the size for now.
5019 */
5020 if (size > VM_MAX_ADDRESS) {
5021 return KERN_RESOURCE_SHORTAGE;
5022 }
5023 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5024 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5025 return kr;
5026 }
5027
5028 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5029 assert(cpm_obj != VM_OBJECT_NULL);
5030 assert(cpm_obj->internal);
5031 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5032 assert(cpm_obj->can_persist == FALSE);
5033 assert(cpm_obj->pager_created == FALSE);
5034 assert(cpm_obj->pageout == FALSE);
5035 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5036
5037 /*
5038 * Insert pages into object.
5039 */
5040
5041 vm_object_lock(cpm_obj);
5042 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5043 m = pages;
5044 pages = NEXT_PAGE(m);
5045 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5046
5047 assert(!m->vmp_gobbled);
5048 assert(!m->vmp_wanted);
5049 assert(!m->vmp_pageout);
5050 assert(!m->vmp_tabled);
5051 assert(VM_PAGE_WIRED(m));
5052 assert(m->vmp_busy);
5053 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5054
5055 m->vmp_busy = FALSE;
5056 vm_page_insert(m, cpm_obj, offset);
5057 }
5058 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5059 vm_object_unlock(cpm_obj);
5060
5061 /*
5062 * Hang onto a reference on the object in case a
5063 * multi-threaded application for some reason decides
5064 * to deallocate the portion of the address space into
5065 * which we will insert this object.
5066 *
5067 * Unfortunately, we must insert the object now before
5068 * we can talk to the pmap module about which addresses
5069 * must be wired down. Hence, the race with a multi-
5070 * threaded app.
5071 */
5072 vm_object_reference(cpm_obj);
5073
5074 /*
5075 * Insert object into map.
5076 */
5077
5078 kr = vm_map_enter(
5079 map,
5080 addr,
5081 size,
5082 (vm_map_offset_t)0,
5083 flags,
5084 VM_MAP_KERNEL_FLAGS_NONE,
5085 cpm_obj,
5086 (vm_object_offset_t)0,
5087 FALSE,
5088 VM_PROT_ALL,
5089 VM_PROT_ALL,
5090 VM_INHERIT_DEFAULT);
5091
5092 if (kr != KERN_SUCCESS) {
5093 /*
5094 * A CPM object doesn't have can_persist set,
5095 * so all we have to do is deallocate it to
5096 * free up these pages.
5097 */
5098 assert(cpm_obj->pager_created == FALSE);
5099 assert(cpm_obj->can_persist == FALSE);
5100 assert(cpm_obj->pageout == FALSE);
5101 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5102 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5103 vm_object_deallocate(cpm_obj); /* kill creation ref */
5104 }
5105
5106 /*
5107 * Inform the physical mapping system that the
5108 * range of addresses may not fault, so that
5109 * page tables and such can be locked down as well.
5110 */
5111 start = *addr;
5112 end = start + size;
5113 pmap = vm_map_pmap(map);
5114 pmap_pageable(pmap, start, end, FALSE);
5115
5116 /*
5117 * Enter each page into the pmap, to avoid faults.
5118 * Note that this loop could be coded more efficiently,
5119 * if the need arose, rather than looking up each page
5120 * again.
5121 */
5122 for (offset = 0, va = start; offset < size;
5123 va += PAGE_SIZE, offset += PAGE_SIZE) {
5124 int type_of_fault;
5125
5126 vm_object_lock(cpm_obj);
5127 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5128 assert(m != VM_PAGE_NULL);
5129
5130 vm_page_zero_fill(m);
5131
5132 type_of_fault = DBG_ZERO_FILL_FAULT;
5133
5134 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5135 VM_PAGE_WIRED(m),
5136 FALSE, /* change_wiring */
5137 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5138 FALSE, /* no_cache */
5139 FALSE, /* cs_bypass */
5140 0, /* user_tag */
5141 0, /* pmap_options */
5142 NULL, /* need_retry */
5143 &type_of_fault);
5144
5145 vm_object_unlock(cpm_obj);
5146 }
5147
5148 #if MACH_ASSERT
5149 /*
5150 * Verify ordering in address space.
5151 */
5152 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5153 vm_object_lock(cpm_obj);
5154 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5155 vm_object_unlock(cpm_obj);
5156 if (m == VM_PAGE_NULL) {
5157 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5158 cpm_obj, (uint64_t)offset);
5159 }
5160 assert(m->vmp_tabled);
5161 assert(!m->vmp_busy);
5162 assert(!m->vmp_wanted);
5163 assert(!m->vmp_fictitious);
5164 assert(!m->vmp_private);
5165 assert(!m->vmp_absent);
5166 assert(!m->vmp_error);
5167 assert(!m->vmp_cleaning);
5168 assert(!m->vmp_laundry);
5169 assert(!m->vmp_precious);
5170 assert(!m->vmp_clustered);
5171 if (offset != 0) {
5172 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5173 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5174 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5175 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5176 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5177 panic("vm_allocate_cpm: pages not contig!");
5178 }
5179 }
5180 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5181 }
5182 #endif /* MACH_ASSERT */
5183
5184 vm_object_deallocate(cpm_obj); /* kill extra ref */
5185
5186 return kr;
5187 }
5188
5189
5190 #else /* VM_CPM */
5191
5192 /*
5193 * Interface is defined in all cases, but unless the kernel
5194 * is built explicitly for this option, the interface does
5195 * nothing.
5196 */
5197
5198 kern_return_t
5199 vm_map_enter_cpm(
5200 __unused vm_map_t map,
5201 __unused vm_map_offset_t *addr,
5202 __unused vm_map_size_t size,
5203 __unused int flags)
5204 {
5205 return KERN_FAILURE;
5206 }
5207 #endif /* VM_CPM */
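
/*
 * Usage sketch (hypothetical, not from the original source): allocating
 * physically contiguous memory via vm_map_enter_cpm() and freeing it in
 * the normal way, per the header comment above.  On kernels built
 * without VM_CPM the stub simply returns KERN_FAILURE.
 *
 *	vm_map_offset_t cpm_addr = 0;
 *	kern_return_t   kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &cpm_addr, len, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		...
 *		(void) vm_deallocate(kernel_map, cpm_addr, len);
 *	}
 */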
5208
5209 /* Not used without nested pmaps */
5210 #ifndef NO_NESTED_PMAP
5211 /*
5212 * Clip and unnest a portion of a nested submap mapping.
5213 */
5214
5215
5216 static void
5217 vm_map_clip_unnest(
5218 vm_map_t map,
5219 vm_map_entry_t entry,
5220 vm_map_offset_t start_unnest,
5221 vm_map_offset_t end_unnest)
5222 {
5223 vm_map_offset_t old_start_unnest = start_unnest;
5224 vm_map_offset_t old_end_unnest = end_unnest;
5225
5226 assert(entry->is_sub_map);
5227 assert(VME_SUBMAP(entry) != NULL);
5228 assert(entry->use_pmap);
5229
5230 /*
5231 * Query the platform for the optimal unnest range.
5232 * DRK: There's some duplication of effort here, since
5233 * callers may have adjusted the range to some extent. This
5234 * routine was introduced to support 1GiB subtree nesting
5235 * for x86 platforms, which can also nest on 2MiB boundaries
5236 * depending on size/alignment.
5237 */
5238 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5239 assert(VME_SUBMAP(entry)->is_nested_map);
5240 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5241 log_unnest_badness(map,
5242 old_start_unnest,
5243 old_end_unnest,
5244 VME_SUBMAP(entry)->is_nested_map,
5245 (entry->vme_start +
5246 VME_SUBMAP(entry)->lowest_unnestable_start -
5247 VME_OFFSET(entry)));
5248 }
5249
5250 if (entry->vme_start > start_unnest ||
5251 entry->vme_end < end_unnest) {
5252 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5253 "bad nested entry: start=0x%llx end=0x%llx\n",
5254 (long long)start_unnest, (long long)end_unnest,
5255 (long long)entry->vme_start, (long long)entry->vme_end);
5256 }
5257
5258 if (start_unnest > entry->vme_start) {
5259 _vm_map_clip_start(&map->hdr,
5260 entry,
5261 start_unnest);
5262 if (map->holelistenabled) {
5263 vm_map_store_update_first_free(map, NULL, FALSE);
5264 } else {
5265 vm_map_store_update_first_free(map, map->first_free, FALSE);
5266 }
5267 }
5268 if (entry->vme_end > end_unnest) {
5269 _vm_map_clip_end(&map->hdr,
5270 entry,
5271 end_unnest);
5272 if (map->holelistenabled) {
5273 vm_map_store_update_first_free(map, NULL, FALSE);
5274 } else {
5275 vm_map_store_update_first_free(map, map->first_free, FALSE);
5276 }
5277 }
5278
5279 pmap_unnest(map->pmap,
5280 entry->vme_start,
5281 entry->vme_end - entry->vme_start);
5282 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5283 /* clean up parent map/maps */
5284 vm_map_submap_pmap_clean(
5285 map, entry->vme_start,
5286 entry->vme_end,
5287 VME_SUBMAP(entry),
5288 VME_OFFSET(entry));
5289 }
5290 entry->use_pmap = FALSE;
5291 if ((map->pmap != kernel_pmap) &&
5292 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5293 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5294 }
5295 }
5296 #endif /* NO_NESTED_PMAP */
5297
5298 /*
5299 * vm_map_clip_start: [ internal use only ]
5300 *
5301 * Asserts that the given entry begins at or after
5302 * the specified address; if necessary,
5303 * it splits the entry into two.
5304 */
5305 void
5306 vm_map_clip_start(
5307 vm_map_t map,
5308 vm_map_entry_t entry,
5309 vm_map_offset_t startaddr)
5310 {
5311 #ifndef NO_NESTED_PMAP
5312 if (entry->is_sub_map &&
5313 entry->use_pmap &&
5314 startaddr >= entry->vme_start) {
5315 vm_map_offset_t start_unnest, end_unnest;
5316
5317 /*
5318 * Make sure "startaddr" is no longer in a nested range
5319 * before we clip. Unnest only the minimum range the platform
5320 * can handle.
5321 * vm_map_clip_unnest may perform additional adjustments to
5322 * the unnest range.
5323 */
5324 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5325 end_unnest = start_unnest + pmap_nesting_size_min;
5326 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5327 }
5328 #endif /* NO_NESTED_PMAP */
5329 if (startaddr > entry->vme_start) {
5330 if (VME_OBJECT(entry) &&
5331 !entry->is_sub_map &&
5332 VME_OBJECT(entry)->phys_contiguous) {
5333 pmap_remove(map->pmap,
5334 (addr64_t)(entry->vme_start),
5335 (addr64_t)(entry->vme_end));
5336 }
5337 if (entry->vme_atomic) {
5338 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5339 }
5340
5341 DTRACE_VM5(
5342 vm_map_clip_start,
5343 vm_map_t, map,
5344 vm_map_offset_t, entry->vme_start,
5345 vm_map_offset_t, entry->vme_end,
5346 vm_map_offset_t, startaddr,
5347 int, VME_ALIAS(entry));
5348
5349 _vm_map_clip_start(&map->hdr, entry, startaddr);
5350 if (map->holelistenabled) {
5351 vm_map_store_update_first_free(map, NULL, FALSE);
5352 } else {
5353 vm_map_store_update_first_free(map, map->first_free, FALSE);
5354 }
5355 }
5356 }
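
/*
 * Worked example (added for clarity, not in the original source) of the
 * unnest alignment above, assuming pmap_nesting_size_min == 0x200000
 * (2 MiB) and startaddr == 0x12345000:
 *
 *	start_unnest = 0x12345000 & ~(0x200000 - 1) = 0x12200000
 *	end_unnest   = start_unnest + 0x200000      = 0x12400000
 *
 * i.e. the smallest platform-supported nesting granule containing
 * "startaddr" is unnested before the entry is clipped.
 */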
5357
5358
5359 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5360 MACRO_BEGIN \
5361 if ((startaddr) > (entry)->vme_start) \
5362 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5363 MACRO_END
5364
5365 /*
5366 * This routine is called only when it is known that
5367 * the entry must be split.
5368 */
5369 static void
5370 _vm_map_clip_start(
5371 struct vm_map_header *map_header,
5372 vm_map_entry_t entry,
5373 vm_map_offset_t start)
5374 {
5375 vm_map_entry_t new_entry;
5376
5377 /*
5378 * Split off the front portion --
5379 * note that we must insert the new
5380 * entry BEFORE this one, so that
5381 * this entry has the specified starting
5382 * address.
5383 */
5384
5385 if (entry->map_aligned) {
5386 assert(VM_MAP_PAGE_ALIGNED(start,
5387 VM_MAP_HDR_PAGE_MASK(map_header)));
5388 }
5389
5390 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5391 vm_map_entry_copy_full(new_entry, entry);
5392
5393 new_entry->vme_end = start;
5394 assert(new_entry->vme_start < new_entry->vme_end);
5395 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5396 assert(start < entry->vme_end);
5397 entry->vme_start = start;
5398
5399 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5400
5401 if (entry->is_sub_map) {
5402 vm_map_reference(VME_SUBMAP(new_entry));
5403 } else {
5404 vm_object_reference(VME_OBJECT(new_entry));
5405 }
5406 }
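
/*
 * Illustrative sketch (not part of the original source) of the split
 * performed above, for an entry covering [0x1000, 0x5000) clipped at
 * start == 0x3000:
 *
 *	before:  [0x1000 ........................... 0x5000)   entry
 *	after:   [0x1000 ..... 0x3000)                          new_entry
 *	                       [0x3000 ............. 0x5000)    entry
 *
 * "new_entry" keeps the original offset and is linked before "entry";
 * "entry" has its offset advanced by (start - old vme_start), and an
 * extra reference is taken on the backing object or submap for the new
 * entry.
 */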
5407
5408
5409 /*
5410 * vm_map_clip_end: [ internal use only ]
5411 *
5412 * Asserts that the given entry ends at or before
5413 * the specified address; if necessary,
5414 * it splits the entry into two.
5415 */
5416 void
5417 vm_map_clip_end(
5418 vm_map_t map,
5419 vm_map_entry_t entry,
5420 vm_map_offset_t endaddr)
5421 {
5422 if (endaddr > entry->vme_end) {
5423 /*
5424 * Within the scope of this clipping, limit "endaddr" to
5425 * the end of this map entry...
5426 */
5427 endaddr = entry->vme_end;
5428 }
5429 #ifndef NO_NESTED_PMAP
5430 if (entry->is_sub_map && entry->use_pmap) {
5431 vm_map_offset_t start_unnest, end_unnest;
5432
5433 /*
5434 * Make sure the range between the start of this entry and
5435 * the new "endaddr" is no longer nested before we clip.
5436 * Unnest only the minimum range the platform can handle.
5437 * vm_map_clip_unnest may perform additional adjustments to
5438 * the unnest range.
5439 */
5440 start_unnest = entry->vme_start;
5441 end_unnest =
5442 (endaddr + pmap_nesting_size_min - 1) &
5443 ~(pmap_nesting_size_min - 1);
5444 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5445 }
5446 #endif /* NO_NESTED_PMAP */
5447 if (endaddr < entry->vme_end) {
5448 if (VME_OBJECT(entry) &&
5449 !entry->is_sub_map &&
5450 VME_OBJECT(entry)->phys_contiguous) {
5451 pmap_remove(map->pmap,
5452 (addr64_t)(entry->vme_start),
5453 (addr64_t)(entry->vme_end));
5454 }
5455 if (entry->vme_atomic) {
5456 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5457 }
5458 DTRACE_VM5(
5459 vm_map_clip_end,
5460 vm_map_t, map,
5461 vm_map_offset_t, entry->vme_start,
5462 vm_map_offset_t, entry->vme_end,
5463 vm_map_offset_t, endaddr,
5464 int, VME_ALIAS(entry));
5465
5466 _vm_map_clip_end(&map->hdr, entry, endaddr);
5467 if (map->holelistenabled) {
5468 vm_map_store_update_first_free(map, NULL, FALSE);
5469 } else {
5470 vm_map_store_update_first_free(map, map->first_free, FALSE);
5471 }
5472 }
5473 }
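
/*
 * Typical usage sketch (not part of the original source): callers that
 * operate on an arbitrary [start, end) range clip the bounding entries
 * first, so the range is covered by whole entries:
 *
 *	if (vm_map_lookup_entry(map, start, &entry)) {
 *		vm_map_clip_start(map, entry, start);
 *		vm_map_clip_end(map, entry, end);
 *		...
 *	}
 *
 * This is the pattern used by vm_map_submap() and vm_map_protect()
 * later in this file.
 */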
5474
5475
5476 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5477 MACRO_BEGIN \
5478 if ((endaddr) < (entry)->vme_end) \
5479 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5480 MACRO_END
5481
5482 /*
5483 * This routine is called only when it is known that
5484 * the entry must be split.
5485 */
5486 static void
5487 _vm_map_clip_end(
5488 struct vm_map_header *map_header,
5489 vm_map_entry_t entry,
5490 vm_map_offset_t end)
5491 {
5492 vm_map_entry_t new_entry;
5493
5494 /*
5495 * Create a new entry and insert it
5496 * AFTER the specified entry
5497 */
5498
5499 if (entry->map_aligned) {
5500 assert(VM_MAP_PAGE_ALIGNED(end,
5501 VM_MAP_HDR_PAGE_MASK(map_header)));
5502 }
5503
5504 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5505 vm_map_entry_copy_full(new_entry, entry);
5506
5507 assert(entry->vme_start < end);
5508 new_entry->vme_start = entry->vme_end = end;
5509 VME_OFFSET_SET(new_entry,
5510 VME_OFFSET(new_entry) + (end - entry->vme_start));
5511 assert(new_entry->vme_start < new_entry->vme_end);
5512
5513 _vm_map_store_entry_link(map_header, entry, new_entry);
5514
5515 if (entry->is_sub_map) {
5516 vm_map_reference(VME_SUBMAP(new_entry));
5517 } else {
5518 vm_object_reference(VME_OBJECT(new_entry));
5519 }
5520 }
5521
5522
5523 /*
5524 * VM_MAP_RANGE_CHECK: [ internal use only ]
5525 *
5526 * Clamps the starting and ending region
5527 * addresses to the valid range of the map.
5528 */
5529 #define VM_MAP_RANGE_CHECK(map, start, end) \
5530 MACRO_BEGIN \
5531 if (start < vm_map_min(map)) \
5532 start = vm_map_min(map); \
5533 if (end > vm_map_max(map)) \
5534 end = vm_map_max(map); \
5535 if (start > end) \
5536 start = end; \
5537 MACRO_END
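
/*
 * Worked example (added for clarity, not in the original source): for a
 * map whose valid range is [vm_map_min(map), vm_map_max(map)) =
 * [0x1000, 0xF000), VM_MAP_RANGE_CHECK adjusts its arguments in place:
 *
 *	start = 0x0800, end = 0x4000   ->   start = 0x1000, end = 0x4000
 *	start = 0x2000, end = 0xFF00   ->   start = 0x2000, end = 0xF000
 *	start = 0x6000, end = 0x2000   ->   start = 0x2000, end = 0x2000
 *
 * Both bounds are clamped to the map, and an inverted range collapses
 * to an empty one.
 */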
5538
5539 /*
5540 * vm_map_range_check: [ internal use only ]
5541 *
5542 * Check that the region defined by the specified start and
5543 * end addresses is wholly contained within a single map
5544 * entry or set of adjacent map entries of the specified map,
5545 * i.e. the specified region contains no unmapped space.
5546 * If any or all of the region is unmapped, FALSE is returned.
5547 * Otherwise, TRUE is returned and if the output argument 'entry'
5548 * is not NULL it points to the map entry containing the start
5549 * of the region.
5550 *
5551 * The map is locked for reading on entry and is left locked.
5552 */
5553 static boolean_t
5554 vm_map_range_check(
5555 vm_map_t map,
5556 vm_map_offset_t start,
5557 vm_map_offset_t end,
5558 vm_map_entry_t *entry)
5559 {
5560 vm_map_entry_t cur;
5561 vm_map_offset_t prev;
5562
5563 /*
5564 * Basic sanity checks first
5565 */
5566 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5567 return FALSE;
5568 }
5569
5570 /*
5571 * Check first if the region starts within a valid
5572 * mapping for the map.
5573 */
5574 if (!vm_map_lookup_entry(map, start, &cur)) {
5575 return FALSE;
5576 }
5577
5578 /*
5579 * Optimize for the case that the region is contained
5580 * in a single map entry.
5581 */
5582 if (entry != (vm_map_entry_t *) NULL) {
5583 *entry = cur;
5584 }
5585 if (end <= cur->vme_end) {
5586 return TRUE;
5587 }
5588
5589 /*
5590 * If the region is not wholly contained within a
5591 * single entry, walk the entries looking for holes.
5592 */
5593 prev = cur->vme_end;
5594 cur = cur->vme_next;
5595 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5596 if (end <= cur->vme_end) {
5597 return TRUE;
5598 }
5599 prev = cur->vme_end;
5600 cur = cur->vme_next;
5601 }
5602 return FALSE;
5603 }
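
/*
 * Minimal usage sketch (hypothetical, not from the original source),
 * with the map already locked for reading as required above:
 *
 *	vm_map_entry_t entry;
 *
 *	if (!vm_map_range_check(map, start, end, &entry)) {
 *		return KERN_INVALID_ADDRESS;
 *	}
 *	...	"entry" now points at the map entry containing "start"
 */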
5604
5605 /*
5606 * vm_map_submap: [ kernel use only ]
5607 *
5608 * Mark the given range as handled by a subordinate map.
5609 *
5610 * This range must have been created with vm_map_find using
5611 * the vm_submap_object, and no other operations may have been
5612 * performed on this range prior to calling vm_map_submap.
5613 *
5614 * Only a limited number of operations can be performed
5615 * within this range after calling vm_map_submap:
5616 * vm_fault
5617 * [Don't try vm_map_copyin!]
5618 *
5619 * To remove a submapping, one must first remove the
5620 * range from the superior map, and then destroy the
5621 * submap (if desired). [Better yet, don't try it.]
5622 */
5623 kern_return_t
5624 vm_map_submap(
5625 vm_map_t map,
5626 vm_map_offset_t start,
5627 vm_map_offset_t end,
5628 vm_map_t submap,
5629 vm_map_offset_t offset,
5630 #ifdef NO_NESTED_PMAP
5631 __unused
5632 #endif /* NO_NESTED_PMAP */
5633 boolean_t use_pmap)
5634 {
5635 vm_map_entry_t entry;
5636 kern_return_t result = KERN_INVALID_ARGUMENT;
5637 vm_object_t object;
5638
5639 vm_map_lock(map);
5640
5641 if (!vm_map_lookup_entry(map, start, &entry)) {
5642 entry = entry->vme_next;
5643 }
5644
5645 if (entry == vm_map_to_entry(map) ||
5646 entry->is_sub_map) {
5647 vm_map_unlock(map);
5648 return KERN_INVALID_ARGUMENT;
5649 }
5650
5651 vm_map_clip_start(map, entry, start);
5652 vm_map_clip_end(map, entry, end);
5653
5654 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5655 (!entry->is_sub_map) &&
5656 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5657 (object->resident_page_count == 0) &&
5658 (object->copy == VM_OBJECT_NULL) &&
5659 (object->shadow == VM_OBJECT_NULL) &&
5660 (!object->pager_created)) {
5661 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5662 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5663 vm_object_deallocate(object);
5664 entry->is_sub_map = TRUE;
5665 entry->use_pmap = FALSE;
5666 VME_SUBMAP_SET(entry, submap);
5667 vm_map_reference(submap);
5668 if (submap->mapped_in_other_pmaps == FALSE &&
5669 vm_map_pmap(submap) != PMAP_NULL &&
5670 vm_map_pmap(submap) != vm_map_pmap(map)) {
5671 /*
5672 * This submap is being mapped in a map
5673 * that uses a different pmap.
5674 * Set its "mapped_in_other_pmaps" flag
5675 * to indicate that we now need to
5676 * remove mappings from all pmaps rather
5677 * than just the submap's pmap.
5678 */
5679 submap->mapped_in_other_pmaps = TRUE;
5680 }
5681
5682 #ifndef NO_NESTED_PMAP
5683 if (use_pmap) {
5684 /* nest if platform code will allow */
5685 if (submap->pmap == NULL) {
5686 ledger_t ledger = map->pmap->ledger;
5687 submap->pmap = pmap_create_options(ledger,
5688 (vm_map_size_t) 0, 0);
5689 if (submap->pmap == PMAP_NULL) {
5690 vm_map_unlock(map);
5691 return KERN_NO_SPACE;
5692 }
5693 #if defined(__arm__) || defined(__arm64__)
5694 pmap_set_nested(submap->pmap);
5695 #endif
5696 }
5697 result = pmap_nest(map->pmap,
5698 (VME_SUBMAP(entry))->pmap,
5699 (addr64_t)start,
5700 (addr64_t)start,
5701 (uint64_t)(end - start));
5702 if (result) {
5703 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5704 }
5705 entry->use_pmap = TRUE;
5706 }
5707 #else /* NO_NESTED_PMAP */
5708 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5709 #endif /* NO_NESTED_PMAP */
5710 result = KERN_SUCCESS;
5711 }
5712 vm_map_unlock(map);
5713
5714 return result;
5715 }
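
/*
 * Usage sketch (hypothetical, not from the original source): per the
 * header comment above, the range must first have been reserved against
 * vm_submap_object; "use_pmap" requests pmap nesting where the platform
 * supports it.
 *
 *	kr = vm_map_submap(parent_map, start, end,
 *	    submap,
 *	    0,
 *	    TRUE);
 */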
5716
5717 /*
5718 * vm_map_protect:
5719 *
5720 * Sets the protection of the specified address
5721 * region in the target map. If "set_max" is
5722 * specified, the maximum protection is to be set;
5723 * otherwise, only the current protection is affected.
5724 */
5725 kern_return_t
5726 vm_map_protect(
5727 vm_map_t map,
5728 vm_map_offset_t start,
5729 vm_map_offset_t end,
5730 vm_prot_t new_prot,
5731 boolean_t set_max)
5732 {
5733 vm_map_entry_t current;
5734 vm_map_offset_t prev;
5735 vm_map_entry_t entry;
5736 vm_prot_t new_max;
5737 int pmap_options = 0;
5738 kern_return_t kr;
5739
5740 if (new_prot & VM_PROT_COPY) {
5741 vm_map_offset_t new_start;
5742 vm_prot_t cur_prot, max_prot;
5743 vm_map_kernel_flags_t kflags;
5744
5745 /* LP64todo - see below */
5746 if (start >= map->max_offset) {
5747 return KERN_INVALID_ADDRESS;
5748 }
5749
5750 #if VM_PROTECT_WX_FAIL
5751 if ((new_prot & VM_PROT_EXECUTE) &&
5752 map != kernel_map &&
5753 cs_process_enforcement(NULL)) {
5754 DTRACE_VM3(cs_wx,
5755 uint64_t, (uint64_t) start,
5756 uint64_t, (uint64_t) end,
5757 vm_prot_t, new_prot);
5758 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5759 proc_selfpid(),
5760 (current_task()->bsd_info
5761 ? proc_name_address(current_task()->bsd_info)
5762 : "?"),
5763 __FUNCTION__);
5764 return KERN_PROTECTION_FAILURE;
5765 }
5766 #endif /* VM_PROTECT_WX_FAIL */
5767
5768 /*
5769 * Let vm_map_remap_extract() know that it will need to:
5770 * + make a copy of the mapping
5771 * + add VM_PROT_WRITE to the max protections
5772 * + remove any protections that are no longer allowed from the
5773 * max protections (to avoid any WRITE/EXECUTE conflict, for
5774 * example).
5775 * Note that "max_prot" is an IN/OUT parameter only for this
5776 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5777 * only.
5778 */
5779 max_prot = new_prot & VM_PROT_ALL;
5780 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5781 kflags.vmkf_remap_prot_copy = TRUE;
5782 kflags.vmkf_overwrite_immutable = TRUE;
5783 new_start = start;
5784 kr = vm_map_remap(map,
5785 &new_start,
5786 end - start,
5787 0, /* mask */
5788 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5789 kflags,
5790 0,
5791 map,
5792 start,
5793 TRUE, /* copy-on-write remapping! */
5794 &cur_prot,
5795 &max_prot,
5796 VM_INHERIT_DEFAULT);
5797 if (kr != KERN_SUCCESS) {
5798 return kr;
5799 }
5800 new_prot &= ~VM_PROT_COPY;
5801 }
5802
5803 vm_map_lock(map);
5804
5805 /* LP64todo - remove this check when vm_map_commpage64()
5806 * no longer has to stuff in a map_entry for the commpage
5807 * above the map's max_offset.
5808 */
5809 if (start >= map->max_offset) {
5810 vm_map_unlock(map);
5811 return KERN_INVALID_ADDRESS;
5812 }
5813
5814 while (1) {
5815 /*
5816 * Lookup the entry. If it doesn't start in a valid
5817 * entry, return an error.
5818 */
5819 if (!vm_map_lookup_entry(map, start, &entry)) {
5820 vm_map_unlock(map);
5821 return KERN_INVALID_ADDRESS;
5822 }
5823
5824 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5825 start = SUPERPAGE_ROUND_DOWN(start);
5826 continue;
5827 }
5828 break;
5829 }
5830 if (entry->superpage_size) {
5831 end = SUPERPAGE_ROUND_UP(end);
5832 }
5833
5834 /*
5835 * Make a first pass to check for protection and address
5836 * violations.
5837 */
5838
5839 current = entry;
5840 prev = current->vme_start;
5841 while ((current != vm_map_to_entry(map)) &&
5842 (current->vme_start < end)) {
5843 /*
5844 * If there is a hole, return an error.
5845 */
5846 if (current->vme_start != prev) {
5847 vm_map_unlock(map);
5848 return KERN_INVALID_ADDRESS;
5849 }
5850
5851 new_max = current->max_protection;
5852 if ((new_prot & new_max) != new_prot) {
5853 vm_map_unlock(map);
5854 return KERN_PROTECTION_FAILURE;
5855 }
5856
5857 if ((new_prot & VM_PROT_WRITE) &&
5858 (new_prot & VM_PROT_EXECUTE) &&
5859 #if !CONFIG_EMBEDDED
5860 map != kernel_map &&
5861 cs_process_enforcement(NULL) &&
5862 #endif /* !CONFIG_EMBEDDED */
5863 !(current->used_for_jit)) {
5864 DTRACE_VM3(cs_wx,
5865 uint64_t, (uint64_t) current->vme_start,
5866 uint64_t, (uint64_t) current->vme_end,
5867 vm_prot_t, new_prot);
5868 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5869 proc_selfpid(),
5870 (current_task()->bsd_info
5871 ? proc_name_address(current_task()->bsd_info)
5872 : "?"),
5873 __FUNCTION__);
5874 new_prot &= ~VM_PROT_EXECUTE;
5875 #if VM_PROTECT_WX_FAIL
5876 vm_map_unlock(map);
5877 return KERN_PROTECTION_FAILURE;
5878 #endif /* VM_PROTECT_WX_FAIL */
5879 }
5880
5881 /*
5882 * If the task has requested executable lockdown,
5883 * deny either:
5884 * - adding executable protections, OR
5885 * - adding write protections to an existing executable mapping.
5886 */
5887 if (map->map_disallow_new_exec == TRUE) {
5888 if ((new_prot & VM_PROT_EXECUTE) ||
5889 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5890 vm_map_unlock(map);
5891 return KERN_PROTECTION_FAILURE;
5892 }
5893 }
5894
5895 prev = current->vme_end;
5896 current = current->vme_next;
5897 }
5898
5899 #if __arm64__
5900 if (end > prev &&
5901 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5902 vm_map_entry_t prev_entry;
5903
5904 prev_entry = current->vme_prev;
5905 if (prev_entry != vm_map_to_entry(map) &&
5906 !prev_entry->map_aligned &&
5907 (vm_map_round_page(prev_entry->vme_end,
5908 VM_MAP_PAGE_MASK(map))
5909 == end)) {
5910 /*
5911 * The last entry in our range is not "map-aligned"
5912 * but it would have reached all the way to "end"
5913 * if it had been map-aligned, so this is not really
5914 * a hole in the range and we can proceed.
5915 */
5916 prev = end;
5917 }
5918 }
5919 #endif /* __arm64__ */
5920
5921 if (end > prev) {
5922 vm_map_unlock(map);
5923 return KERN_INVALID_ADDRESS;
5924 }
5925
5926 /*
5927 * Go back and fix up protections.
5928 * Clip to start here if the range starts within
5929 * the entry.
5930 */
5931
5932 current = entry;
5933 if (current != vm_map_to_entry(map)) {
5934 /* clip and unnest if necessary */
5935 vm_map_clip_start(map, current, start);
5936 }
5937
5938 while ((current != vm_map_to_entry(map)) &&
5939 (current->vme_start < end)) {
5940 vm_prot_t old_prot;
5941
5942 vm_map_clip_end(map, current, end);
5943
5944 if (current->is_sub_map) {
5945 /* clipping did unnest if needed */
5946 assert(!current->use_pmap);
5947 }
5948
5949 old_prot = current->protection;
5950
5951 if (set_max) {
5952 current->max_protection = new_prot;
5953 current->protection = new_prot & old_prot;
5954 } else {
5955 current->protection = new_prot;
5956 }
5957
5958 /*
5959 * Update physical map if necessary.
5960 * If the request is to turn off write protection,
5961 * we won't do it for real (in pmap). This is because
5962 * it would cause copy-on-write to fail. We've already
5963 * set the new protection in the map, so if a
5964 * write-protect fault occurred, it will be fixed up
5965 * properly, COW or not.
5966 */
5967 if (current->protection != old_prot) {
5968 /* Look one level in; we support nested pmaps */
5969 /* from mapped submaps which are direct entries */
5970 /* in our map */
5971
5972 vm_prot_t prot;
5973
5974 prot = current->protection;
5975 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5976 prot &= ~VM_PROT_WRITE;
5977 } else {
5978 assert(!VME_OBJECT(current)->code_signed);
5979 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5980 }
5981
5982 if (override_nx(map, VME_ALIAS(current)) && prot) {
5983 prot |= VM_PROT_EXECUTE;
5984 }
5985
5986 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5987 if (!(old_prot & VM_PROT_EXECUTE) &&
5988 (prot & VM_PROT_EXECUTE) &&
5989 panic_on_unsigned_execute &&
5990 (proc_selfcsflags() & CS_KILL)) {
5991 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5992 }
5993 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5994
5995 if (pmap_has_prot_policy(prot)) {
5996 if (current->wired_count) {
5997 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5998 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5999 }
6000
6001 /* If the pmap layer cares about this
6002 * protection type, force a fault for
6003 * each page so that vm_fault will
6004 * repopulate the page with the full
6005 * set of protections.
6006 */
6007 /*
6008 * TODO: We don't seem to need this,
6009 * but this is due to an internal
6010 * implementation detail of
6011 * pmap_protect. Do we want to rely
6012 * on this?
6013 */
6014 prot = VM_PROT_NONE;
6015 }
6016
6017 if (current->is_sub_map && current->use_pmap) {
6018 pmap_protect(VME_SUBMAP(current)->pmap,
6019 current->vme_start,
6020 current->vme_end,
6021 prot);
6022 } else {
6023 if (prot & VM_PROT_WRITE) {
6024 if (VME_OBJECT(current) == compressor_object) {
6025 /*
6026 * For write requests on the
6027 * compressor, we will ask the
6028 * pmap layer to prevent us from
6029 * taking a write fault when we
6030 * attempt to access the mapping
6031 * next.
6032 */
6033 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6034 }
6035 }
6036
6037 pmap_protect_options(map->pmap,
6038 current->vme_start,
6039 current->vme_end,
6040 prot,
6041 pmap_options,
6042 NULL);
6043 }
6044 }
6045 current = current->vme_next;
6046 }
6047
6048 current = entry;
6049 while ((current != vm_map_to_entry(map)) &&
6050 (current->vme_start <= end)) {
6051 vm_map_simplify_entry(map, current);
6052 current = current->vme_next;
6053 }
6054
6055 vm_map_unlock(map);
6056 return KERN_SUCCESS;
6057 }
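#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how a kernel caller might use vm_map_protect()
 * above: first lower the current protection, then clamp the maximum
 * protection so the range cannot be made writable again.  The function
 * name and the map/range arguments are hypothetical; only
 * vm_map_protect() and the VM_PROT_* constants are real.
 */
static kern_return_t
example_make_range_readonly(
	vm_map_t        map,            /* hypothetical target map */
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	/* Change only the current protection to read-only. */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Also clamp the maximum protection.  With set_max == TRUE the
	 * entry's max_protection becomes VM_PROT_READ and its current
	 * protection is intersected with it (see the set_max branch in
	 * vm_map_protect() above).
	 */
	return vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
}
#endif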
6058
6059 /*
6060 * vm_map_inherit:
6061 *
6062 * Sets the inheritance of the specified address
6063 * range in the target map. Inheritance
6064 * affects how the map will be shared with
6065 * child maps at the time of vm_map_fork.
6066 */
6067 kern_return_t
6068 vm_map_inherit(
6069 vm_map_t map,
6070 vm_map_offset_t start,
6071 vm_map_offset_t end,
6072 vm_inherit_t new_inheritance)
6073 {
6074 vm_map_entry_t entry;
6075 vm_map_entry_t temp_entry;
6076
6077 vm_map_lock(map);
6078
6079 VM_MAP_RANGE_CHECK(map, start, end);
6080
6081 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6082 entry = temp_entry;
6083 } else {
6084 temp_entry = temp_entry->vme_next;
6085 entry = temp_entry;
6086 }
6087
6088 /* first check entire range for submaps which can't support the */
6089 /* given inheritance. */
6090 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6091 if (entry->is_sub_map) {
6092 if (new_inheritance == VM_INHERIT_COPY) {
6093 vm_map_unlock(map);
6094 return KERN_INVALID_ARGUMENT;
6095 }
6096 }
6097
6098 entry = entry->vme_next;
6099 }
6100
6101 entry = temp_entry;
6102 if (entry != vm_map_to_entry(map)) {
6103 /* clip and unnest if necessary */
6104 vm_map_clip_start(map, entry, start);
6105 }
6106
6107 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6108 vm_map_clip_end(map, entry, end);
6109 if (entry->is_sub_map) {
6110 /* clip did unnest if needed */
6111 assert(!entry->use_pmap);
6112 }
6113
6114 entry->inheritance = new_inheritance;
6115
6116 entry = entry->vme_next;
6117 }
6118
6119 vm_map_unlock(map);
6120 return KERN_SUCCESS;
6121 }
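#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how a caller might use vm_map_inherit() above to
 * make a range shared with children created by vm_map_fork().  The
 * wrapper name and arguments are hypothetical; note that the loop above
 * rejects VM_INHERIT_COPY with KERN_INVALID_ARGUMENT if the range
 * contains a submap entry.
 */
static kern_return_t
example_share_range_with_children(
	vm_map_t        map,            /* hypothetical target map */
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif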
6122
6123 /*
6124 * Update the accounting for the amount of wired memory in this map. If the user has
6125 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6126 */
6127
6128 static kern_return_t
6129 add_wire_counts(
6130 vm_map_t map,
6131 vm_map_entry_t entry,
6132 boolean_t user_wire)
6133 {
6134 vm_map_size_t size;
6135
6136 if (user_wire) {
6137 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6138
6139 /*
6140 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6141 * this map entry.
6142 */
6143
6144 if (entry->user_wired_count == 0) {
6145 size = entry->vme_end - entry->vme_start;
6146
6147 /*
6148 * Since this is the first time the user is wiring this map entry, check to see if we're
6149 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
6150 * the process's rlimit and the global vm_per_task_user_wire_limit. There is also
6151 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6152 * limit, then we fail.
6153 */
6154
6155 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6156 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6157 return KERN_RESOURCE_SHORTAGE;
6158 }
6159
6160 /*
6161 * The first time the user wires an entry, we also increment the wired_count and add this to
6162 * the total that has been wired in the map.
6163 */
6164
6165 if (entry->wired_count >= MAX_WIRE_COUNT) {
6166 return KERN_FAILURE;
6167 }
6168
6169 entry->wired_count++;
6170 map->user_wire_size += size;
6171 }
6172
6173 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6174 return KERN_FAILURE;
6175 }
6176
6177 entry->user_wired_count++;
6178 } else {
6179 /*
6180 * The kernel's wiring the memory. Just bump the count and continue.
6181 */
6182
6183 if (entry->wired_count >= MAX_WIRE_COUNT) {
6184 panic("vm_map_wire: too many wirings");
6185 }
6186
6187 entry->wired_count++;
6188 }
6189
6190 return KERN_SUCCESS;
6191 }
6192
6193 /*
6194 * Update the memory wiring accounting now that the given map entry is being unwired.
6195 */
6196
6197 static void
6198 subtract_wire_counts(
6199 vm_map_t map,
6200 vm_map_entry_t entry,
6201 boolean_t user_wire)
6202 {
6203 if (user_wire) {
6204 /*
6205 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6206 */
6207
6208 if (entry->user_wired_count == 1) {
6209 /*
6210 * We're removing the last user wire reference. Decrement the wired_count and the total
6211 * user wired memory for this map.
6212 */
6213
6214 assert(entry->wired_count >= 1);
6215 entry->wired_count--;
6216 map->user_wire_size -= entry->vme_end - entry->vme_start;
6217 }
6218
6219 assert(entry->user_wired_count >= 1);
6220 entry->user_wired_count--;
6221 } else {
6222 /*
6223 * The kernel is unwiring the memory. Just update the count.
6224 */
6225
6226 assert(entry->wired_count >= 1);
6227 entry->wired_count--;
6228 }
6229 }
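#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how add_wire_counts() and subtract_wire_counts()
 * pair up for user wirings.  Only the first user wire bumps
 * entry->wired_count and map->user_wire_size; later user wires only
 * bump entry->user_wired_count.  The helper name is hypothetical and
 * the map is assumed to be locked, as for the real callers below.
 */
static kern_return_t
example_user_wire_twice(vm_map_t map, vm_map_entry_t entry)
{
	kern_return_t kr;

	kr = add_wire_counts(map, entry, TRUE);  /* wired_count 0 -> 1, user_wired_count 0 -> 1 */
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = add_wire_counts(map, entry, TRUE);  /* user_wired_count 1 -> 2 only */
	if (kr != KERN_SUCCESS) {
		subtract_wire_counts(map, entry, TRUE);
		return kr;
	}
	subtract_wire_counts(map, entry, TRUE);  /* user_wired_count 2 -> 1 only */
	subtract_wire_counts(map, entry, TRUE);  /* wired_count 1 -> 0, user_wired_count 1 -> 0 */
	return KERN_SUCCESS;
}
#endif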
6230
6231 int cs_executable_wire = 0;
6232
6233 /*
6234 * vm_map_wire:
6235 *
6236 * Sets the pageability of the specified address range in the
6237 * target map as wired. Regions specified as not pageable require
6238 * locked-down physical memory and physical page maps. The
6239 * access_type variable indicates types of accesses that must not
6240 * generate page faults. This is checked against protection of
6241 * memory being locked-down.
6242 *
6243 * The map must not be locked, but a reference must remain to the
6244 * map throughout the call.
6245 */
6246 static kern_return_t
6247 vm_map_wire_nested(
6248 vm_map_t map,
6249 vm_map_offset_t start,
6250 vm_map_offset_t end,
6251 vm_prot_t caller_prot,
6252 vm_tag_t tag,
6253 boolean_t user_wire,
6254 pmap_t map_pmap,
6255 vm_map_offset_t pmap_addr,
6256 ppnum_t *physpage_p)
6257 {
6258 vm_map_entry_t entry;
6259 vm_prot_t access_type;
6260 struct vm_map_entry *first_entry, tmp_entry;
6261 vm_map_t real_map;
6262 vm_map_offset_t s, e;
6263 kern_return_t rc;
6264 boolean_t need_wakeup;
6265 boolean_t main_map = FALSE;
6266 wait_interrupt_t interruptible_state;
6267 thread_t cur_thread;
6268 unsigned int last_timestamp;
6269 vm_map_size_t size;
6270 boolean_t wire_and_extract;
6271
6272 access_type = (caller_prot & VM_PROT_ALL);
6273
6274 wire_and_extract = FALSE;
6275 if (physpage_p != NULL) {
6276 /*
6277 * The caller wants the physical page number of the
6278 * wired page. We return only one physical page number
6279 * so this works for only one page at a time.
6280 */
6281 if ((end - start) != PAGE_SIZE) {
6282 return KERN_INVALID_ARGUMENT;
6283 }
6284 wire_and_extract = TRUE;
6285 *physpage_p = 0;
6286 }
6287
6288 vm_map_lock(map);
6289 if (map_pmap == NULL) {
6290 main_map = TRUE;
6291 }
6292 last_timestamp = map->timestamp;
6293
6294 VM_MAP_RANGE_CHECK(map, start, end);
6295 assert(page_aligned(start));
6296 assert(page_aligned(end));
6297 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6298 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6299 if (start == end) {
6300 /* We wired what the caller asked for, zero pages */
6301 vm_map_unlock(map);
6302 return KERN_SUCCESS;
6303 }
6304
6305 need_wakeup = FALSE;
6306 cur_thread = current_thread();
6307
6308 s = start;
6309 rc = KERN_SUCCESS;
6310
6311 if (vm_map_lookup_entry(map, s, &first_entry)) {
6312 entry = first_entry;
6313 /*
6314 * vm_map_clip_start will be done later.
6315 * We don't want to unnest any nested submaps here!
6316 */
6317 } else {
6318 /* Start address is not in map */
6319 rc = KERN_INVALID_ADDRESS;
6320 goto done;
6321 }
6322
6323 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6324 /*
6325 * At this point, we have wired from "start" to "s".
6326 * We still need to wire from "s" to "end".
6327 *
6328 * "entry" hasn't been clipped, so it could start before "s"
6329 * and/or end after "end".
6330 */
6331
6332 /* "e" is how far we want to wire in this entry */
6333 e = entry->vme_end;
6334 if (e > end) {
6335 e = end;
6336 }
6337
6338 /*
6339 * If another thread is wiring/unwiring this entry then
6340 * block after informing other thread to wake us up.
6341 */
6342 if (entry->in_transition) {
6343 wait_result_t wait_result;
6344
6345 /*
6346 * We have not clipped the entry. Make sure that
6347 * the start address is in range so that the lookup
6348 * below will succeed.
6349 * "s" is the current starting point: we've already
6350 * wired from "start" to "s" and we still have
6351 * to wire from "s" to "end".
6352 */
6353
6354 entry->needs_wakeup = TRUE;
6355
6356 /*
6357 * wake up anybody waiting on entries that we have
6358 * already wired.
6359 */
6360 if (need_wakeup) {
6361 vm_map_entry_wakeup(map);
6362 need_wakeup = FALSE;
6363 }
6364 /*
6365 * User wiring is interruptible
6366 */
6367 wait_result = vm_map_entry_wait(map,
6368 (user_wire) ? THREAD_ABORTSAFE :
6369 THREAD_UNINT);
6370 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6371 /*
6372 * undo the wirings we have done so far
6373 * We do not clear the needs_wakeup flag,
6374 * because we cannot tell if we were the
6375 * only one waiting.
6376 */
6377 rc = KERN_FAILURE;
6378 goto done;
6379 }
6380
6381 /*
6382 * Cannot avoid a lookup here. Reset the timestamp.
6383 */
6384 last_timestamp = map->timestamp;
6385
6386 /*
6387 * The entry could have been clipped, look it up again.
6388 * The worst that can happen is that it may not exist anymore.
6389 */
6390 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6391 /*
6392 * User: undo everything up to the previous
6393 * entry. Let vm_map_unwire worry about
6394 * checking the validity of the range.
6395 */
6396 rc = KERN_FAILURE;
6397 goto done;
6398 }
6399 entry = first_entry;
6400 continue;
6401 }
6402
6403 if (entry->is_sub_map) {
6404 vm_map_offset_t sub_start;
6405 vm_map_offset_t sub_end;
6406 vm_map_offset_t local_start;
6407 vm_map_offset_t local_end;
6408 pmap_t pmap;
6409
6410 if (wire_and_extract) {
6411 /*
6412 * Wiring would result in copy-on-write
6413 * which would not be compatible with
6414 * the sharing we have with the original
6415 * provider of this memory.
6416 */
6417 rc = KERN_INVALID_ARGUMENT;
6418 goto done;
6419 }
6420
6421 vm_map_clip_start(map, entry, s);
6422 vm_map_clip_end(map, entry, end);
6423
6424 sub_start = VME_OFFSET(entry);
6425 sub_end = entry->vme_end;
6426 sub_end += VME_OFFSET(entry) - entry->vme_start;
6427
6428 local_end = entry->vme_end;
6429 if (map_pmap == NULL) {
6430 vm_object_t object;
6431 vm_object_offset_t offset;
6432 vm_prot_t prot;
6433 boolean_t wired;
6434 vm_map_entry_t local_entry;
6435 vm_map_version_t version;
6436 vm_map_t lookup_map;
6437
6438 if (entry->use_pmap) {
6439 pmap = VME_SUBMAP(entry)->pmap;
6440 /* ppc implementation requires that */
6441 /* the submap's pmap address ranges line */
6442 /* up with the parent map */
6443 #ifdef notdef
6444 pmap_addr = sub_start;
6445 #endif
6446 pmap_addr = s;
6447 } else {
6448 pmap = map->pmap;
6449 pmap_addr = s;
6450 }
6451
6452 if (entry->wired_count) {
6453 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6454 goto done;
6455 }
6456
6457 /*
6458 * The map was not unlocked:
6459 * no need to goto re-lookup.
6460 * Just go directly to next entry.
6461 */
6462 entry = entry->vme_next;
6463 s = entry->vme_start;
6464 continue;
6465 }
6466
6467 /* call vm_map_lookup_locked to */
6468 /* cause any needs copy to be */
6469 /* evaluated */
6470 local_start = entry->vme_start;
6471 lookup_map = map;
6472 vm_map_lock_write_to_read(map);
6473 if (vm_map_lookup_locked(
6474 &lookup_map, local_start,
6475 access_type | VM_PROT_COPY,
6476 OBJECT_LOCK_EXCLUSIVE,
6477 &version, &object,
6478 &offset, &prot, &wired,
6479 NULL,
6480 &real_map)) {
6481 vm_map_unlock_read(lookup_map);
6482 assert(map_pmap == NULL);
6483 vm_map_unwire(map, start,
6484 s, user_wire);
6485 return KERN_FAILURE;
6486 }
6487 vm_object_unlock(object);
6488 if (real_map != lookup_map) {
6489 vm_map_unlock(real_map);
6490 }
6491 vm_map_unlock_read(lookup_map);
6492 vm_map_lock(map);
6493
6494 /* we unlocked, so must re-lookup */
6495 if (!vm_map_lookup_entry(map,
6496 local_start,
6497 &local_entry)) {
6498 rc = KERN_FAILURE;
6499 goto done;
6500 }
6501
6502 /*
6503 * entry could have been "simplified",
6504 * so re-clip
6505 */
6506 entry = local_entry;
6507 assert(s == local_start);
6508 vm_map_clip_start(map, entry, s);
6509 vm_map_clip_end(map, entry, end);
6510 /* re-compute "e" */
6511 e = entry->vme_end;
6512 if (e > end) {
6513 e = end;
6514 }
6515
6516 /* did we have a change of type? */
6517 if (!entry->is_sub_map) {
6518 last_timestamp = map->timestamp;
6519 continue;
6520 }
6521 } else {
6522 local_start = entry->vme_start;
6523 pmap = map_pmap;
6524 }
6525
6526 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6527 goto done;
6528 }
6529
6530 entry->in_transition = TRUE;
6531
6532 vm_map_unlock(map);
6533 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6534 sub_start, sub_end,
6535 caller_prot, tag,
6536 user_wire, pmap, pmap_addr,
6537 NULL);
6538 vm_map_lock(map);
6539
6540 /*
6541 * Find the entry again. It could have been clipped
6542 * after we unlocked the map.
6543 */
6544 if (!vm_map_lookup_entry(map, local_start,
6545 &first_entry)) {
6546 panic("vm_map_wire: re-lookup failed");
6547 }
6548 entry = first_entry;
6549
6550 assert(local_start == s);
6551 /* re-compute "e" */
6552 e = entry->vme_end;
6553 if (e > end) {
6554 e = end;
6555 }
6556
6557 last_timestamp = map->timestamp;
6558 while ((entry != vm_map_to_entry(map)) &&
6559 (entry->vme_start < e)) {
6560 assert(entry->in_transition);
6561 entry->in_transition = FALSE;
6562 if (entry->needs_wakeup) {
6563 entry->needs_wakeup = FALSE;
6564 need_wakeup = TRUE;
6565 }
6566 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6567 subtract_wire_counts(map, entry, user_wire);
6568 }
6569 entry = entry->vme_next;
6570 }
6571 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6572 goto done;
6573 }
6574
6575 /* no need to relookup again */
6576 s = entry->vme_start;
6577 continue;
6578 }
6579
6580 /*
6581 * If this entry is already wired then increment
6582 * the appropriate wire reference count.
6583 */
6584 if (entry->wired_count) {
6585 if ((entry->protection & access_type) != access_type) {
6586 /* found a protection problem */
6587
6588 /*
6589 * XXX FBDP
6590 * We should always return an error
6591 * in this case but since we didn't
6592 * enforce it before, let's do
6593 * it only for the new "wire_and_extract"
6594 * code path for now...
6595 */
6596 if (wire_and_extract) {
6597 rc = KERN_PROTECTION_FAILURE;
6598 goto done;
6599 }
6600 }
6601
6602 /*
6603 * entry is already wired down, get our reference
6604 * after clipping to our range.
6605 */
6606 vm_map_clip_start(map, entry, s);
6607 vm_map_clip_end(map, entry, end);
6608
6609 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6610 goto done;
6611 }
6612
6613 if (wire_and_extract) {
6614 vm_object_t object;
6615 vm_object_offset_t offset;
6616 vm_page_t m;
6617
6618 /*
6619 * We don't have to "wire" the page again
6620 * but we still have to "extract" its
6621 * physical page number, after some sanity
6622 * checks.
6623 */
6624 assert((entry->vme_end - entry->vme_start)
6625 == PAGE_SIZE);
6626 assert(!entry->needs_copy);
6627 assert(!entry->is_sub_map);
6628 assert(VME_OBJECT(entry));
6629 if (((entry->vme_end - entry->vme_start)
6630 != PAGE_SIZE) ||
6631 entry->needs_copy ||
6632 entry->is_sub_map ||
6633 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6634 rc = KERN_INVALID_ARGUMENT;
6635 goto done;
6636 }
6637
6638 object = VME_OBJECT(entry);
6639 offset = VME_OFFSET(entry);
6640 /* need exclusive lock to update m->dirty */
6641 if (entry->protection & VM_PROT_WRITE) {
6642 vm_object_lock(object);
6643 } else {
6644 vm_object_lock_shared(object);
6645 }
6646 m = vm_page_lookup(object, offset);
6647 assert(m != VM_PAGE_NULL);
6648 assert(VM_PAGE_WIRED(m));
6649 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6650 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6651 if (entry->protection & VM_PROT_WRITE) {
6652 vm_object_lock_assert_exclusive(
6653 object);
6654 m->vmp_dirty = TRUE;
6655 }
6656 } else {
6657 /* not already wired !? */
6658 *physpage_p = 0;
6659 }
6660 vm_object_unlock(object);
6661 }
6662
6663 /* map was not unlocked: no need to relookup */
6664 entry = entry->vme_next;
6665 s = entry->vme_start;
6666 continue;
6667 }
6668
6669 /*
6670 * Unwired entry or wire request transmitted via submap
6671 */
6672
6673 /*
6674 * Wiring would copy the pages to the shadow object.
6675 * The shadow object would not be code-signed so
6676 * attempting to execute code from these copied pages
6677 * would trigger a code-signing violation.
6678 */
6679
6680 if ((entry->protection & VM_PROT_EXECUTE)
6681 #if !CONFIG_EMBEDDED
6682 &&
6683 map != kernel_map &&
6684 cs_process_enforcement(NULL)
6685 #endif /* !CONFIG_EMBEDDED */
6686 ) {
6687 #if MACH_ASSERT
6688 printf("pid %d[%s] wiring executable range from "
6689 "0x%llx to 0x%llx: rejected to preserve "
6690 "code-signing\n",
6691 proc_selfpid(),
6692 (current_task()->bsd_info
6693 ? proc_name_address(current_task()->bsd_info)
6694 : "?"),
6695 (uint64_t) entry->vme_start,
6696 (uint64_t) entry->vme_end);
6697 #endif /* MACH_ASSERT */
6698 DTRACE_VM2(cs_executable_wire,
6699 uint64_t, (uint64_t)entry->vme_start,
6700 uint64_t, (uint64_t)entry->vme_end);
6701 cs_executable_wire++;
6702 rc = KERN_PROTECTION_FAILURE;
6703 goto done;
6704 }
6705
6706 /*
6707 * Perform actions of vm_map_lookup that need the write
6708 * lock on the map: create a shadow object for a
6709 * copy-on-write region, or an object for a zero-fill
6710 * region.
6711 */
6712 size = entry->vme_end - entry->vme_start;
6713 /*
6714 * If wiring a copy-on-write page, we need to copy it now
6715 * even if we're only (currently) requesting read access.
6716 * This is aggressive, but once it's wired we can't move it.
6717 */
6718 if (entry->needs_copy) {
6719 if (wire_and_extract) {
6720 /*
6721 * We're supposed to share with the original
6722 * provider so should not be "needs_copy"
6723 */
6724 rc = KERN_INVALID_ARGUMENT;
6725 goto done;
6726 }
6727
6728 VME_OBJECT_SHADOW(entry, size);
6729 entry->needs_copy = FALSE;
6730 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6731 if (wire_and_extract) {
6732 /*
6733 * We're supposed to share with the original
6734 * provider so should already have an object.
6735 */
6736 rc = KERN_INVALID_ARGUMENT;
6737 goto done;
6738 }
6739 VME_OBJECT_SET(entry, vm_object_allocate(size));
6740 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6741 assert(entry->use_pmap);
6742 }
6743
6744 vm_map_clip_start(map, entry, s);
6745 vm_map_clip_end(map, entry, end);
6746
6747 /* re-compute "e" */
6748 e = entry->vme_end;
6749 if (e > end) {
6750 e = end;
6751 }
6752
6753 /*
6754 * Check for holes and protection mismatch.
6755 * Holes: Next entry should be contiguous unless this
6756 * is the end of the region.
6757 * Protection: Access requested must be allowed, unless
6758 * wiring is by protection class
6759 */
6760 if ((entry->vme_end < end) &&
6761 ((entry->vme_next == vm_map_to_entry(map)) ||
6762 (entry->vme_next->vme_start > entry->vme_end))) {
6763 /* found a hole */
6764 rc = KERN_INVALID_ADDRESS;
6765 goto done;
6766 }
6767 if ((entry->protection & access_type) != access_type) {
6768 /* found a protection problem */
6769 rc = KERN_PROTECTION_FAILURE;
6770 goto done;
6771 }
6772
6773 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6774
6775 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6776 goto done;
6777 }
6778
6779 entry->in_transition = TRUE;
6780
6781 /*
6782 * This entry might get split once we unlock the map.
6783 * In vm_fault_wire(), we need the current range as
6784 * defined by this entry. In order for this to work
6785 * along with a simultaneous clip operation, we make a
6786 * temporary copy of this entry and use that for the
6787 * wiring. Note that the underlying objects do not
6788 * change during a clip.
6789 */
6790 tmp_entry = *entry;
6791
6792 /*
6793 * The in_transition state guarantees that the entry
6794 * (or entries for this range, if a split occurred) will be
6795 * there when the map lock is acquired for the second time.
6796 */
6797 vm_map_unlock(map);
6798
6799 if (!user_wire && cur_thread != THREAD_NULL) {
6800 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6801 } else {
6802 interruptible_state = THREAD_UNINT;
6803 }
6804
6805 if (map_pmap) {
6806 rc = vm_fault_wire(map,
6807 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6808 physpage_p);
6809 } else {
6810 rc = vm_fault_wire(map,
6811 &tmp_entry, caller_prot, tag, map->pmap,
6812 tmp_entry.vme_start,
6813 physpage_p);
6814 }
6815
6816 if (!user_wire && cur_thread != THREAD_NULL) {
6817 thread_interrupt_level(interruptible_state);
6818 }
6819
6820 vm_map_lock(map);
6821
6822 if (last_timestamp + 1 != map->timestamp) {
6823 /*
6824 * Find the entry again. It could have been clipped
6825 * after we unlocked the map.
6826 */
6827 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6828 &first_entry)) {
6829 panic("vm_map_wire: re-lookup failed");
6830 }
6831
6832 entry = first_entry;
6833 }
6834
6835 last_timestamp = map->timestamp;
6836
6837 while ((entry != vm_map_to_entry(map)) &&
6838 (entry->vme_start < tmp_entry.vme_end)) {
6839 assert(entry->in_transition);
6840 entry->in_transition = FALSE;
6841 if (entry->needs_wakeup) {
6842 entry->needs_wakeup = FALSE;
6843 need_wakeup = TRUE;
6844 }
6845 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6846 subtract_wire_counts(map, entry, user_wire);
6847 }
6848 entry = entry->vme_next;
6849 }
6850
6851 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6852 goto done;
6853 }
6854
6855 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6856 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6857 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6858 /* found a "new" hole */
6859 s = tmp_entry.vme_end;
6860 rc = KERN_INVALID_ADDRESS;
6861 goto done;
6862 }
6863
6864 s = entry->vme_start;
6865 } /* end while loop through map entries */
6866
6867 done:
6868 if (rc == KERN_SUCCESS) {
6869 /* repair any damage we may have made to the VM map */
6870 vm_map_simplify_range(map, start, end);
6871 }
6872
6873 vm_map_unlock(map);
6874
6875 /*
6876 * wake up anybody waiting on entries we wired.
6877 */
6878 if (need_wakeup) {
6879 vm_map_entry_wakeup(map);
6880 }
6881
6882 if (rc != KERN_SUCCESS) {
6883 /* undo what has been wired so far */
6884 vm_map_unwire_nested(map, start, s, user_wire,
6885 map_pmap, pmap_addr);
6886 if (physpage_p) {
6887 *physpage_p = 0;
6888 }
6889 }
6890
6891 return rc;
6892 }
6893
6894 kern_return_t
6895 vm_map_wire_external(
6896 vm_map_t map,
6897 vm_map_offset_t start,
6898 vm_map_offset_t end,
6899 vm_prot_t caller_prot,
6900 boolean_t user_wire)
6901 {
6902 kern_return_t kret;
6903
6904 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6905 user_wire, (pmap_t)NULL, 0, NULL);
6906 return kret;
6907 }
6908
6909 kern_return_t
6910 vm_map_wire_kernel(
6911 vm_map_t map,
6912 vm_map_offset_t start,
6913 vm_map_offset_t end,
6914 vm_prot_t caller_prot,
6915 vm_tag_t tag,
6916 boolean_t user_wire)
6917 {
6918 kern_return_t kret;
6919
6920 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6921 user_wire, (pmap_t)NULL, 0, NULL);
6922 return kret;
6923 }
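#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of wiring and unwiring a page-aligned range on
 * behalf of the kernel.  The helper name, range and tag are
 * hypothetical; vm_map_wire_external() above derives its tag from the
 * caller's backtrace instead of taking one explicitly.
 */
static kern_return_t
example_wire_then_unwire(
	vm_map_t        map,
	vm_map_offset_t start,          /* assumed page aligned */
	vm_map_offset_t end,            /* assumed page aligned */
	vm_tag_t        tag)
{
	kern_return_t kr;

	kr = vm_map_wire_kernel(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE, tag, FALSE /* !user_wire */);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* ... the range can now be accessed without taking page faults ... */

	return vm_map_unwire(map, start, end, FALSE /* !user_wire */);
}
#endif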
6924
6925 kern_return_t
6926 vm_map_wire_and_extract_external(
6927 vm_map_t map,
6928 vm_map_offset_t start,
6929 vm_prot_t caller_prot,
6930 boolean_t user_wire,
6931 ppnum_t *physpage_p)
6932 {
6933 kern_return_t kret;
6934
6935 kret = vm_map_wire_nested(map,
6936 start,
6937 start + VM_MAP_PAGE_SIZE(map),
6938 caller_prot,
6939 vm_tag_bt(),
6940 user_wire,
6941 (pmap_t)NULL,
6942 0,
6943 physpage_p);
6944 if (kret != KERN_SUCCESS &&
6945 physpage_p != NULL) {
6946 *physpage_p = 0;
6947 }
6948 return kret;
6949 }
6950
6951 kern_return_t
6952 vm_map_wire_and_extract_kernel(
6953 vm_map_t map,
6954 vm_map_offset_t start,
6955 vm_prot_t caller_prot,
6956 vm_tag_t tag,
6957 boolean_t user_wire,
6958 ppnum_t *physpage_p)
6959 {
6960 kern_return_t kret;
6961
6962 kret = vm_map_wire_nested(map,
6963 start,
6964 start + VM_MAP_PAGE_SIZE(map),
6965 caller_prot,
6966 tag,
6967 user_wire,
6968 (pmap_t)NULL,
6969 0,
6970 physpage_p);
6971 if (kret != KERN_SUCCESS &&
6972 physpage_p != NULL) {
6973 *physpage_p = 0;
6974 }
6975 return kret;
6976 }
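#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of wiring a single page and extracting its physical
 * page number.  The helper name and arguments are hypothetical;
 * vm_map_wire_nested() only honors a physpage pointer when the wired
 * range is exactly one page, which is what the wrapper above passes.
 */
static kern_return_t
example_wire_one_page(
	vm_map_t        map,
	vm_map_offset_t addr,           /* assumed page aligned */
	vm_tag_t        tag,
	ppnum_t        *ppnum_out)
{
	kern_return_t kr;

	kr = vm_map_wire_and_extract_kernel(map, addr,
	    VM_PROT_READ, tag, FALSE /* !user_wire */, ppnum_out);
	if (kr != KERN_SUCCESS) {
		/* the wrapper above has already reset *ppnum_out to 0 */
		return kr;
	}
	/* *ppnum_out now holds the physical page number of the wired page */
	return KERN_SUCCESS;
}
#endif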
6977
6978 /*
6979 * vm_map_unwire:
6980 *
6981 * Sets the pageability of the specified address range in the target
6982 * as pageable. Regions specified must have been wired previously.
6983 *
6984 * The map must not be locked, but a reference must remain to the map
6985 * throughout the call.
6986 *
6987 * The kernel will panic on failures. User unwire ignores holes and
6988 * unwired or in-transition entries to avoid losing memory by leaving
6989 * it unwired.
6990 */
6991 static kern_return_t
6992 vm_map_unwire_nested(
6993 vm_map_t map,
6994 vm_map_offset_t start,
6995 vm_map_offset_t end,
6996 boolean_t user_wire,
6997 pmap_t map_pmap,
6998 vm_map_offset_t pmap_addr)
6999 {
7000 vm_map_entry_t entry;
7001 struct vm_map_entry *first_entry, tmp_entry;
7002 boolean_t need_wakeup;
7003 boolean_t main_map = FALSE;
7004 unsigned int last_timestamp;
7005
7006 vm_map_lock(map);
7007 if (map_pmap == NULL) {
7008 main_map = TRUE;
7009 }
7010 last_timestamp = map->timestamp;
7011
7012 VM_MAP_RANGE_CHECK(map, start, end);
7013 assert(page_aligned(start));
7014 assert(page_aligned(end));
7015 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7016 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7017
7018 if (start == end) {
7019 /* We unwired what the caller asked for: zero pages */
7020 vm_map_unlock(map);
7021 return KERN_SUCCESS;
7022 }
7023
7024 if (vm_map_lookup_entry(map, start, &first_entry)) {
7025 entry = first_entry;
7026 /*
7027 * vm_map_clip_start will be done later.
7028 * We don't want to unnest any nested submaps here!
7029 */
7030 } else {
7031 if (!user_wire) {
7032 panic("vm_map_unwire: start not found");
7033 }
7034 /* Start address is not in map. */
7035 vm_map_unlock(map);
7036 return KERN_INVALID_ADDRESS;
7037 }
7038
7039 if (entry->superpage_size) {
7040 /* superpages are always wired */
7041 vm_map_unlock(map);
7042 return KERN_INVALID_ADDRESS;
7043 }
7044
7045 need_wakeup = FALSE;
7046 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7047 if (entry->in_transition) {
7048 /*
7049 * 1)
7050 * Another thread is wiring down this entry. Note
7051 * that if it were not for the other thread, we would
7052 * be unwiring an unwired entry. This is not
7053 * permitted. If we wait, we will be unwiring memory
7054 * we did not wire.
7055 *
7056 * 2)
7057 * Another thread is unwiring this entry. We did not
7058 * have a reference to it, because if we did, this
7059 * entry will not be getting unwired now.
7060 */
7061 if (!user_wire) {
7062 /*
7063 * XXX FBDP
7064 * This could happen: there could be some
7065 * overlapping vslock/vsunlock operations
7066 * going on.
7067 * We should probably just wait and retry,
7068 * but then we have to be careful that this
7069 * entry could get "simplified" after
7070 * "in_transition" gets unset and before
7071 * we re-lookup the entry, so we would
7072 * have to re-clip the entry to avoid
7073 * re-unwiring what we have already unwired...
7074 * See vm_map_wire_nested().
7075 *
7076 * Or we could just ignore "in_transition"
7077 * here and proceed to decrement the wired
7078 * count(s) on this entry. That should be fine
7079 * as long as "wired_count" doesn't drop all
7080 * the way to 0 (and we should panic if THAT
7081 * happens).
7082 */
7083 panic("vm_map_unwire: in_transition entry");
7084 }
7085
7086 entry = entry->vme_next;
7087 continue;
7088 }
7089
7090 if (entry->is_sub_map) {
7091 vm_map_offset_t sub_start;
7092 vm_map_offset_t sub_end;
7093 vm_map_offset_t local_end;
7094 pmap_t pmap;
7095
7096 vm_map_clip_start(map, entry, start);
7097 vm_map_clip_end(map, entry, end);
7098
7099 sub_start = VME_OFFSET(entry);
7100 sub_end = entry->vme_end - entry->vme_start;
7101 sub_end += VME_OFFSET(entry);
7102 local_end = entry->vme_end;
7103 if (map_pmap == NULL) {
7104 if (entry->use_pmap) {
7105 pmap = VME_SUBMAP(entry)->pmap;
7106 pmap_addr = sub_start;
7107 } else {
7108 pmap = map->pmap;
7109 pmap_addr = start;
7110 }
7111 if (entry->wired_count == 0 ||
7112 (user_wire && entry->user_wired_count == 0)) {
7113 if (!user_wire) {
7114 panic("vm_map_unwire: entry is unwired");
7115 }
7116 entry = entry->vme_next;
7117 continue;
7118 }
7119
7120 /*
7121 * Check for holes
7122 * Holes: Next entry should be contiguous unless
7123 * this is the end of the region.
7124 */
7125 if (((entry->vme_end < end) &&
7126 ((entry->vme_next == vm_map_to_entry(map)) ||
7127 (entry->vme_next->vme_start
7128 > entry->vme_end)))) {
7129 if (!user_wire) {
7130 panic("vm_map_unwire: non-contiguous region");
7131 }
7132 /*
7133 * entry = entry->vme_next;
7134 * continue;
7135 */
7136 }
7137
7138 subtract_wire_counts(map, entry, user_wire);
7139
7140 if (entry->wired_count != 0) {
7141 entry = entry->vme_next;
7142 continue;
7143 }
7144
7145 entry->in_transition = TRUE;
7146 tmp_entry = *entry;/* see comment in vm_map_wire() */
7147
7148 /*
7149 * We can unlock the map now. The in_transition state
7150 * guarantees existence of the entry.
7151 */
7152 vm_map_unlock(map);
7153 vm_map_unwire_nested(VME_SUBMAP(entry),
7154 sub_start, sub_end, user_wire, pmap, pmap_addr);
7155 vm_map_lock(map);
7156
7157 if (last_timestamp + 1 != map->timestamp) {
7158 /*
7159 * Find the entry again. It could have been
7160 * clipped or deleted after we unlocked the map.
7161 */
7162 if (!vm_map_lookup_entry(map,
7163 tmp_entry.vme_start,
7164 &first_entry)) {
7165 if (!user_wire) {
7166 panic("vm_map_unwire: re-lookup failed");
7167 }
7168 entry = first_entry->vme_next;
7169 } else {
7170 entry = first_entry;
7171 }
7172 }
7173 last_timestamp = map->timestamp;
7174
7175 /*
7176 * clear transition bit for all constituent entries
7177 * that were in the original entry (saved in
7178 * tmp_entry). Also check for waiters.
7179 */
7180 while ((entry != vm_map_to_entry(map)) &&
7181 (entry->vme_start < tmp_entry.vme_end)) {
7182 assert(entry->in_transition);
7183 entry->in_transition = FALSE;
7184 if (entry->needs_wakeup) {
7185 entry->needs_wakeup = FALSE;
7186 need_wakeup = TRUE;
7187 }
7188 entry = entry->vme_next;
7189 }
7190 continue;
7191 } else {
7192 vm_map_unlock(map);
7193 vm_map_unwire_nested(VME_SUBMAP(entry),
7194 sub_start, sub_end, user_wire, map_pmap,
7195 pmap_addr);
7196 vm_map_lock(map);
7197
7198 if (last_timestamp + 1 != map->timestamp) {
7199 /*
7200 * Find the entry again. It could have been
7201 * clipped or deleted after we unlocked the map.
7202 */
7203 if (!vm_map_lookup_entry(map,
7204 tmp_entry.vme_start,
7205 &first_entry)) {
7206 if (!user_wire) {
7207 panic("vm_map_unwire: re-lookup failed");
7208 }
7209 entry = first_entry->vme_next;
7210 } else {
7211 entry = first_entry;
7212 }
7213 }
7214 last_timestamp = map->timestamp;
7215 }
7216 }
7217
7218
7219 if ((entry->wired_count == 0) ||
7220 (user_wire && entry->user_wired_count == 0)) {
7221 if (!user_wire) {
7222 panic("vm_map_unwire: entry is unwired");
7223 }
7224
7225 entry = entry->vme_next;
7226 continue;
7227 }
7228
7229 assert(entry->wired_count > 0 &&
7230 (!user_wire || entry->user_wired_count > 0));
7231
7232 vm_map_clip_start(map, entry, start);
7233 vm_map_clip_end(map, entry, end);
7234
7235 /*
7236 * Check for holes
7237 * Holes: Next entry should be contiguous unless
7238 * this is the end of the region.
7239 */
7240 if (((entry->vme_end < end) &&
7241 ((entry->vme_next == vm_map_to_entry(map)) ||
7242 (entry->vme_next->vme_start > entry->vme_end)))) {
7243 if (!user_wire) {
7244 panic("vm_map_unwire: non-contiguous region");
7245 }
7246 entry = entry->vme_next;
7247 continue;
7248 }
7249
7250 subtract_wire_counts(map, entry, user_wire);
7251
7252 if (entry->wired_count != 0) {
7253 entry = entry->vme_next;
7254 continue;
7255 }
7256
7257 if (entry->zero_wired_pages) {
7258 entry->zero_wired_pages = FALSE;
7259 }
7260
7261 entry->in_transition = TRUE;
7262 tmp_entry = *entry; /* see comment in vm_map_wire() */
7263
7264 /*
7265 * We can unlock the map now. The in_transition state
7266 * guarantees existence of the entry.
7267 */
7268 vm_map_unlock(map);
7269 if (map_pmap) {
7270 vm_fault_unwire(map,
7271 &tmp_entry, FALSE, map_pmap, pmap_addr);
7272 } else {
7273 vm_fault_unwire(map,
7274 &tmp_entry, FALSE, map->pmap,
7275 tmp_entry.vme_start);
7276 }
7277 vm_map_lock(map);
7278
7279 if (last_timestamp + 1 != map->timestamp) {
7280 /*
7281 * Find the entry again. It could have been clipped
7282 * or deleted after we unlocked the map.
7283 */
7284 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7285 &first_entry)) {
7286 if (!user_wire) {
7287 panic("vm_map_unwire: re-lookup failed");
7288 }
7289 entry = first_entry->vme_next;
7290 } else {
7291 entry = first_entry;
7292 }
7293 }
7294 last_timestamp = map->timestamp;
7295
7296 /*
7297 * clear transition bit for all constituent entries that
7298 * were in the original entry (saved in tmp_entry). Also
7299 * check for waiters.
7300 */
7301 while ((entry != vm_map_to_entry(map)) &&
7302 (entry->vme_start < tmp_entry.vme_end)) {
7303 assert(entry->in_transition);
7304 entry->in_transition = FALSE;
7305 if (entry->needs_wakeup) {
7306 entry->needs_wakeup = FALSE;
7307 need_wakeup = TRUE;
7308 }
7309 entry = entry->vme_next;
7310 }
7311 }
7312
7313 /*
7314 * We might have fragmented the address space when we wired this
7315 * range of addresses. Attempt to re-coalesce these VM map entries
7316 * with their neighbors now that they're no longer wired.
7317 * Under some circumstances, address space fragmentation can
7318 * prevent VM object shadow chain collapsing, which can cause
7319 * swap space leaks.
7320 */
7321 vm_map_simplify_range(map, start, end);
7322
7323 vm_map_unlock(map);
7324 /*
7325 * wake up anybody waiting on entries that we have unwired.
7326 */
7327 if (need_wakeup) {
7328 vm_map_entry_wakeup(map);
7329 }
7330 return KERN_SUCCESS;
7331 }
7332
7333 kern_return_t
7334 vm_map_unwire(
7335 vm_map_t map,
7336 vm_map_offset_t start,
7337 vm_map_offset_t end,
7338 boolean_t user_wire)
7339 {
7340 return vm_map_unwire_nested(map, start, end,
7341 user_wire, (pmap_t)NULL, 0);
7342 }
7343
7344
7345 /*
7346 * vm_map_entry_delete: [ internal use only ]
7347 *
7348 * Deallocate the given entry from the target map.
7349 */
7350 static void
7351 vm_map_entry_delete(
7352 vm_map_t map,
7353 vm_map_entry_t entry)
7354 {
7355 vm_map_offset_t s, e;
7356 vm_object_t object;
7357 vm_map_t submap;
7358
7359 s = entry->vme_start;
7360 e = entry->vme_end;
7361 assert(page_aligned(s));
7362 assert(page_aligned(e));
7363 if (entry->map_aligned == TRUE) {
7364 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7365 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7366 }
7367 assert(entry->wired_count == 0);
7368 assert(entry->user_wired_count == 0);
7369 assert(!entry->permanent);
7370
7371 if (entry->is_sub_map) {
7372 object = NULL;
7373 submap = VME_SUBMAP(entry);
7374 } else {
7375 submap = NULL;
7376 object = VME_OBJECT(entry);
7377 }
7378
7379 vm_map_store_entry_unlink(map, entry);
7380 map->size -= e - s;
7381
7382 vm_map_entry_dispose(map, entry);
7383
7384 vm_map_unlock(map);
7385 /*
7386 * Deallocate the object only after removing all
7387 * pmap entries pointing to its pages.
7388 */
7389 if (submap) {
7390 vm_map_deallocate(submap);
7391 } else {
7392 vm_object_deallocate(object);
7393 }
7394 }
7395
7396 void
7397 vm_map_submap_pmap_clean(
7398 vm_map_t map,
7399 vm_map_offset_t start,
7400 vm_map_offset_t end,
7401 vm_map_t sub_map,
7402 vm_map_offset_t offset)
7403 {
7404 vm_map_offset_t submap_start;
7405 vm_map_offset_t submap_end;
7406 vm_map_size_t remove_size;
7407 vm_map_entry_t entry;
7408
7409 submap_end = offset + (end - start);
7410 submap_start = offset;
7411
7412 vm_map_lock_read(sub_map);
7413 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7414 remove_size = (entry->vme_end - entry->vme_start);
7415 if (offset > entry->vme_start) {
7416 remove_size -= offset - entry->vme_start;
7417 }
7418
7419
7420 if (submap_end < entry->vme_end) {
7421 remove_size -=
7422 entry->vme_end - submap_end;
7423 }
7424 if (entry->is_sub_map) {
7425 vm_map_submap_pmap_clean(
7426 sub_map,
7427 start,
7428 start + remove_size,
7429 VME_SUBMAP(entry),
7430 VME_OFFSET(entry));
7431 } else {
7432 if (map->mapped_in_other_pmaps &&
7433 os_ref_get_count(&map->map_refcnt) != 0 &&
7434 VME_OBJECT(entry) != NULL) {
7435 vm_object_pmap_protect_options(
7436 VME_OBJECT(entry),
7437 (VME_OFFSET(entry) +
7438 offset -
7439 entry->vme_start),
7440 remove_size,
7441 PMAP_NULL,
7442 entry->vme_start,
7443 VM_PROT_NONE,
7444 PMAP_OPTIONS_REMOVE);
7445 } else {
7446 pmap_remove(map->pmap,
7447 (addr64_t)start,
7448 (addr64_t)(start + remove_size));
7449 }
7450 }
7451 }
7452
7453 entry = entry->vme_next;
7454
7455 while ((entry != vm_map_to_entry(sub_map))
7456 && (entry->vme_start < submap_end)) {
7457 remove_size = (entry->vme_end - entry->vme_start);
7458 if (submap_end < entry->vme_end) {
7459 remove_size -= entry->vme_end - submap_end;
7460 }
7461 if (entry->is_sub_map) {
7462 vm_map_submap_pmap_clean(
7463 sub_map,
7464 (start + entry->vme_start) - offset,
7465 ((start + entry->vme_start) - offset) + remove_size,
7466 VME_SUBMAP(entry),
7467 VME_OFFSET(entry));
7468 } else {
7469 if (map->mapped_in_other_pmaps &&
7470 os_ref_get_count(&map->map_refcnt) != 0 &&
7471 VME_OBJECT(entry) != NULL) {
7472 vm_object_pmap_protect_options(
7473 VME_OBJECT(entry),
7474 VME_OFFSET(entry),
7475 remove_size,
7476 PMAP_NULL,
7477 entry->vme_start,
7478 VM_PROT_NONE,
7479 PMAP_OPTIONS_REMOVE);
7480 } else {
7481 pmap_remove(map->pmap,
7482 (addr64_t)((start + entry->vme_start)
7483 - offset),
7484 (addr64_t)(((start + entry->vme_start)
7485 - offset) + remove_size));
7486 }
7487 }
7488 entry = entry->vme_next;
7489 }
7490 vm_map_unlock_read(sub_map);
7491 return;
7492 }
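#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how a caller tearing down a submap entry that
 * does not share its pmap might invoke vm_map_submap_pmap_clean() to
 * remove the physical mappings the parent pmap picked up for that
 * range.  The helper name is hypothetical; the argument pattern is an
 * assumption based on the entry fields used above.
 */
static void
example_clean_submap_entry(vm_map_t map, vm_map_entry_t entry)
{
	assert(entry->is_sub_map && !entry->use_pmap);
	vm_map_submap_pmap_clean(map,
	    entry->vme_start,
	    entry->vme_end,
	    VME_SUBMAP(entry),
	    VME_OFFSET(entry));
}
#endif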
7493
7494 /*
7495 * virt_memory_guard_ast:
7496 *
7497 * Handle the AST callout for a virtual memory guard.
7498 * raise an EXC_GUARD exception and terminate the task
7499 * if configured to do so.
7500 */
7501 void
7502 virt_memory_guard_ast(
7503 thread_t thread,
7504 mach_exception_data_type_t code,
7505 mach_exception_data_type_t subcode)
7506 {
7507 task_t task = thread->task;
7508 assert(task != kernel_task);
7509 assert(task == current_task());
7510 uint32_t behavior;
7511
7512 behavior = task->task_exc_guard;
7513
7514 /* Is delivery enabled */
7515 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7516 return;
7517 }
7518
7519 /* If only once, make sure we're that once */
7520 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7521 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7522
7523 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7524 break;
7525 }
7526 behavior = task->task_exc_guard;
7527 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7528 return;
7529 }
7530 }
7531
7532 /* Raise exception via corpse fork or synchronously */
7533 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7534 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7535 task_violated_guard(code, subcode, NULL);
7536 } else {
7537 task_exception_notify(EXC_GUARD, code, subcode);
7538 }
7539
7540 /* Terminate the task if desired */
7541 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7542 task_bsdtask_kill(current_task());
7543 }
7544 }
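#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A hypothetical helper summarizing how the TASK_EXC_GUARD_VM_* bits
 * checked in virt_memory_guard_ast() above combine: delivery can be
 * disabled, routed through a corpse fork, and/or made fatal (the
 * "once" bit makes delivery one-shot by clearing the deliver bit).
 */
static const char *
example_describe_vm_guard_behavior(uint32_t behavior)
{
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return "guard exception not delivered";
	}
	if (behavior & TASK_EXC_GUARD_VM_FATAL) {
		return "delivered synchronously, task terminated";
	}
	if (behavior & TASK_EXC_GUARD_VM_CORPSE) {
		return "delivered via corpse fork, task continues";
	}
	return "delivered synchronously, task continues";
}
#endif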
7545
7546 /*
7547 * vm_map_guard_exception:
7548 *
7549 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7550 *
7551 * Right now, we do this when we find nothing mapped, or a
7552 * gap in the mapping when a user address space deallocate
7553 * was requested. We report the address of the first gap found.
7554 */
7555 static void
7556 vm_map_guard_exception(
7557 vm_map_offset_t gap_start,
7558 unsigned reason)
7559 {
7560 mach_exception_code_t code = 0;
7561 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7562 unsigned int target = 0; /* should we pass in pid associated with map? */
7563 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7564 boolean_t fatal = FALSE;
7565
7566 task_t task = current_task();
7567
7568 /* Can't deliver exceptions to kernel task */
7569 if (task == kernel_task) {
7570 return;
7571 }
7572
7573 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7574 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7575 EXC_GUARD_ENCODE_TARGET(code, target);
7576
7577 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7578 fatal = TRUE;
7579 }
7580 thread_guard_violation(current_thread(), code, subcode, fatal);
7581 }
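#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how vm_map_delete() below might be called with a
 * locked map: remove one kernel wiring on the way out and tolerate
 * gaps in the range.  The helper name and flag combination are
 * hypothetical; the flag values are the ones tested in vm_map_delete()
 * below, and zap_map is simply left NULL here.
 */
static kern_return_t
example_delete_range_locked(
	vm_map_t        map,            /* assumed locked by the caller */
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	return vm_map_delete(map, start, end,
	    VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_GAPS_OK,
	    VM_MAP_NULL /* zap_map */);
}
#endif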
7582
7583 /*
7584 * vm_map_delete: [ internal use only ]
7585 *
7586 * Deallocates the given address range from the target map.
7587 * Removes all user wirings. Unwires one kernel wiring if
7588 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7589 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7590 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7591 *
7592 * This routine is called with map locked and leaves map locked.
7593 */
7594 static kern_return_t
7595 vm_map_delete(
7596 vm_map_t map,
7597 vm_map_offset_t start,
7598 vm_map_offset_t end,
7599 int flags,
7600 vm_map_t zap_map)
7601 {
7602 vm_map_entry_t entry, next;
7603 struct vm_map_entry *first_entry, tmp_entry;
7604 vm_map_offset_t s;
7605 vm_object_t object;
7606 boolean_t need_wakeup;
7607 unsigned int last_timestamp = ~0; /* unlikely value */
7608 int interruptible;
7609 vm_map_offset_t gap_start;
7610 __unused vm_map_offset_t save_start = start;
7611 __unused vm_map_offset_t save_end = end;
7612 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7613 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7614
7615 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
7616 gap_start = FIND_GAP;
7617 } else {
7618 gap_start = GAPS_OK;
7619 }
7620
7621 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7622 THREAD_ABORTSAFE : THREAD_UNINT;
7623
7624 /*
7625 * All our DMA I/O operations in IOKit are currently done by
7626 * wiring through the map entries of the task requesting the I/O.
7627 * Because of this, we must always wait for kernel wirings
7628 * to go away on the entries before deleting them.
7629 *
7630 * Any caller who wants to actually remove a kernel wiring
7631 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7632 * properly remove one wiring instead of blasting through
7633 * them all.
7634 */
7635 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7636
7637 while (1) {
7638 /*
7639 * Find the start of the region, and clip it
7640 */
7641 if (vm_map_lookup_entry(map, start, &first_entry)) {
7642 entry = first_entry;
7643 if (map == kalloc_map &&
7644 (entry->vme_start != start ||
7645 entry->vme_end != end)) {
7646 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7647 "mismatched entry %p [0x%llx:0x%llx]\n",
7648 map,
7649 (uint64_t)start,
7650 (uint64_t)end,
7651 entry,
7652 (uint64_t)entry->vme_start,
7653 (uint64_t)entry->vme_end);
7654 }
7655
7656 /*
7657 * If in a superpage, extend the range to include the start of the mapping.
7658 */
7659 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7660 start = SUPERPAGE_ROUND_DOWN(start);
7661 continue;
7662 }
7663
7664 if (start == entry->vme_start) {
7665 /*
7666 * No need to clip. We don't want to cause
7667 * any unnecessary unnesting in this case...
7668 */
7669 } else {
7670 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7671 entry->map_aligned &&
7672 !VM_MAP_PAGE_ALIGNED(
7673 start,
7674 VM_MAP_PAGE_MASK(map))) {
7675 /*
7676 * The entry will no longer be
7677 * map-aligned after clipping
7678 * and the caller said it's OK.
7679 */
7680 entry->map_aligned = FALSE;
7681 }
7682 if (map == kalloc_map) {
7683 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7684 " clipping %p at 0x%llx\n",
7685 map,
7686 (uint64_t)start,
7687 (uint64_t)end,
7688 entry,
7689 (uint64_t)start);
7690 }
7691 vm_map_clip_start(map, entry, start);
7692 }
7693
7694 /*
7695 * Fix the lookup hint now, rather than each
7696 * time through the loop.
7697 */
7698 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7699 } else {
7700 if (map->pmap == kernel_pmap &&
7701 os_ref_get_count(&map->map_refcnt) != 0) {
7702 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7703 "no map entry at 0x%llx\n",
7704 map,
7705 (uint64_t)start,
7706 (uint64_t)end,
7707 (uint64_t)start);
7708 }
7709 entry = first_entry->vme_next;
7710 if (gap_start == FIND_GAP) {
7711 gap_start = start;
7712 }
7713 }
7714 break;
7715 }
7716 if (entry->superpage_size) {
7717 end = SUPERPAGE_ROUND_UP(end);
7718 }
7719
7720 need_wakeup = FALSE;
7721 /*
7722 * Step through all entries in this region
7723 */
7724 s = entry->vme_start;
7725 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7726 /*
7727 * At this point, we have deleted all the memory entries
7728 * between "start" and "s". We still need to delete
7729 * all memory entries between "s" and "end".
7730 * While we were blocked and the map was unlocked, some
7731 * new memory entries could have been re-allocated between
7732 * "start" and "s" and we don't want to mess with those.
7733 * Some of those entries could even have been re-assembled
7734 * with an entry after "s" (in vm_map_simplify_entry()), so
7735 * we may have to vm_map_clip_start() again.
7736 */
7737
7738 if (entry->vme_start >= s) {
7739 /*
7740 * This entry starts on or after "s"
7741 * so no need to clip its start.
7742 */
7743 } else {
7744 /*
7745 * This entry has been re-assembled by a
7746 * vm_map_simplify_entry(). We need to
7747 * re-clip its start.
7748 */
7749 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7750 entry->map_aligned &&
7751 !VM_MAP_PAGE_ALIGNED(s,
7752 VM_MAP_PAGE_MASK(map))) {
7753 /*
7754 * The entry will no longer be map-aligned
7755 * after clipping and the caller said it's OK.
7756 */
7757 entry->map_aligned = FALSE;
7758 }
7759 if (map == kalloc_map) {
7760 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7761 "clipping %p at 0x%llx\n",
7762 map,
7763 (uint64_t)start,
7764 (uint64_t)end,
7765 entry,
7766 (uint64_t)s);
7767 }
7768 vm_map_clip_start(map, entry, s);
7769 }
7770 if (entry->vme_end <= end) {
7771 /*
7772 * This entry is going away completely, so no need
7773 * to clip and possibly cause an unnecessary unnesting.
7774 */
7775 } else {
7776 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7777 entry->map_aligned &&
7778 !VM_MAP_PAGE_ALIGNED(end,
7779 VM_MAP_PAGE_MASK(map))) {
7780 /*
7781 * The entry will no longer be map-aligned
7782 * after clipping and the caller said it's OK.
7783 */
7784 entry->map_aligned = FALSE;
7785 }
7786 if (map == kalloc_map) {
7787 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7788 "clipping %p at 0x%llx\n",
7789 map,
7790 (uint64_t)start,
7791 (uint64_t)end,
7792 entry,
7793 (uint64_t)end);
7794 }
7795 vm_map_clip_end(map, entry, end);
7796 }
7797
7798 if (entry->permanent) {
7799 if (map->pmap == kernel_pmap) {
7800 panic("%s(%p,0x%llx,0x%llx): "
7801 "attempt to remove permanent "
7802 "VM map entry "
7803 "%p [0x%llx:0x%llx]\n",
7804 __FUNCTION__,
7805 map,
7806 (uint64_t) start,
7807 (uint64_t) end,
7808 entry,
7809 (uint64_t) entry->vme_start,
7810 (uint64_t) entry->vme_end);
7811 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7812 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7813 entry->permanent = FALSE;
7814 #if PMAP_CS
7815 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7816 entry->permanent = FALSE;
7817
7818 printf("%d[%s] %s(0x%llx,0x%llx): "
7819 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7820 "prot 0x%x/0x%x\n",
7821 proc_selfpid(),
7822 (current_task()->bsd_info
7823 ? proc_name_address(current_task()->bsd_info)
7824 : "?"),
7825 __FUNCTION__,
7826 (uint64_t) start,
7827 (uint64_t) end,
7828 (uint64_t)entry->vme_start,
7829 (uint64_t)entry->vme_end,
7830 entry->protection,
7831 entry->max_protection);
7832 #endif
7833 } else {
7834 if (vm_map_executable_immutable_verbose) {
7835 printf("%d[%s] %s(0x%llx,0x%llx): "
7836 "permanent entry [0x%llx:0x%llx] "
7837 "prot 0x%x/0x%x\n",
7838 proc_selfpid(),
7839 (current_task()->bsd_info
7840 ? proc_name_address(current_task()->bsd_info)
7841 : "?"),
7842 __FUNCTION__,
7843 (uint64_t) start,
7844 (uint64_t) end,
7845 (uint64_t)entry->vme_start,
7846 (uint64_t)entry->vme_end,
7847 entry->protection,
7848 entry->max_protection);
7849 }
7850 /*
7851 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7852 */
7853 DTRACE_VM5(vm_map_delete_permanent,
7854 vm_map_offset_t, entry->vme_start,
7855 vm_map_offset_t, entry->vme_end,
7856 vm_prot_t, entry->protection,
7857 vm_prot_t, entry->max_protection,
7858 int, VME_ALIAS(entry));
7859 }
7860 }
7861
7862
7863 if (entry->in_transition) {
7864 wait_result_t wait_result;
7865
7866 /*
7867 * Another thread is wiring/unwiring this entry.
7868 * Let the other thread know we are waiting.
7869 */
7870 assert(s == entry->vme_start);
7871 entry->needs_wakeup = TRUE;
7872
7873 /*
7874 * wake up anybody waiting on entries that we have
7875 * already unwired/deleted.
7876 */
7877 if (need_wakeup) {
7878 vm_map_entry_wakeup(map);
7879 need_wakeup = FALSE;
7880 }
7881
7882 wait_result = vm_map_entry_wait(map, interruptible);
7883
7884 if (interruptible &&
7885 wait_result == THREAD_INTERRUPTED) {
7886 /*
7887 * We do not clear the needs_wakeup flag,
7888 * since we cannot tell if we were the only one.
7889 */
7890 return KERN_ABORTED;
7891 }
7892
7893 /*
7894 * The entry could have been clipped or it
7895 * may not exist anymore. Look it up again.
7896 */
7897 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7898 /*
7899 * User: use the next entry
7900 */
7901 if (gap_start == FIND_GAP) {
7902 gap_start = s;
7903 }
7904 entry = first_entry->vme_next;
7905 s = entry->vme_start;
7906 } else {
7907 entry = first_entry;
7908 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7909 }
7910 last_timestamp = map->timestamp;
7911 continue;
7912 } /* end in_transition */
7913
7914 if (entry->wired_count) {
7915 boolean_t user_wire;
7916
7917 user_wire = entry->user_wired_count > 0;
7918
7919 /*
7920 * Remove a kernel wiring if requested
7921 */
7922 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7923 entry->wired_count--;
7924 }
7925
7926 /*
7927 * Remove all user wirings for proper accounting
7928 */
7929 if (entry->user_wired_count > 0) {
7930 while (entry->user_wired_count) {
7931 subtract_wire_counts(map, entry, user_wire);
7932 }
7933 }
7934
7935 if (entry->wired_count != 0) {
7936 assert(map != kernel_map);
7937 /*
7938 * Cannot continue. Typical case is when
7939 * a user thread has physical io pending
7940 * on this page. Either wait for the
7941 * kernel wiring to go away or return an
7942 * error.
7943 */
7944 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7945 wait_result_t wait_result;
7946
7947 assert(s == entry->vme_start);
7948 entry->needs_wakeup = TRUE;
7949 wait_result = vm_map_entry_wait(map,
7950 interruptible);
7951
7952 if (interruptible &&
7953 wait_result == THREAD_INTERRUPTED) {
7954 /*
7955 * We do not clear the
7956 * needs_wakeup flag, since we
7957 * cannot tell if we were the
7958 * only one.
7959 */
7960 return KERN_ABORTED;
7961 }
7962
7963 /*
7964 * The entry could have been clipped or
7965 * it may not exist anymore. Look it
7966 * up again.
7967 */
7968 if (!vm_map_lookup_entry(map, s,
7969 &first_entry)) {
7970 assert(map != kernel_map);
7971 /*
7972 * User: use the next entry
7973 */
7974 if (gap_start == FIND_GAP) {
7975 gap_start = s;
7976 }
7977 entry = first_entry->vme_next;
7978 s = entry->vme_start;
7979 } else {
7980 entry = first_entry;
7981 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7982 }
7983 last_timestamp = map->timestamp;
7984 continue;
7985 } else {
7986 return KERN_FAILURE;
7987 }
7988 }
7989
7990 entry->in_transition = TRUE;
7991 /*
7992 * copy current entry. see comment in vm_map_wire()
7993 */
7994 tmp_entry = *entry;
7995 assert(s == entry->vme_start);
7996
7997 /*
7998 * We can unlock the map now. The in_transition
7999 * state guarantees existence of the entry.
8000 */
8001 vm_map_unlock(map);
8002
8003 if (tmp_entry.is_sub_map) {
8004 vm_map_t sub_map;
8005 vm_map_offset_t sub_start, sub_end;
8006 pmap_t pmap;
8007 vm_map_offset_t pmap_addr;
8008
8009
8010 sub_map = VME_SUBMAP(&tmp_entry);
8011 sub_start = VME_OFFSET(&tmp_entry);
8012 sub_end = sub_start + (tmp_entry.vme_end -
8013 tmp_entry.vme_start);
8014 if (tmp_entry.use_pmap) {
8015 pmap = sub_map->pmap;
8016 pmap_addr = tmp_entry.vme_start;
8017 } else {
8018 pmap = map->pmap;
8019 pmap_addr = tmp_entry.vme_start;
8020 }
8021 (void) vm_map_unwire_nested(sub_map,
8022 sub_start, sub_end,
8023 user_wire,
8024 pmap, pmap_addr);
8025 } else {
8026 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8027 pmap_protect_options(
8028 map->pmap,
8029 tmp_entry.vme_start,
8030 tmp_entry.vme_end,
8031 VM_PROT_NONE,
8032 PMAP_OPTIONS_REMOVE,
8033 NULL);
8034 }
8035 vm_fault_unwire(map, &tmp_entry,
8036 VME_OBJECT(&tmp_entry) == kernel_object,
8037 map->pmap, tmp_entry.vme_start);
8038 }
8039
8040 vm_map_lock(map);
8041
8042 if (last_timestamp + 1 != map->timestamp) {
8043 /*
8044 * Find the entry again. It could have
8045 * been clipped after we unlocked the map.
8046 */
8047 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8048 assert((map != kernel_map) &&
8049 (!entry->is_sub_map));
8050 if (gap_start == FIND_GAP) {
8051 gap_start = s;
8052 }
8053 first_entry = first_entry->vme_next;
8054 s = first_entry->vme_start;
8055 } else {
8056 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8057 }
8058 } else {
8059 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8060 first_entry = entry;
8061 }
8062
8063 last_timestamp = map->timestamp;
8064
8065 entry = first_entry;
8066 while ((entry != vm_map_to_entry(map)) &&
8067 (entry->vme_start < tmp_entry.vme_end)) {
8068 assert(entry->in_transition);
8069 entry->in_transition = FALSE;
8070 if (entry->needs_wakeup) {
8071 entry->needs_wakeup = FALSE;
8072 need_wakeup = TRUE;
8073 }
8074 entry = entry->vme_next;
8075 }
8076 /*
8077 * We have unwired the entry(s). Go back and
8078 * delete them.
8079 */
8080 entry = first_entry;
8081 continue;
8082 }
8083
8084 /* entry is unwired */
8085 assert(entry->wired_count == 0);
8086 assert(entry->user_wired_count == 0);
8087
8088 assert(s == entry->vme_start);
8089
8090 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8091 /*
8092 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8093 * vm_map_delete(), some map entries might have been
8094 * transferred to a "zap_map", which doesn't have a
8095 * pmap. The original pmap has already been flushed
8096 * in the vm_map_delete() call targeting the original
8097 * map, but when we get to destroying the "zap_map",
8098 * we don't have any pmap to flush, so let's just skip
8099 * all this.
8100 */
8101 } else if (entry->is_sub_map) {
8102 if (entry->use_pmap) {
8103 #ifndef NO_NESTED_PMAP
8104 int pmap_flags;
8105
8106 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8107 /*
8108 * This is the final cleanup of the
8109 * address space being terminated.
8110 * No new mappings are expected and
8111 * we don't really need to unnest the
8112 * shared region (and lose the "global"
8113 * pmap mappings, if applicable).
8114 *
8115 * Tell the pmap layer that we're
8116 * "clean" wrt nesting.
8117 */
8118 pmap_flags = PMAP_UNNEST_CLEAN;
8119 } else {
8120 /*
8121 * We're unmapping part of the nested
8122 * shared region, so we can't keep the
8123 * nested pmap.
8124 */
8125 pmap_flags = 0;
8126 }
8127 pmap_unnest_options(
8128 map->pmap,
8129 (addr64_t)entry->vme_start,
8130 entry->vme_end - entry->vme_start,
8131 pmap_flags);
8132 #endif /* NO_NESTED_PMAP */
8133 if (map->mapped_in_other_pmaps &&
8134 os_ref_get_count(&map->map_refcnt) != 0) {
8135 /* clean up parent map/maps */
8136 vm_map_submap_pmap_clean(
8137 map, entry->vme_start,
8138 entry->vme_end,
8139 VME_SUBMAP(entry),
8140 VME_OFFSET(entry));
8141 }
8142 } else {
8143 vm_map_submap_pmap_clean(
8144 map, entry->vme_start, entry->vme_end,
8145 VME_SUBMAP(entry),
8146 VME_OFFSET(entry));
8147 }
8148 } else if (VME_OBJECT(entry) != kernel_object &&
8149 VME_OBJECT(entry) != compressor_object) {
8150 object = VME_OBJECT(entry);
8151 if (map->mapped_in_other_pmaps &&
8152 os_ref_get_count(&map->map_refcnt) != 0) {
8153 vm_object_pmap_protect_options(
8154 object, VME_OFFSET(entry),
8155 entry->vme_end - entry->vme_start,
8156 PMAP_NULL,
8157 entry->vme_start,
8158 VM_PROT_NONE,
8159 PMAP_OPTIONS_REMOVE);
8160 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8161 (map->pmap == kernel_pmap)) {
8162 /* Remove translations associated
8163 * with this range unless the entry
8164 * does not have an object, or
8165 * it's the kernel map or a descendant
8166 * since the platform could potentially
8167 * create "backdoor" mappings invisible
8168 * to the VM. It is expected that
8169 * objectless, non-kernel ranges
8170 * do not have such VM invisible
8171 * translations.
8172 */
8173 pmap_remove_options(map->pmap,
8174 (addr64_t)entry->vme_start,
8175 (addr64_t)entry->vme_end,
8176 PMAP_OPTIONS_REMOVE);
8177 }
8178 }
8179
8180 if (entry->iokit_acct) {
8181 /* alternate accounting */
8182 DTRACE_VM4(vm_map_iokit_unmapped_region,
8183 vm_map_t, map,
8184 vm_map_offset_t, entry->vme_start,
8185 vm_map_offset_t, entry->vme_end,
8186 int, VME_ALIAS(entry));
8187 vm_map_iokit_unmapped_region(map,
8188 (entry->vme_end -
8189 entry->vme_start));
8190 entry->iokit_acct = FALSE;
8191 entry->use_pmap = FALSE;
8192 }
8193
8194 /*
8195 * All pmap mappings for this map entry must have been
8196 * cleared by now.
8197 */
8198 #if DEBUG
8199 assert(vm_map_pmap_is_empty(map,
8200 entry->vme_start,
8201 entry->vme_end));
8202 #endif /* DEBUG */
8203
8204 next = entry->vme_next;
8205
8206 if (map->pmap == kernel_pmap &&
8207 os_ref_get_count(&map->map_refcnt) != 0 &&
8208 entry->vme_end < end &&
8209 (next == vm_map_to_entry(map) ||
8210 next->vme_start != entry->vme_end)) {
8211 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8212 "hole after %p at 0x%llx\n",
8213 map,
8214 (uint64_t)start,
8215 (uint64_t)end,
8216 entry,
8217 (uint64_t)entry->vme_end);
8218 }
8219
8220 /*
8221 * If the desired range didn't end with "entry", then there is a gap if
8222 * we wrapped around to the start of the map or if "entry" and "next"
8223 * aren't contiguous.
8224 *
8225 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8226 * For example, on devices with 4K hardware pages, an entry can end on a 4K boundary even though the map's page size is now 16K.
8227 */
8228 if (gap_start == FIND_GAP &&
8229 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8230 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8231 gap_start = entry->vme_end;
8232 }
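/*
 * Illustrative example (added for clarity, not part of the original
 * source; addresses are hypothetical): with a 16K map page size
 * (VM_MAP_PAGE_MASK(map) == 0x3fff), an entry ending at 0x100001000
 * with end == 0x100008000 and next->vme_start == 0x100004000 rounds
 * up to 0x100004000 < end, so gap_start is set to 0x100001000.  If the
 * same entry instead ended at 0x100005000, rounding gives 0x100008000,
 * which is not < end, so no gap is reported even though vme_end and
 * next->vme_start differ by less than a map page.
 */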
8233 s = next->vme_start;
8234 last_timestamp = map->timestamp;
8235
8236 if (entry->permanent) {
8237 /*
8238 * A permanent entry can not be removed, so leave it
8239 * in place but remove all access permissions.
8240 */
8241 entry->protection = VM_PROT_NONE;
8242 entry->max_protection = VM_PROT_NONE;
8243 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8244 zap_map != VM_MAP_NULL) {
8245 vm_map_size_t entry_size;
8246 /*
8247 * The caller wants to save the affected VM map entries
8248 * into the "zap_map". The caller will take care of
8249 * these entries.
8250 */
8251 /* unlink the entry from "map" ... */
8252 vm_map_store_entry_unlink(map, entry);
8253 /* ... and add it to the end of the "zap_map" */
8254 vm_map_store_entry_link(zap_map,
8255 vm_map_last_entry(zap_map),
8256 entry,
8257 VM_MAP_KERNEL_FLAGS_NONE);
8258 entry_size = entry->vme_end - entry->vme_start;
8259 map->size -= entry_size;
8260 zap_map->size += entry_size;
8261 /* we didn't unlock the map, so no timestamp increase */
8262 last_timestamp--;
8263 } else {
8264 vm_map_entry_delete(map, entry);
8265 /* vm_map_entry_delete unlocks the map */
8266 vm_map_lock(map);
8267 }
8268
8269 entry = next;
8270
8271 if (entry == vm_map_to_entry(map)) {
8272 break;
8273 }
8274 if (last_timestamp + 1 != map->timestamp) {
8275 /*
8276 * We are responsible for deleting everything
8277 * from the given space. If someone has interfered,
8278 * we pick up where we left off. Back fills should
8279 * be all right for anyone, except map_delete, and
8280 * we have to assume that the task has been fully
8281 * disabled before we get here.
8282 */
8283 if (!vm_map_lookup_entry(map, s, &entry)) {
8284 entry = entry->vme_next;
8285
8286 /*
8287 * Nothing found for s. If we weren't already done, then there is a gap.
8288 */
8289 if (gap_start == FIND_GAP && s < end) {
8290 gap_start = s;
8291 }
8292 s = entry->vme_start;
8293 } else {
8294 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8295 }
8296 /*
8297 * Others can not only allocate behind us, we can
8298 * also see entries coalesce while we don't have the map lock.
8299 */
8300 if (entry == vm_map_to_entry(map)) {
8301 break;
8302 }
8303 }
8304 last_timestamp = map->timestamp;
8305 }
8306
8307 if (map->wait_for_space) {
8308 thread_wakeup((event_t) map);
8309 }
8310 /*
8311 * wake up anybody waiting on entries that we have already deleted.
8312 */
8313 if (need_wakeup) {
8314 vm_map_entry_wakeup(map);
8315 }
8316
8317 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8318 DTRACE_VM3(kern_vm_deallocate_gap,
8319 vm_map_offset_t, gap_start,
8320 vm_map_offset_t, save_start,
8321 vm_map_offset_t, save_end);
8322 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8323 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8324 }
8325 }
8326
8327 return KERN_SUCCESS;
8328 }
8329
8330 /*
8331 * vm_map_remove:
8332 *
8333 * Remove the given address range from the target map.
8334 * This is the exported form of vm_map_delete.
8335 */
8336 kern_return_t
8337 vm_map_remove(
8338 vm_map_t map,
8339 vm_map_offset_t start,
8340 vm_map_offset_t end,
8341 boolean_t flags)
8342 {
8343 kern_return_t result;
8344
8345 vm_map_lock(map);
8346 VM_MAP_RANGE_CHECK(map, start, end);
8347 /*
8348 * For the zone_map, the kernel controls the allocation/freeing of memory.
8349 * Any free to the zone_map should be within the bounds of the map and
8350 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8351 * free to the zone_map into a no-op, there is a problem and we should
8352 * panic.
8353 */
8354 if ((map == zone_map) && (start == end)) {
8355 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8356 }
8357 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8358 vm_map_unlock(map);
8359
8360 return result;
8361 }
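/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): how a kernel caller might tear down a mapping it owns.
 * Assumes the VM_MAP_REMOVE_NO_FLAGS definition from vm_map.h; the
 * helper name is hypothetical.
 */
static void
example_remove_mapping(
	vm_map_t        map,
	vm_map_offset_t addr,
	vm_map_size_t   size)
{
	kern_return_t kr;

	/* round the range the same way it was rounded when it was entered */
	kr = vm_map_remove(map,
	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
	    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
	    VM_MAP_REMOVE_NO_FLAGS);
	assert(kr == KERN_SUCCESS);
}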
8362
8363 /*
8364 * vm_map_remove_locked:
8365 *
8366 * Remove the given address range from the target locked map.
8367 * This is the exported form of vm_map_delete.
8368 */
8369 kern_return_t
8370 vm_map_remove_locked(
8371 vm_map_t map,
8372 vm_map_offset_t start,
8373 vm_map_offset_t end,
8374 boolean_t flags)
8375 {
8376 kern_return_t result;
8377
8378 VM_MAP_RANGE_CHECK(map, start, end);
8379 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8380 return result;
8381 }
8382
8383
8384 /*
8385 * Routine: vm_map_copy_allocate
8386 *
8387 * Description:
8388 * Allocates and initializes a map copy object.
8389 */
8390 static vm_map_copy_t
8391 vm_map_copy_allocate(void)
8392 {
8393 vm_map_copy_t new_copy;
8394
8395 new_copy = zalloc(vm_map_copy_zone);
8396 bzero(new_copy, sizeof(*new_copy));
8397 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8398 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8399 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8400 return new_copy;
8401 }
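/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): a freshly allocated copy object is an empty circular entry
 * list whose head and tail links both point at the header sentinel;
 * callers then pick the copy type.  The helper name is hypothetical.
 */
static void
example_copy_allocate_invariants(void)
{
	vm_map_copy_t copy;

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	assert(vm_map_copy_first_entry(copy) == vm_map_copy_to_entry(copy));
	assert(vm_map_copy_last_entry(copy) == vm_map_copy_to_entry(copy));
	vm_map_copy_discard(copy);      /* empty entry list: just zfree'd */
}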
8402
8403 /*
8404 * Routine: vm_map_copy_discard
8405 *
8406 * Description:
8407 * Dispose of a map copy object (returned by
8408 * vm_map_copyin).
8409 */
8410 void
8411 vm_map_copy_discard(
8412 vm_map_copy_t copy)
8413 {
8414 if (copy == VM_MAP_COPY_NULL) {
8415 return;
8416 }
8417
8418 switch (copy->type) {
8419 case VM_MAP_COPY_ENTRY_LIST:
8420 while (vm_map_copy_first_entry(copy) !=
8421 vm_map_copy_to_entry(copy)) {
8422 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8423
8424 vm_map_copy_entry_unlink(copy, entry);
8425 if (entry->is_sub_map) {
8426 vm_map_deallocate(VME_SUBMAP(entry));
8427 } else {
8428 vm_object_deallocate(VME_OBJECT(entry));
8429 }
8430 vm_map_copy_entry_dispose(copy, entry);
8431 }
8432 break;
8433 case VM_MAP_COPY_OBJECT:
8434 vm_object_deallocate(copy->cpy_object);
8435 break;
8436 case VM_MAP_COPY_KERNEL_BUFFER:
8437
8438 /*
8439 * The vm_map_copy_t and possibly the data buffer were
8440 * allocated by a single call to kalloc(), i.e. the
8441 * vm_map_copy_t was not allocated out of the zone.
8442 */
8443 if (copy->size > msg_ool_size_small || copy->offset) {
8444 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8445 (long long)copy->size, (long long)copy->offset);
8446 }
8447 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8448 return;
8449 }
8450 zfree(vm_map_copy_zone, copy);
8451 }
8452
8453 /*
8454 * Routine: vm_map_copy_copy
8455 *
8456 * Description:
8457 * Move the information in a map copy object to
8458 * a new map copy object, leaving the old one
8459 * empty.
8460 *
8461 * This is used by kernel routines that need
8462 * to look at out-of-line data (in copyin form)
8463 * before deciding whether to return SUCCESS.
8464 * If the routine returns FAILURE, the original
8465 * copy object will be deallocated; therefore,
8466 * these routines must make a copy of the copy
8467 * object and leave the original empty so that
8468 * deallocation will not fail.
8469 */
8470 vm_map_copy_t
8471 vm_map_copy_copy(
8472 vm_map_copy_t copy)
8473 {
8474 vm_map_copy_t new_copy;
8475
8476 if (copy == VM_MAP_COPY_NULL) {
8477 return VM_MAP_COPY_NULL;
8478 }
8479
8480 /*
8481 * Allocate a new copy object, and copy the information
8482 * from the old one into it.
8483 */
8484
8485 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8486 *new_copy = *copy;
8487
8488 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8489 /*
8490 * The links in the entry chain must be
8491 * changed to point to the new copy object.
8492 */
8493 vm_map_copy_first_entry(copy)->vme_prev
8494 = vm_map_copy_to_entry(new_copy);
8495 vm_map_copy_last_entry(copy)->vme_next
8496 = vm_map_copy_to_entry(new_copy);
8497 }
8498
8499 /*
8500 * Change the old copy object into one that contains
8501 * nothing to be deallocated.
8502 */
8503 copy->type = VM_MAP_COPY_OBJECT;
8504 copy->cpy_object = VM_OBJECT_NULL;
8505
8506 /*
8507 * Return the new object.
8508 */
8509 return new_copy;
8510 }
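/*
 * Illustrative sketch (added for clarity, not part of the original
 * source) of the pattern described above: a routine that must inspect
 * out-of-line data before committing moves the contents into a private
 * copy object, so a later vm_map_copy_discard() of the (now empty)
 * original cannot fail.  "example_consume" is a hypothetical consumer.
 */
static boolean_t example_consume(vm_map_copy_t copy);   /* hypothetical */

static kern_return_t
example_inspect_then_consume(vm_map_copy_t copy)
{
	vm_map_copy_t private_copy;

	private_copy = vm_map_copy_copy(copy);  /* "copy" is left empty */
	if (!example_consume(private_copy)) {
		vm_map_copy_discard(private_copy);
		return KERN_FAILURE;    /* caller may still discard "copy" */
	}
	return KERN_SUCCESS;
}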
8511
8512 static kern_return_t
8513 vm_map_overwrite_submap_recurse(
8514 vm_map_t dst_map,
8515 vm_map_offset_t dst_addr,
8516 vm_map_size_t dst_size)
8517 {
8518 vm_map_offset_t dst_end;
8519 vm_map_entry_t tmp_entry;
8520 vm_map_entry_t entry;
8521 kern_return_t result;
8522 boolean_t encountered_sub_map = FALSE;
8523
8524
8525
8526 /*
8527 * Verify that the destination is all writeable
8528 * initially. We have to trunc the destination
8529 * address and round the copy size or we'll end up
8530 * splitting entries in strange ways.
8531 */
8532
8533 dst_end = vm_map_round_page(dst_addr + dst_size,
8534 VM_MAP_PAGE_MASK(dst_map));
8535 vm_map_lock(dst_map);
8536
8537 start_pass_1:
8538 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8539 vm_map_unlock(dst_map);
8540 return KERN_INVALID_ADDRESS;
8541 }
8542
8543 vm_map_clip_start(dst_map,
8544 tmp_entry,
8545 vm_map_trunc_page(dst_addr,
8546 VM_MAP_PAGE_MASK(dst_map)));
8547 if (tmp_entry->is_sub_map) {
8548 /* clipping did unnest if needed */
8549 assert(!tmp_entry->use_pmap);
8550 }
8551
8552 for (entry = tmp_entry;;) {
8553 vm_map_entry_t next;
8554
8555 next = entry->vme_next;
8556 while (entry->is_sub_map) {
8557 vm_map_offset_t sub_start;
8558 vm_map_offset_t sub_end;
8559 vm_map_offset_t local_end;
8560
8561 if (entry->in_transition) {
8562 /*
8563 * Say that we are waiting, and wait for entry.
8564 */
8565 entry->needs_wakeup = TRUE;
8566 vm_map_entry_wait(dst_map, THREAD_UNINT);
8567
8568 goto start_pass_1;
8569 }
8570
8571 encountered_sub_map = TRUE;
8572 sub_start = VME_OFFSET(entry);
8573
8574 if (entry->vme_end < dst_end) {
8575 sub_end = entry->vme_end;
8576 } else {
8577 sub_end = dst_end;
8578 }
8579 sub_end -= entry->vme_start;
8580 sub_end += VME_OFFSET(entry);
8581 local_end = entry->vme_end;
8582 vm_map_unlock(dst_map);
8583
8584 result = vm_map_overwrite_submap_recurse(
8585 VME_SUBMAP(entry),
8586 sub_start,
8587 sub_end - sub_start);
8588
8589 if (result != KERN_SUCCESS) {
8590 return result;
8591 }
8592 if (dst_end <= entry->vme_end) {
8593 return KERN_SUCCESS;
8594 }
8595 vm_map_lock(dst_map);
8596 if (!vm_map_lookup_entry(dst_map, local_end,
8597 &tmp_entry)) {
8598 vm_map_unlock(dst_map);
8599 return KERN_INVALID_ADDRESS;
8600 }
8601 entry = tmp_entry;
8602 next = entry->vme_next;
8603 }
8604
8605 if (!(entry->protection & VM_PROT_WRITE)) {
8606 vm_map_unlock(dst_map);
8607 return KERN_PROTECTION_FAILURE;
8608 }
8609
8610 /*
8611 * If the entry is in transition, we must wait
8612 * for it to exit that state. Anything could happen
8613 * when we unlock the map, so start over.
8614 */
8615 if (entry->in_transition) {
8616 /*
8617 * Say that we are waiting, and wait for entry.
8618 */
8619 entry->needs_wakeup = TRUE;
8620 vm_map_entry_wait(dst_map, THREAD_UNINT);
8621
8622 goto start_pass_1;
8623 }
8624
8625 /*
8626 * our range is contained completely within this map entry
8627 */
8628 if (dst_end <= entry->vme_end) {
8629 vm_map_unlock(dst_map);
8630 return KERN_SUCCESS;
8631 }
8632 /*
8633 * check that range specified is contiguous region
8634 */
8635 if ((next == vm_map_to_entry(dst_map)) ||
8636 (next->vme_start != entry->vme_end)) {
8637 vm_map_unlock(dst_map);
8638 return KERN_INVALID_ADDRESS;
8639 }
8640
8641 /*
8642 * Check for permanent objects in the destination.
8643 */
8644 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8645 ((!VME_OBJECT(entry)->internal) ||
8646 (VME_OBJECT(entry)->true_share))) {
8647 if (encountered_sub_map) {
8648 vm_map_unlock(dst_map);
8649 return KERN_FAILURE;
8650 }
8651 }
8652
8653
8654 entry = next;
8655 }/* for */
8656 vm_map_unlock(dst_map);
8657 return KERN_SUCCESS;
8658 }
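/*
 * Illustrative example (added for clarity, not part of the original
 * source; addresses are hypothetical) of the sub_start/sub_end
 * computation above: if an entry covering [0x1000000, 0x1800000) maps
 * its submap at VME_OFFSET == 0x200000 and dst_end == 0x1400000, then
 * sub_start == 0x200000 and sub_end == 0x1400000 - 0x1000000 +
 * 0x200000 == 0x600000, so the recursion checks [0x200000, 0x600000)
 * of the submap for writability.
 */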
8659
8660 /*
8661 * Routine: vm_map_copy_overwrite
8662 *
8663 * Description:
8664 * Copy the memory described by the map copy
8665 * object (copy; returned by vm_map_copyin) onto
8666 * the specified destination region (dst_map, dst_addr).
8667 * The destination must be writeable.
8668 *
8669 * Unlike vm_map_copyout, this routine actually
8670 * writes over previously-mapped memory. If the
8671 * previous mapping was to a permanent (user-supplied)
8672 * memory object, it is preserved.
8673 *
8674 * The attributes (protection and inheritance) of the
8675 * destination region are preserved.
8676 *
8677 * If successful, consumes the copy object.
8678 * Otherwise, the caller is responsible for it.
8679 *
8680 * Implementation notes:
8681 * To overwrite aligned temporary virtual memory, it is
8682 * sufficient to remove the previous mapping and insert
8683 * the new copy. This replacement is done either on
8684 * the whole region (if no permanent virtual memory
8685 * objects are embedded in the destination region) or
8686 * in individual map entries.
8687 *
8688 * To overwrite permanent virtual memory, it is necessary
8689 * to copy each page, as the external memory management
8690 * interface currently does not provide any optimizations.
8691 *
8692 * Unaligned memory also has to be copied. It is possible
8693 * to use 'vm_trickery' to copy the aligned data. This is
8694 * not done but not hard to implement.
8695 *
8696 * Once a page of permanent memory has been overwritten,
8697 * it is impossible to interrupt this function; otherwise,
8698 * the call would be neither atomic nor location-independent.
8699 * The kernel-state portion of a user thread must be
8700 * interruptible.
8701 *
8702 * It may be expensive to forward all requests that might
8703 * overwrite permanent memory (vm_write, vm_copy) to
8704 * uninterruptible kernel threads. This routine may be
8705 * called by interruptible threads; however, success is
8706 * not guaranteed -- if the request cannot be performed
8707 * atomically and interruptibly, an error indication is
8708 * returned.
8709 */
8710
8711 static kern_return_t
8712 vm_map_copy_overwrite_nested(
8713 vm_map_t dst_map,
8714 vm_map_address_t dst_addr,
8715 vm_map_copy_t copy,
8716 boolean_t interruptible,
8717 pmap_t pmap,
8718 boolean_t discard_on_success)
8719 {
8720 vm_map_offset_t dst_end;
8721 vm_map_entry_t tmp_entry;
8722 vm_map_entry_t entry;
8723 kern_return_t kr;
8724 boolean_t aligned = TRUE;
8725 boolean_t contains_permanent_objects = FALSE;
8726 boolean_t encountered_sub_map = FALSE;
8727 vm_map_offset_t base_addr;
8728 vm_map_size_t copy_size;
8729 vm_map_size_t total_size;
8730
8731
8732 /*
8733 * Check for null copy object.
8734 */
8735
8736 if (copy == VM_MAP_COPY_NULL) {
8737 return KERN_SUCCESS;
8738 }
8739
8740 /*
8741 * Check for special kernel buffer allocated
8742 * by new_ipc_kmsg_copyin.
8743 */
8744
8745 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8746 return vm_map_copyout_kernel_buffer(
8747 dst_map, &dst_addr,
8748 copy, copy->size, TRUE, discard_on_success);
8749 }
8750
8751 /*
8752 * Only works for entry lists at the moment. Will
8753 * support page lists later.
8754 */
8755
8756 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8757
8758 if (copy->size == 0) {
8759 if (discard_on_success) {
8760 vm_map_copy_discard(copy);
8761 }
8762 return KERN_SUCCESS;
8763 }
8764
8765 /*
8766 * Verify that the destination is all writeable
8767 * initially. We have to trunc the destination
8768 * address and round the copy size or we'll end up
8769 * splitting entries in strange ways.
8770 */
8771
8772 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8773 VM_MAP_PAGE_MASK(dst_map)) ||
8774 !VM_MAP_PAGE_ALIGNED(copy->offset,
8775 VM_MAP_PAGE_MASK(dst_map)) ||
8776 !VM_MAP_PAGE_ALIGNED(dst_addr,
8777 VM_MAP_PAGE_MASK(dst_map))) {
8778 aligned = FALSE;
8779 dst_end = vm_map_round_page(dst_addr + copy->size,
8780 VM_MAP_PAGE_MASK(dst_map));
8781 } else {
8782 dst_end = dst_addr + copy->size;
8783 }
8784
8785 vm_map_lock(dst_map);
8786
8787 /* LP64todo - remove this check when vm_map_commpage64()
8788 * no longer has to stuff in a map_entry for the commpage
8789 * above the map's max_offset.
8790 */
8791 if (dst_addr >= dst_map->max_offset) {
8792 vm_map_unlock(dst_map);
8793 return KERN_INVALID_ADDRESS;
8794 }
8795
8796 start_pass_1:
8797 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8798 vm_map_unlock(dst_map);
8799 return KERN_INVALID_ADDRESS;
8800 }
8801 vm_map_clip_start(dst_map,
8802 tmp_entry,
8803 vm_map_trunc_page(dst_addr,
8804 VM_MAP_PAGE_MASK(dst_map)));
8805 for (entry = tmp_entry;;) {
8806 vm_map_entry_t next = entry->vme_next;
8807
8808 while (entry->is_sub_map) {
8809 vm_map_offset_t sub_start;
8810 vm_map_offset_t sub_end;
8811 vm_map_offset_t local_end;
8812
8813 if (entry->in_transition) {
8814 /*
8815 * Say that we are waiting, and wait for entry.
8816 */
8817 entry->needs_wakeup = TRUE;
8818 vm_map_entry_wait(dst_map, THREAD_UNINT);
8819
8820 goto start_pass_1;
8821 }
8822
8823 local_end = entry->vme_end;
8824 if (!(entry->needs_copy)) {
8825 /* if needs_copy is set, we are a COW submap; */
8826 /* in such a case we just replace it, so */
8827 /* there is no need for the following */
8828 /* check. */
8829 encountered_sub_map = TRUE;
8830 sub_start = VME_OFFSET(entry);
8831
8832 if (entry->vme_end < dst_end) {
8833 sub_end = entry->vme_end;
8834 } else {
8835 sub_end = dst_end;
8836 }
8837 sub_end -= entry->vme_start;
8838 sub_end += VME_OFFSET(entry);
8839 vm_map_unlock(dst_map);
8840
8841 kr = vm_map_overwrite_submap_recurse(
8842 VME_SUBMAP(entry),
8843 sub_start,
8844 sub_end - sub_start);
8845 if (kr != KERN_SUCCESS) {
8846 return kr;
8847 }
8848 vm_map_lock(dst_map);
8849 }
8850
8851 if (dst_end <= entry->vme_end) {
8852 goto start_overwrite;
8853 }
8854 if (!vm_map_lookup_entry(dst_map, local_end,
8855 &entry)) {
8856 vm_map_unlock(dst_map);
8857 return KERN_INVALID_ADDRESS;
8858 }
8859 next = entry->vme_next;
8860 }
8861
8862 if (!(entry->protection & VM_PROT_WRITE)) {
8863 vm_map_unlock(dst_map);
8864 return KERN_PROTECTION_FAILURE;
8865 }
8866
8867 /*
8868 * If the entry is in transition, we must wait
8869 * for it to exit that state. Anything could happen
8870 * when we unlock the map, so start over.
8871 */
8872 if (entry->in_transition) {
8873 /*
8874 * Say that we are waiting, and wait for entry.
8875 */
8876 entry->needs_wakeup = TRUE;
8877 vm_map_entry_wait(dst_map, THREAD_UNINT);
8878
8879 goto start_pass_1;
8880 }
8881
8882 /*
8883 * our range is contained completely within this map entry
8884 */
8885 if (dst_end <= entry->vme_end) {
8886 break;
8887 }
8888 /*
8889 * check that range specified is contiguous region
8890 */
8891 if ((next == vm_map_to_entry(dst_map)) ||
8892 (next->vme_start != entry->vme_end)) {
8893 vm_map_unlock(dst_map);
8894 return KERN_INVALID_ADDRESS;
8895 }
8896
8897
8898 /*
8899 * Check for permanent objects in the destination.
8900 */
8901 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8902 ((!VME_OBJECT(entry)->internal) ||
8903 (VME_OBJECT(entry)->true_share))) {
8904 contains_permanent_objects = TRUE;
8905 }
8906
8907 entry = next;
8908 }/* for */
8909
8910 start_overwrite:
8911 /*
8912 * If there are permanent objects in the destination, then
8913 * the copy cannot be interrupted.
8914 */
8915
8916 if (interruptible && contains_permanent_objects) {
8917 vm_map_unlock(dst_map);
8918 return KERN_FAILURE; /* XXX */
8919 }
8920
8921 /*
8922 *
8923 * Make a second pass, overwriting the data.
8924 * At the beginning of each loop iteration,
8925 * the next entry to be overwritten is "tmp_entry"
8926 * (initially, the value returned from the lookup above),
8927 * and the starting address expected in that entry
8928 * is "start".
8929 */
8930
8931 total_size = copy->size;
8932 if (encountered_sub_map) {
8933 copy_size = 0;
8934 /* re-calculate tmp_entry since we've had the map */
8935 /* unlocked */
8936 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8937 vm_map_unlock(dst_map);
8938 return KERN_INVALID_ADDRESS;
8939 }
8940 } else {
8941 copy_size = copy->size;
8942 }
8943
8944 base_addr = dst_addr;
8945 while (TRUE) {
8946 /* deconstruct the copy object and do it in parts */
8947 /* only in the sub_map, interruptible case */
8948 vm_map_entry_t copy_entry;
8949 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8950 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8951 int nentries;
8952 int remaining_entries = 0;
8953 vm_map_offset_t new_offset = 0;
8954
8955 for (entry = tmp_entry; copy_size == 0;) {
8956 vm_map_entry_t next;
8957
8958 next = entry->vme_next;
8959
8960 /* tmp_entry and the base address are moved along */
8961 /* each time we encounter a sub-map. Otherwise, */
8962 /* entry can outpace tmp_entry, and the copy_size */
8963 /* may reflect the distance between them. */
8964 /* If the current entry is found to be in transition, */
8965 /* we will start over at the beginning or at the last */
8966 /* encountered submap, as dictated by base_addr, */
8967 /* and will zero copy_size accordingly. */
8968 if (entry->in_transition) {
8969 /*
8970 * Say that we are waiting, and wait for entry.
8971 */
8972 entry->needs_wakeup = TRUE;
8973 vm_map_entry_wait(dst_map, THREAD_UNINT);
8974
8975 if (!vm_map_lookup_entry(dst_map, base_addr,
8976 &tmp_entry)) {
8977 vm_map_unlock(dst_map);
8978 return KERN_INVALID_ADDRESS;
8979 }
8980 copy_size = 0;
8981 entry = tmp_entry;
8982 continue;
8983 }
8984 if (entry->is_sub_map) {
8985 vm_map_offset_t sub_start;
8986 vm_map_offset_t sub_end;
8987 vm_map_offset_t local_end;
8988
8989 if (entry->needs_copy) {
8990 /* if this is a COW submap */
8991 /* just back the range with an */
8992 /* anonymous entry */
8993 if (entry->vme_end < dst_end) {
8994 sub_end = entry->vme_end;
8995 } else {
8996 sub_end = dst_end;
8997 }
8998 if (entry->vme_start < base_addr) {
8999 sub_start = base_addr;
9000 } else {
9001 sub_start = entry->vme_start;
9002 }
9003 vm_map_clip_end(
9004 dst_map, entry, sub_end);
9005 vm_map_clip_start(
9006 dst_map, entry, sub_start);
9007 assert(!entry->use_pmap);
9008 assert(!entry->iokit_acct);
9009 entry->use_pmap = TRUE;
9010 entry->is_sub_map = FALSE;
9011 vm_map_deallocate(
9012 VME_SUBMAP(entry));
9013 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9014 VME_OFFSET_SET(entry, 0);
9015 entry->is_shared = FALSE;
9016 entry->needs_copy = FALSE;
9017 entry->protection = VM_PROT_DEFAULT;
9018 entry->max_protection = VM_PROT_ALL;
9019 entry->wired_count = 0;
9020 entry->user_wired_count = 0;
9021 if (entry->inheritance
9022 == VM_INHERIT_SHARE) {
9023 entry->inheritance = VM_INHERIT_COPY;
9024 }
9025 continue;
9026 }
9027 /* first take care of any non-sub_map */
9028 /* entries to send */
9029 if (base_addr < entry->vme_start) {
9030 /* stuff to send */
9031 copy_size =
9032 entry->vme_start - base_addr;
9033 break;
9034 }
9035 sub_start = VME_OFFSET(entry);
9036
9037 if (entry->vme_end < dst_end) {
9038 sub_end = entry->vme_end;
9039 } else {
9040 sub_end = dst_end;
9041 }
9042 sub_end -= entry->vme_start;
9043 sub_end += VME_OFFSET(entry);
9044 local_end = entry->vme_end;
9045 vm_map_unlock(dst_map);
9046 copy_size = sub_end - sub_start;
9047
9048 /* adjust the copy object */
9049 if (total_size > copy_size) {
9050 vm_map_size_t local_size = 0;
9051 vm_map_size_t entry_size;
9052
9053 nentries = 1;
9054 new_offset = copy->offset;
9055 copy_entry = vm_map_copy_first_entry(copy);
9056 while (copy_entry !=
9057 vm_map_copy_to_entry(copy)) {
9058 entry_size = copy_entry->vme_end -
9059 copy_entry->vme_start;
9060 if ((local_size < copy_size) &&
9061 ((local_size + entry_size)
9062 >= copy_size)) {
9063 vm_map_copy_clip_end(copy,
9064 copy_entry,
9065 copy_entry->vme_start +
9066 (copy_size - local_size));
9067 entry_size = copy_entry->vme_end -
9068 copy_entry->vme_start;
9069 local_size += entry_size;
9070 new_offset += entry_size;
9071 }
9072 if (local_size >= copy_size) {
9073 next_copy = copy_entry->vme_next;
9074 copy_entry->vme_next =
9075 vm_map_copy_to_entry(copy);
9076 previous_prev =
9077 copy->cpy_hdr.links.prev;
9078 copy->cpy_hdr.links.prev = copy_entry;
9079 copy->size = copy_size;
9080 remaining_entries =
9081 copy->cpy_hdr.nentries;
9082 remaining_entries -= nentries;
9083 copy->cpy_hdr.nentries = nentries;
9084 break;
9085 } else {
9086 local_size += entry_size;
9087 new_offset += entry_size;
9088 nentries++;
9089 }
9090 copy_entry = copy_entry->vme_next;
9091 }
9092 }
9093
9094 if ((entry->use_pmap) && (pmap == NULL)) {
9095 kr = vm_map_copy_overwrite_nested(
9096 VME_SUBMAP(entry),
9097 sub_start,
9098 copy,
9099 interruptible,
9100 VME_SUBMAP(entry)->pmap,
9101 TRUE);
9102 } else if (pmap != NULL) {
9103 kr = vm_map_copy_overwrite_nested(
9104 VME_SUBMAP(entry),
9105 sub_start,
9106 copy,
9107 interruptible, pmap,
9108 TRUE);
9109 } else {
9110 kr = vm_map_copy_overwrite_nested(
9111 VME_SUBMAP(entry),
9112 sub_start,
9113 copy,
9114 interruptible,
9115 dst_map->pmap,
9116 TRUE);
9117 }
9118 if (kr != KERN_SUCCESS) {
9119 if (next_copy != NULL) {
9120 copy->cpy_hdr.nentries +=
9121 remaining_entries;
9122 copy->cpy_hdr.links.prev->vme_next =
9123 next_copy;
9124 copy->cpy_hdr.links.prev
9125 = previous_prev;
9126 copy->size = total_size;
9127 }
9128 return kr;
9129 }
9130 if (dst_end <= local_end) {
9131 return KERN_SUCCESS;
9132 }
9133 /* otherwise the copy no longer exists; it was */
9134 /* destroyed after a successful copy_overwrite */
9135 copy = vm_map_copy_allocate();
9136 copy->type = VM_MAP_COPY_ENTRY_LIST;
9137 copy->offset = new_offset;
9138
9139 /*
9140 * XXX FBDP
9141 * this does not seem to deal with
9142 * the VM map store (R&B tree)
9143 */
9144
9145 total_size -= copy_size;
9146 copy_size = 0;
9147 /* put back remainder of copy in container */
9148 if (next_copy != NULL) {
9149 copy->cpy_hdr.nentries = remaining_entries;
9150 copy->cpy_hdr.links.next = next_copy;
9151 copy->cpy_hdr.links.prev = previous_prev;
9152 copy->size = total_size;
9153 next_copy->vme_prev =
9154 vm_map_copy_to_entry(copy);
9155 next_copy = NULL;
9156 }
9157 base_addr = local_end;
9158 vm_map_lock(dst_map);
9159 if (!vm_map_lookup_entry(dst_map,
9160 local_end, &tmp_entry)) {
9161 vm_map_unlock(dst_map);
9162 return KERN_INVALID_ADDRESS;
9163 }
9164 entry = tmp_entry;
9165 continue;
9166 }
9167 if (dst_end <= entry->vme_end) {
9168 copy_size = dst_end - base_addr;
9169 break;
9170 }
9171
9172 if ((next == vm_map_to_entry(dst_map)) ||
9173 (next->vme_start != entry->vme_end)) {
9174 vm_map_unlock(dst_map);
9175 return KERN_INVALID_ADDRESS;
9176 }
9177
9178 entry = next;
9179 }/* for */
9180
9181 next_copy = NULL;
9182 nentries = 1;
9183
9184 /* adjust the copy object */
9185 if (total_size > copy_size) {
9186 vm_map_size_t local_size = 0;
9187 vm_map_size_t entry_size;
9188
9189 new_offset = copy->offset;
9190 copy_entry = vm_map_copy_first_entry(copy);
9191 while (copy_entry != vm_map_copy_to_entry(copy)) {
9192 entry_size = copy_entry->vme_end -
9193 copy_entry->vme_start;
9194 if ((local_size < copy_size) &&
9195 ((local_size + entry_size)
9196 >= copy_size)) {
9197 vm_map_copy_clip_end(copy, copy_entry,
9198 copy_entry->vme_start +
9199 (copy_size - local_size));
9200 entry_size = copy_entry->vme_end -
9201 copy_entry->vme_start;
9202 local_size += entry_size;
9203 new_offset += entry_size;
9204 }
9205 if (local_size >= copy_size) {
9206 next_copy = copy_entry->vme_next;
9207 copy_entry->vme_next =
9208 vm_map_copy_to_entry(copy);
9209 previous_prev =
9210 copy->cpy_hdr.links.prev;
9211 copy->cpy_hdr.links.prev = copy_entry;
9212 copy->size = copy_size;
9213 remaining_entries =
9214 copy->cpy_hdr.nentries;
9215 remaining_entries -= nentries;
9216 copy->cpy_hdr.nentries = nentries;
9217 break;
9218 } else {
9219 local_size += entry_size;
9220 new_offset += entry_size;
9221 nentries++;
9222 }
9223 copy_entry = copy_entry->vme_next;
9224 }
9225 }
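/*
 * Illustrative trace (added for clarity, not part of the original
 * source; sizes are hypothetical) of the split above: with copy
 * entries of 16K, 32K and 16K and copy_size == 40K, the second entry
 * is clipped at 24K, the copy object temporarily keeps the first 40K
 * (2 entries), and the clipped 8K remainder plus the third entry are
 * parked on next_copy/remaining_entries to be restitched onto the
 * copy object once this chunk has been written.
 */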
9226
9227 if (aligned) {
9228 pmap_t local_pmap;
9229
9230 if (pmap) {
9231 local_pmap = pmap;
9232 } else {
9233 local_pmap = dst_map->pmap;
9234 }
9235
9236 if ((kr = vm_map_copy_overwrite_aligned(
9237 dst_map, tmp_entry, copy,
9238 base_addr, local_pmap)) != KERN_SUCCESS) {
9239 if (next_copy != NULL) {
9240 copy->cpy_hdr.nentries +=
9241 remaining_entries;
9242 copy->cpy_hdr.links.prev->vme_next =
9243 next_copy;
9244 copy->cpy_hdr.links.prev =
9245 previous_prev;
9246 copy->size += copy_size;
9247 }
9248 return kr;
9249 }
9250 vm_map_unlock(dst_map);
9251 } else {
9252 /*
9253 * Performance gain:
9254 *
9255 * if the copy and dst address are misaligned but share the same
9256 * offset within the page, we can copy_not_aligned the
9257 * misaligned parts and copy the rest aligned. If they are
9258 * aligned but len is unaligned, we simply need to copy
9259 * the end bit unaligned. We'll need to split the misaligned
9260 * bits of the region in this case!
9261 */
9262 /* ALWAYS UNLOCKS THE dst_map MAP */
9263 kr = vm_map_copy_overwrite_unaligned(
9264 dst_map,
9265 tmp_entry,
9266 copy,
9267 base_addr,
9268 discard_on_success);
9269 if (kr != KERN_SUCCESS) {
9270 if (next_copy != NULL) {
9271 copy->cpy_hdr.nentries +=
9272 remaining_entries;
9273 copy->cpy_hdr.links.prev->vme_next =
9274 next_copy;
9275 copy->cpy_hdr.links.prev =
9276 previous_prev;
9277 copy->size += copy_size;
9278 }
9279 return kr;
9280 }
9281 }
9282 total_size -= copy_size;
9283 if (total_size == 0) {
9284 break;
9285 }
9286 base_addr += copy_size;
9287 copy_size = 0;
9288 copy->offset = new_offset;
9289 if (next_copy != NULL) {
9290 copy->cpy_hdr.nentries = remaining_entries;
9291 copy->cpy_hdr.links.next = next_copy;
9292 copy->cpy_hdr.links.prev = previous_prev;
9293 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9294 copy->size = total_size;
9295 }
9296 vm_map_lock(dst_map);
9297 while (TRUE) {
9298 if (!vm_map_lookup_entry(dst_map,
9299 base_addr, &tmp_entry)) {
9300 vm_map_unlock(dst_map);
9301 return KERN_INVALID_ADDRESS;
9302 }
9303 if (tmp_entry->in_transition) {
9304 entry->needs_wakeup = TRUE;
9305 vm_map_entry_wait(dst_map, THREAD_UNINT);
9306 } else {
9307 break;
9308 }
9309 }
9310 vm_map_clip_start(dst_map,
9311 tmp_entry,
9312 vm_map_trunc_page(base_addr,
9313 VM_MAP_PAGE_MASK(dst_map)));
9314
9315 entry = tmp_entry;
9316 } /* while */
9317
9318 /*
9319 * Throw away the vm_map_copy object
9320 */
9321 if (discard_on_success) {
9322 vm_map_copy_discard(copy);
9323 }
9324
9325 return KERN_SUCCESS;
9326 }/* vm_map_copy_overwrite */
9327
9328 kern_return_t
9329 vm_map_copy_overwrite(
9330 vm_map_t dst_map,
9331 vm_map_offset_t dst_addr,
9332 vm_map_copy_t copy,
9333 boolean_t interruptible)
9334 {
9335 vm_map_size_t head_size, tail_size;
9336 vm_map_copy_t head_copy, tail_copy;
9337 vm_map_offset_t head_addr, tail_addr;
9338 vm_map_entry_t entry;
9339 kern_return_t kr;
9340 vm_map_offset_t effective_page_mask, effective_page_size;
9341
9342 head_size = 0;
9343 tail_size = 0;
9344 head_copy = NULL;
9345 tail_copy = NULL;
9346 head_addr = 0;
9347 tail_addr = 0;
9348
9349 if (interruptible ||
9350 copy == VM_MAP_COPY_NULL ||
9351 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9352 /*
9353 * We can't split the "copy" map if we're interruptible
9354 * or if we don't have a "copy" map...
9355 */
9356 blunt_copy:
9357 return vm_map_copy_overwrite_nested(dst_map,
9358 dst_addr,
9359 copy,
9360 interruptible,
9361 (pmap_t) NULL,
9362 TRUE);
9363 }
9364
9365 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9366 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9367 effective_page_mask);
9368 effective_page_size = effective_page_mask + 1;
9369
9370 if (copy->size < 3 * effective_page_size) {
9371 /*
9372 * Too small to bother with optimizing...
9373 */
9374 goto blunt_copy;
9375 }
9376
9377 if ((dst_addr & effective_page_mask) !=
9378 (copy->offset & effective_page_mask)) {
9379 /*
9380 * Incompatible mis-alignment of source and destination...
9381 */
9382 goto blunt_copy;
9383 }
9384
9385 /*
9386 * Proper alignment or identical mis-alignment at the beginning.
9387 * Let's try and do a small unaligned copy first (if needed)
9388 * and then an aligned copy for the rest.
9389 */
9390 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9391 head_addr = dst_addr;
9392 head_size = (effective_page_size -
9393 (copy->offset & effective_page_mask));
9394 head_size = MIN(head_size, copy->size);
9395 }
9396 if (!vm_map_page_aligned(copy->offset + copy->size,
9397 effective_page_mask)) {
9398 /*
9399 * Mis-alignment at the end.
9400 * Do an aligned copy up to the last page and
9401 * then an unaligned copy for the remaining bytes.
9402 */
9403 tail_size = ((copy->offset + copy->size) &
9404 effective_page_mask);
9405 tail_size = MIN(tail_size, copy->size);
9406 tail_addr = dst_addr + copy->size - tail_size;
9407 assert(tail_addr >= head_addr + head_size);
9408 }
9409 assert(head_size + tail_size <= copy->size);
9410
9411 if (head_size + tail_size == copy->size) {
9412 /*
9413 * It's all unaligned, no optimization possible...
9414 */
9415 goto blunt_copy;
9416 }
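	/*
	 * Worked example (added for clarity, not part of the original
	 * source; values are hypothetical): with a 16K effective page
	 * size, copy->offset == 0x7000, copy->size == 0xe800 and
	 * dst_addr == 0x10003000 (same mis-alignment, 0x3000), we get
	 * head_addr == 0x10003000 with head_size == 0x1000, an aligned
	 * middle of 0xc000 bytes copied to 0x10004000, and
	 * tail_addr == 0x10010000 with tail_size == 0x1800.
	 */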
9417
9418 /*
9419 * Can't optimize if there are any submaps in the
9420 * destination due to the way we free the "copy" map
9421 * progressively in vm_map_copy_overwrite_nested()
9422 * in that case.
9423 */
9424 vm_map_lock_read(dst_map);
9425 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9426 vm_map_unlock_read(dst_map);
9427 goto blunt_copy;
9428 }
9429 for (;
9430 (entry != vm_map_copy_to_entry(copy) &&
9431 entry->vme_start < dst_addr + copy->size);
9432 entry = entry->vme_next) {
9433 if (entry->is_sub_map) {
9434 vm_map_unlock_read(dst_map);
9435 goto blunt_copy;
9436 }
9437 }
9438 vm_map_unlock_read(dst_map);
9439
9440 if (head_size) {
9441 /*
9442 * Unaligned copy of the first "head_size" bytes, to reach
9443 * a page boundary.
9444 */
9445
9446 /*
9447 * Extract "head_copy" out of "copy".
9448 */
9449 head_copy = vm_map_copy_allocate();
9450 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9451 head_copy->cpy_hdr.entries_pageable =
9452 copy->cpy_hdr.entries_pageable;
9453 vm_map_store_init(&head_copy->cpy_hdr);
9454
9455 entry = vm_map_copy_first_entry(copy);
9456 if (entry->vme_end < copy->offset + head_size) {
9457 head_size = entry->vme_end - copy->offset;
9458 }
9459
9460 head_copy->offset = copy->offset;
9461 head_copy->size = head_size;
9462 copy->offset += head_size;
9463 copy->size -= head_size;
9464
9465 vm_map_copy_clip_end(copy, entry, copy->offset);
9466 vm_map_copy_entry_unlink(copy, entry);
9467 vm_map_copy_entry_link(head_copy,
9468 vm_map_copy_to_entry(head_copy),
9469 entry);
9470
9471 /*
9472 * Do the unaligned copy.
9473 */
9474 kr = vm_map_copy_overwrite_nested(dst_map,
9475 head_addr,
9476 head_copy,
9477 interruptible,
9478 (pmap_t) NULL,
9479 FALSE);
9480 if (kr != KERN_SUCCESS) {
9481 goto done;
9482 }
9483 }
9484
9485 if (tail_size) {
9486 /*
9487 * Extract "tail_copy" out of "copy".
9488 */
9489 tail_copy = vm_map_copy_allocate();
9490 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9491 tail_copy->cpy_hdr.entries_pageable =
9492 copy->cpy_hdr.entries_pageable;
9493 vm_map_store_init(&tail_copy->cpy_hdr);
9494
9495 tail_copy->offset = copy->offset + copy->size - tail_size;
9496 tail_copy->size = tail_size;
9497
9498 copy->size -= tail_size;
9499
9500 entry = vm_map_copy_last_entry(copy);
9501 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9502 entry = vm_map_copy_last_entry(copy);
9503 vm_map_copy_entry_unlink(copy, entry);
9504 vm_map_copy_entry_link(tail_copy,
9505 vm_map_copy_last_entry(tail_copy),
9506 entry);
9507 }
9508
9509 /*
9510 * Copy most (or possibly all) of the data.
9511 */
9512 kr = vm_map_copy_overwrite_nested(dst_map,
9513 dst_addr + head_size,
9514 copy,
9515 interruptible,
9516 (pmap_t) NULL,
9517 FALSE);
9518 if (kr != KERN_SUCCESS) {
9519 goto done;
9520 }
9521
9522 if (tail_size) {
9523 kr = vm_map_copy_overwrite_nested(dst_map,
9524 tail_addr,
9525 tail_copy,
9526 interruptible,
9527 (pmap_t) NULL,
9528 FALSE);
9529 }
9530
9531 done:
9532 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9533 if (kr == KERN_SUCCESS) {
9534 /*
9535 * Discard all the copy maps.
9536 */
9537 if (head_copy) {
9538 vm_map_copy_discard(head_copy);
9539 head_copy = NULL;
9540 }
9541 vm_map_copy_discard(copy);
9542 if (tail_copy) {
9543 vm_map_copy_discard(tail_copy);
9544 tail_copy = NULL;
9545 }
9546 } else {
9547 /*
9548 * Re-assemble the original copy map.
9549 */
9550 if (head_copy) {
9551 entry = vm_map_copy_first_entry(head_copy);
9552 vm_map_copy_entry_unlink(head_copy, entry);
9553 vm_map_copy_entry_link(copy,
9554 vm_map_copy_to_entry(copy),
9555 entry);
9556 copy->offset -= head_size;
9557 copy->size += head_size;
9558 vm_map_copy_discard(head_copy);
9559 head_copy = NULL;
9560 }
9561 if (tail_copy) {
9562 entry = vm_map_copy_last_entry(tail_copy);
9563 vm_map_copy_entry_unlink(tail_copy, entry);
9564 vm_map_copy_entry_link(copy,
9565 vm_map_copy_last_entry(copy),
9566 entry);
9567 copy->size += tail_size;
9568 vm_map_copy_discard(tail_copy);
9569 tail_copy = NULL;
9570 }
9571 }
9572 return kr;
9573 }
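/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): the usual pairing of vm_map_copyin() with
 * vm_map_copy_overwrite(), as done for vm_copy()-style requests.  On
 * failure the copy object has not been consumed and must be discarded
 * by the caller.  The helper name is hypothetical.
 */
static kern_return_t
example_copy_within_map(
	vm_map_t         map,
	vm_map_address_t source_address,
	vm_map_address_t dest_address,
	vm_map_size_t    size)
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(map, source_address, size,
	    FALSE /* src_destroy */, &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = vm_map_copy_overwrite(map, dest_address, copy,
	    FALSE /* interruptible */);
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);
	}
	return kr;
}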
9574
9575
9576 /*
9577 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9578 *
9579 * Description:
9580 * Physically copy unaligned data
9581 *
9582 * Implementation:
9583 * Unaligned parts of pages have to be physically copied. We use
9584 * a modified form of vm_fault_copy (which understands non-aligned
9585 * page offsets and sizes) to do the copy. We attempt to copy as
9586 * much memory in one go as possible; however, vm_fault_copy copies
9587 * within 1 memory object so we have to find the smaller of "amount left"
9588 * "source object data size" and "target object data size". With
9589 * unaligned data we don't need to split regions, therefore the source
9590 * (copy) object should be one map entry, the target range may be split
9591 * over multiple map entries, however. In any event, we are pessimistic
9592 * about these assumptions.
9593 *
9594 * Assumptions:
9595 * dst_map is locked on entry and is returned locked on success,
9596 * unlocked on error.
9597 */
9598
9599 static kern_return_t
9600 vm_map_copy_overwrite_unaligned(
9601 vm_map_t dst_map,
9602 vm_map_entry_t entry,
9603 vm_map_copy_t copy,
9604 vm_map_offset_t start,
9605 boolean_t discard_on_success)
9606 {
9607 vm_map_entry_t copy_entry;
9608 vm_map_entry_t copy_entry_next;
9609 vm_map_version_t version;
9610 vm_object_t dst_object;
9611 vm_object_offset_t dst_offset;
9612 vm_object_offset_t src_offset;
9613 vm_object_offset_t entry_offset;
9614 vm_map_offset_t entry_end;
9615 vm_map_size_t src_size,
9616 dst_size,
9617 copy_size,
9618 amount_left;
9619 kern_return_t kr = KERN_SUCCESS;
9620
9621
9622 copy_entry = vm_map_copy_first_entry(copy);
9623
9624 vm_map_lock_write_to_read(dst_map);
9625
9626 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9627 amount_left = copy->size;
9628 /*
9629 * unaligned, so we never clipped this entry; we need the offset into
9630 * the vm_object, not just the data.
9631 */
9632 while (amount_left > 0) {
9633 if (entry == vm_map_to_entry(dst_map)) {
9634 vm_map_unlock_read(dst_map);
9635 return KERN_INVALID_ADDRESS;
9636 }
9637
9638 /* "start" must be within the current map entry */
9639 assert((start >= entry->vme_start) && (start < entry->vme_end));
9640
9641 dst_offset = start - entry->vme_start;
9642
9643 dst_size = entry->vme_end - start;
9644
9645 src_size = copy_entry->vme_end -
9646 (copy_entry->vme_start + src_offset);
9647
9648 if (dst_size < src_size) {
9649 /*
9650 * we can only copy dst_size bytes before
9651 * we have to get the next destination entry
9652 */
9653 copy_size = dst_size;
9654 } else {
9655 /*
9656 * we can only copy src_size bytes before
9657 * we have to get the next source copy entry
9658 */
9659 copy_size = src_size;
9660 }
9661
9662 if (copy_size > amount_left) {
9663 copy_size = amount_left;
9664 }
9665 /*
9666 * Entry needs copy: create a shadow object for the
9667 * copy-on-write region.
9668 */
9669 if (entry->needs_copy &&
9670 ((entry->protection & VM_PROT_WRITE) != 0)) {
9671 if (vm_map_lock_read_to_write(dst_map)) {
9672 vm_map_lock_read(dst_map);
9673 goto RetryLookup;
9674 }
9675 VME_OBJECT_SHADOW(entry,
9676 (vm_map_size_t)(entry->vme_end
9677 - entry->vme_start));
9678 entry->needs_copy = FALSE;
9679 vm_map_lock_write_to_read(dst_map);
9680 }
9681 dst_object = VME_OBJECT(entry);
9682 /*
9683 * Unlike with the virtual (aligned) copy, we're going
9684 * to fault on it, therefore we need a target object.
9685 */
9686 if (dst_object == VM_OBJECT_NULL) {
9687 if (vm_map_lock_read_to_write(dst_map)) {
9688 vm_map_lock_read(dst_map);
9689 goto RetryLookup;
9690 }
9691 dst_object = vm_object_allocate((vm_map_size_t)
9692 entry->vme_end - entry->vme_start);
9693 VME_OBJECT_SET(entry, dst_object);
9694 VME_OFFSET_SET(entry, 0);
9695 assert(entry->use_pmap);
9696 vm_map_lock_write_to_read(dst_map);
9697 }
9698 /*
9699 * Take an object reference and unlock map. The "entry" may
9700 * disappear or change when the map is unlocked.
9701 */
9702 vm_object_reference(dst_object);
9703 version.main_timestamp = dst_map->timestamp;
9704 entry_offset = VME_OFFSET(entry);
9705 entry_end = entry->vme_end;
9706 vm_map_unlock_read(dst_map);
9707 /*
9708 * Copy as much as possible in one pass
9709 */
9710 kr = vm_fault_copy(
9711 VME_OBJECT(copy_entry),
9712 VME_OFFSET(copy_entry) + src_offset,
9713 &copy_size,
9714 dst_object,
9715 entry_offset + dst_offset,
9716 dst_map,
9717 &version,
9718 THREAD_UNINT );
9719
9720 start += copy_size;
9721 src_offset += copy_size;
9722 amount_left -= copy_size;
9723 /*
9724 * Release the object reference
9725 */
9726 vm_object_deallocate(dst_object);
9727 /*
9728 * If a hard error occurred, return it now
9729 */
9730 if (kr != KERN_SUCCESS) {
9731 return kr;
9732 }
9733
9734 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9735 || amount_left == 0) {
9736 /*
9737 * all done with this copy entry, dispose.
9738 */
9739 copy_entry_next = copy_entry->vme_next;
9740
9741 if (discard_on_success) {
9742 vm_map_copy_entry_unlink(copy, copy_entry);
9743 assert(!copy_entry->is_sub_map);
9744 vm_object_deallocate(VME_OBJECT(copy_entry));
9745 vm_map_copy_entry_dispose(copy, copy_entry);
9746 }
9747
9748 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9749 amount_left) {
9750 /*
9751 * not finished copying but ran out of source
9752 */
9753 return KERN_INVALID_ADDRESS;
9754 }
9755
9756 copy_entry = copy_entry_next;
9757
9758 src_offset = 0;
9759 }
9760
9761 if (amount_left == 0) {
9762 return KERN_SUCCESS;
9763 }
9764
9765 vm_map_lock_read(dst_map);
9766 if (version.main_timestamp == dst_map->timestamp) {
9767 if (start == entry_end) {
9768 /*
9769 * destination region is split. Use the version
9770 * information to avoid a lookup in the normal
9771 * case.
9772 */
9773 entry = entry->vme_next;
9774 /*
9775 * should be contiguous. Fail if we encounter
9776 * a hole in the destination.
9777 */
9778 if (start != entry->vme_start) {
9779 vm_map_unlock_read(dst_map);
9780 return KERN_INVALID_ADDRESS;
9781 }
9782 }
9783 } else {
9784 /*
9785 * Map version check failed.
9786 * we must lookup the entry because somebody
9787 * might have changed the map behind our backs.
9788 */
9789 RetryLookup:
9790 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
9791 vm_map_unlock_read(dst_map);
9792 return KERN_INVALID_ADDRESS;
9793 }
9794 }
9795 }/* while */
9796
9797 return KERN_SUCCESS;
9798 }/* vm_map_copy_overwrite_unaligned */
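/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): each iteration above copies the smallest of "bytes left in
 * the destination entry", "bytes left in the source copy entry" and
 * "total bytes remaining".  The helper name is hypothetical.
 */
static inline vm_map_size_t
example_unaligned_chunk_size(
	vm_map_size_t dst_size,
	vm_map_size_t src_size,
	vm_map_size_t amount_left)
{
	vm_map_size_t copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left) {
		copy_size = amount_left;
	}
	return copy_size;
}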
9799
9800 /*
9801 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9802 *
9803 * Description:
9804 * Does all the vm_trickery possible for whole pages.
9805 *
9806 * Implementation:
9807 *
9808 * If there are no permanent objects in the destination,
9809 * and the source and destination map entry zones match,
9810 * and the destination map entry is not shared,
9811 * then the map entries can be deleted and replaced
9812 * with those from the copy. The following code is the
9813 * basic idea of what to do, but there are lots of annoying
9814 * little details about getting protection and inheritance
9815 * right. Should add protection, inheritance, and sharing checks
9816 * to the above pass and make sure that no wiring is involved.
9817 */
9818
9819 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9820 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9821 int vm_map_copy_overwrite_aligned_src_large = 0;
9822
9823 static kern_return_t
9824 vm_map_copy_overwrite_aligned(
9825 vm_map_t dst_map,
9826 vm_map_entry_t tmp_entry,
9827 vm_map_copy_t copy,
9828 vm_map_offset_t start,
9829 __unused pmap_t pmap)
9830 {
9831 vm_object_t object;
9832 vm_map_entry_t copy_entry;
9833 vm_map_size_t copy_size;
9834 vm_map_size_t size;
9835 vm_map_entry_t entry;
9836
9837 while ((copy_entry = vm_map_copy_first_entry(copy))
9838 != vm_map_copy_to_entry(copy)) {
9839 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9840
9841 entry = tmp_entry;
9842 if (entry->is_sub_map) {
9843 /* unnested when clipped earlier */
9844 assert(!entry->use_pmap);
9845 }
9846 if (entry == vm_map_to_entry(dst_map)) {
9847 vm_map_unlock(dst_map);
9848 return KERN_INVALID_ADDRESS;
9849 }
9850 size = (entry->vme_end - entry->vme_start);
9851 /*
9852 * Make sure that no holes popped up in the
9853 * address map, and that the protection is
9854 * still valid, in case the map was unlocked
9855 * earlier.
9856 */
9857
9858 if ((entry->vme_start != start) || ((entry->is_sub_map)
9859 && !entry->needs_copy)) {
9860 vm_map_unlock(dst_map);
9861 return KERN_INVALID_ADDRESS;
9862 }
9863 assert(entry != vm_map_to_entry(dst_map));
9864
9865 /*
9866 * Check protection again
9867 */
9868
9869 if (!(entry->protection & VM_PROT_WRITE)) {
9870 vm_map_unlock(dst_map);
9871 return KERN_PROTECTION_FAILURE;
9872 }
9873
9874 /*
9875 * Adjust to source size first
9876 */
9877
9878 if (copy_size < size) {
9879 if (entry->map_aligned &&
9880 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9881 VM_MAP_PAGE_MASK(dst_map))) {
9882 /* no longer map-aligned */
9883 entry->map_aligned = FALSE;
9884 }
9885 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9886 size = copy_size;
9887 }
9888
9889 /*
9890 * Adjust to destination size
9891 */
9892
9893 if (size < copy_size) {
9894 vm_map_copy_clip_end(copy, copy_entry,
9895 copy_entry->vme_start + size);
9896 copy_size = size;
9897 }
9898
9899 assert((entry->vme_end - entry->vme_start) == size);
9900 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9901 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9902
9903 /*
9904 * If the destination contains temporary unshared memory,
9905 * we can perform the copy by throwing it away and
9906 * installing the source data.
9907 */
9908
9909 object = VME_OBJECT(entry);
9910 if ((!entry->is_shared &&
9911 ((object == VM_OBJECT_NULL) ||
9912 (object->internal && !object->true_share))) ||
9913 entry->needs_copy) {
9914 vm_object_t old_object = VME_OBJECT(entry);
9915 vm_object_offset_t old_offset = VME_OFFSET(entry);
9916 vm_object_offset_t offset;
9917
9918 /*
9919 * Ensure that the source and destination aren't
9920 * identical
9921 */
9922 if (old_object == VME_OBJECT(copy_entry) &&
9923 old_offset == VME_OFFSET(copy_entry)) {
9924 vm_map_copy_entry_unlink(copy, copy_entry);
9925 vm_map_copy_entry_dispose(copy, copy_entry);
9926
9927 if (old_object != VM_OBJECT_NULL) {
9928 vm_object_deallocate(old_object);
9929 }
9930
9931 start = tmp_entry->vme_end;
9932 tmp_entry = tmp_entry->vme_next;
9933 continue;
9934 }
9935
9936 #if !CONFIG_EMBEDDED
9937 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9938 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9939 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9940 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9941 copy_size <= __TRADEOFF1_COPY_SIZE) {
9942 /*
9943 * Virtual vs. Physical copy tradeoff #1.
9944 *
9945 * Copying only a few pages out of a large
9946 * object: do a physical copy instead of
9947 * a virtual copy, to avoid possibly keeping
9948 * the entire large object alive because of
9949 * those few copy-on-write pages.
9950 */
9951 vm_map_copy_overwrite_aligned_src_large++;
9952 goto slow_copy;
9953 }
9954 #endif /* !CONFIG_EMBEDDED */
9955
9956 if ((dst_map->pmap != kernel_pmap) &&
9957 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9958 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
9959 vm_object_t new_object, new_shadow;
9960
9961 /*
9962 * We're about to map something over a mapping
9963 * established by malloc()...
9964 */
9965 new_object = VME_OBJECT(copy_entry);
9966 if (new_object != VM_OBJECT_NULL) {
9967 vm_object_lock_shared(new_object);
9968 }
9969 while (new_object != VM_OBJECT_NULL &&
9970 #if !CONFIG_EMBEDDED
9971 !new_object->true_share &&
9972 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9973 #endif /* !CONFIG_EMBEDDED */
9974 new_object->internal) {
9975 new_shadow = new_object->shadow;
9976 if (new_shadow == VM_OBJECT_NULL) {
9977 break;
9978 }
9979 vm_object_lock_shared(new_shadow);
9980 vm_object_unlock(new_object);
9981 new_object = new_shadow;
9982 }
9983 if (new_object != VM_OBJECT_NULL) {
9984 if (!new_object->internal) {
9985 /*
9986 * The new mapping is backed
9987 * by an external object. We
9988 * don't want malloc'ed memory
9989 * to be replaced with such a
9990 * non-anonymous mapping, so
9991 * let's go off the optimized
9992 * path...
9993 */
9994 vm_map_copy_overwrite_aligned_src_not_internal++;
9995 vm_object_unlock(new_object);
9996 goto slow_copy;
9997 }
9998 #if !CONFIG_EMBEDDED
9999 if (new_object->true_share ||
10000 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10001 /*
10002 * Same if there's a "true_share"
10003 * object in the shadow chain, or
10004 * an object with a copy strategy
10005 * other than the default (SYMMETRIC) one.
10006 */
10007 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10008 vm_object_unlock(new_object);
10009 goto slow_copy;
10010 }
10011 #endif /* !CONFIG_EMBEDDED */
10012 vm_object_unlock(new_object);
10013 }
10014 /*
10015 * The new mapping is still backed by
10016 * anonymous (internal) memory, so it's
10017 * OK to substitute it for the original
10018 * malloc() mapping.
10019 */
10020 }
10021
10022 if (old_object != VM_OBJECT_NULL) {
10023 if (entry->is_sub_map) {
10024 if (entry->use_pmap) {
10025 #ifndef NO_NESTED_PMAP
10026 pmap_unnest(dst_map->pmap,
10027 (addr64_t)entry->vme_start,
10028 entry->vme_end - entry->vme_start);
10029 #endif /* NO_NESTED_PMAP */
10030 if (dst_map->mapped_in_other_pmaps) {
10031 /* clean up parent */
10032 /* map/maps */
10033 vm_map_submap_pmap_clean(
10034 dst_map, entry->vme_start,
10035 entry->vme_end,
10036 VME_SUBMAP(entry),
10037 VME_OFFSET(entry));
10038 }
10039 } else {
10040 vm_map_submap_pmap_clean(
10041 dst_map, entry->vme_start,
10042 entry->vme_end,
10043 VME_SUBMAP(entry),
10044 VME_OFFSET(entry));
10045 }
10046 vm_map_deallocate(VME_SUBMAP(entry));
10047 } else {
10048 if (dst_map->mapped_in_other_pmaps) {
10049 vm_object_pmap_protect_options(
10050 VME_OBJECT(entry),
10051 VME_OFFSET(entry),
10052 entry->vme_end
10053 - entry->vme_start,
10054 PMAP_NULL,
10055 entry->vme_start,
10056 VM_PROT_NONE,
10057 PMAP_OPTIONS_REMOVE);
10058 } else {
10059 pmap_remove_options(
10060 dst_map->pmap,
10061 (addr64_t)(entry->vme_start),
10062 (addr64_t)(entry->vme_end),
10063 PMAP_OPTIONS_REMOVE);
10064 }
10065 vm_object_deallocate(old_object);
10066 }
10067 }
10068
10069 if (entry->iokit_acct) {
10070 /* keep using iokit accounting */
10071 entry->use_pmap = FALSE;
10072 } else {
10073 /* use pmap accounting */
10074 entry->use_pmap = TRUE;
10075 }
10076 entry->is_sub_map = FALSE;
10077 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10078 object = VME_OBJECT(entry);
10079 entry->needs_copy = copy_entry->needs_copy;
10080 entry->wired_count = 0;
10081 entry->user_wired_count = 0;
10082 offset = VME_OFFSET(copy_entry);
10083 VME_OFFSET_SET(entry, offset);
10084
10085 vm_map_copy_entry_unlink(copy, copy_entry);
10086 vm_map_copy_entry_dispose(copy, copy_entry);
10087
10088 /*
10089 * We could try to push pages into the pmap at this point, BUT
10090 * this optimization only saved on average 2 us per page if ALL
10091 * the pages in the source were currently mapped
10092 * and ALL the pages in the dest were touched. If fewer
10093 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
10094 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10095 */
10096
10097 /*
10098 * Set up for the next iteration. The map
10099 * has not been unlocked, so the next
10100 * address should be at the end of this
10101 * entry, and the next map entry should be
10102 * the one following it.
10103 */
10104
10105 start = tmp_entry->vme_end;
10106 tmp_entry = tmp_entry->vme_next;
10107 } else {
10108 vm_map_version_t version;
10109 vm_object_t dst_object;
10110 vm_object_offset_t dst_offset;
10111 kern_return_t r;
10112
10113 slow_copy:
10114 if (entry->needs_copy) {
10115 VME_OBJECT_SHADOW(entry,
10116 (entry->vme_end -
10117 entry->vme_start));
10118 entry->needs_copy = FALSE;
10119 }
10120
10121 dst_object = VME_OBJECT(entry);
10122 dst_offset = VME_OFFSET(entry);
10123
10124 /*
10125 * Take an object reference, and record
10126 * the map version information so that the
10127 * map can be safely unlocked.
10128 */
10129
10130 if (dst_object == VM_OBJECT_NULL) {
10131 /*
10132 * We would usually have just taken the
10133 * optimized path above if the destination
10134 * object has not been allocated yet. But we
10135 * now disable that optimization if the copy
10136 * entry's object is not backed by anonymous
10137 * memory to avoid replacing malloc'ed
10138 * (i.e. re-usable) anonymous memory with a
10139 * not-so-anonymous mapping.
10140 * So we have to handle this case here and
10141 * allocate a new VM object for this map entry.
10142 */
10143 dst_object = vm_object_allocate(
10144 entry->vme_end - entry->vme_start);
10145 dst_offset = 0;
10146 VME_OBJECT_SET(entry, dst_object);
10147 VME_OFFSET_SET(entry, dst_offset);
10148 assert(entry->use_pmap);
10149 }
10150
10151 vm_object_reference(dst_object);
10152
10153 /* account for unlock bumping up timestamp */
10154 version.main_timestamp = dst_map->timestamp + 1;
10155
10156 vm_map_unlock(dst_map);
10157
10158 /*
10159 * Copy as much as possible in one pass
10160 */
10161
10162 copy_size = size;
10163 r = vm_fault_copy(
10164 VME_OBJECT(copy_entry),
10165 VME_OFFSET(copy_entry),
10166 &copy_size,
10167 dst_object,
10168 dst_offset,
10169 dst_map,
10170 &version,
10171 THREAD_UNINT );
10172
10173 /*
10174 * Release the object reference
10175 */
10176
10177 vm_object_deallocate(dst_object);
10178
10179 /*
10180 * If a hard error occurred, return it now
10181 */
10182
10183 if (r != KERN_SUCCESS) {
10184 return r;
10185 }
10186
10187 if (copy_size != 0) {
10188 /*
10189 * Dispose of the copied region
10190 */
10191
10192 vm_map_copy_clip_end(copy, copy_entry,
10193 copy_entry->vme_start + copy_size);
10194 vm_map_copy_entry_unlink(copy, copy_entry);
10195 vm_object_deallocate(VME_OBJECT(copy_entry));
10196 vm_map_copy_entry_dispose(copy, copy_entry);
10197 }
10198
10199 /*
10200 * Pick up in the destination map where we left off.
10201 *
10202 * Use the version information to avoid a lookup
10203 * in the normal case.
10204 */
10205
10206 start += copy_size;
10207 vm_map_lock(dst_map);
10208 if (version.main_timestamp == dst_map->timestamp &&
10209 copy_size != 0) {
10210 /* We can safely use saved tmp_entry value */
10211
10212 if (tmp_entry->map_aligned &&
10213 !VM_MAP_PAGE_ALIGNED(
10214 start,
10215 VM_MAP_PAGE_MASK(dst_map))) {
10216 /* no longer map-aligned */
10217 tmp_entry->map_aligned = FALSE;
10218 }
10219 vm_map_clip_end(dst_map, tmp_entry, start);
10220 tmp_entry = tmp_entry->vme_next;
10221 } else {
10222 /* Must do lookup of tmp_entry */
10223
10224 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10225 vm_map_unlock(dst_map);
10226 return KERN_INVALID_ADDRESS;
10227 }
10228 if (tmp_entry->map_aligned &&
10229 !VM_MAP_PAGE_ALIGNED(
10230 start,
10231 VM_MAP_PAGE_MASK(dst_map))) {
10232 /* no longer map-aligned */
10233 tmp_entry->map_aligned = FALSE;
10234 }
10235 vm_map_clip_start(dst_map, tmp_entry, start);
10236 }
10237 }
10238 }/* while */
10239
10240 return KERN_SUCCESS;
10241 }/* vm_map_copy_overwrite_aligned */
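
/*
 * A minimal sketch (kept out of the build with #if 0) restating the
 * "virtual vs. physical copy" tradeoff #1 used above, with the
 * thresholds written out (64 MB source object, 128 KB copy).  The
 * helper name is hypothetical and nothing in this file calls it.
 */
#if 0
static inline boolean_t
prefer_physical_copy_tradeoff1(
	vm_object_size_t        src_object_size,
	vm_map_size_t           copy_size)
{
	/*
	 * Copying only a few pages out of a large object: a physical
	 * copy avoids keeping the entire large object alive through
	 * copy-on-write references to just those few pages.
	 */
	return src_object_size >= (64ULL * 1024 * 1024) &&   /* __TRADEOFF1_OBJ_SIZE */
	       copy_size <= (128 * 1024);                    /* __TRADEOFF1_COPY_SIZE */
}
#endif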
10242
10243 /*
10244 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10245 *
10246 * Description:
10247 * Copy in data to a kernel buffer from space in the
10248 * source map. The original space may be optionally
10249 * deallocated.
10250 *
10251 * If successful, returns a new copy object.
10252 */
10253 static kern_return_t
10254 vm_map_copyin_kernel_buffer(
10255 vm_map_t src_map,
10256 vm_map_offset_t src_addr,
10257 vm_map_size_t len,
10258 boolean_t src_destroy,
10259 vm_map_copy_t *copy_result)
10260 {
10261 kern_return_t kr;
10262 vm_map_copy_t copy;
10263 vm_size_t kalloc_size;
10264
10265 if (len > msg_ool_size_small) {
10266 return KERN_INVALID_ARGUMENT;
10267 }
10268
10269 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10270
10271 copy = (vm_map_copy_t)kalloc(kalloc_size);
10272 if (copy == VM_MAP_COPY_NULL) {
10273 return KERN_RESOURCE_SHORTAGE;
10274 }
10275 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10276 copy->size = len;
10277 copy->offset = 0;
10278
10279 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10280 if (kr != KERN_SUCCESS) {
10281 kfree(copy, kalloc_size);
10282 return kr;
10283 }
10284 if (src_destroy) {
10285 (void) vm_map_remove(
10286 src_map,
10287 vm_map_trunc_page(src_addr,
10288 VM_MAP_PAGE_MASK(src_map)),
10289 vm_map_round_page(src_addr + len,
10290 VM_MAP_PAGE_MASK(src_map)),
10291 (VM_MAP_REMOVE_INTERRUPTIBLE |
10292 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10293 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10294 }
10295 *copy_result = copy;
10296 return KERN_SUCCESS;
10297 }
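
/*
 * A minimal sketch (kept out of the build with #if 0) of the sizing
 * rule used by vm_map_copyin_kernel_buffer() above: the payload is
 * stored inline, right after the vm_map_copy header, so the
 * allocation is the header size plus the payload length, and only
 * payloads up to "msg_ool_size_small" qualify for this path.  The
 * helper name is hypothetical.
 */
#if 0
static kern_return_t
kernel_buffer_copy_alloc_size(
	vm_map_size_t           len,
	vm_size_t               *alloc_size)    /* OUT */
{
	if (len > msg_ool_size_small) {
		/* too large for the kernel-buffer strategy */
		return KERN_INVALID_ARGUMENT;
	}
	/* header immediately followed by "len" bytes of inline data */
	*alloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
	return KERN_SUCCESS;
}
#endif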
10298
10299 /*
10300 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10301 *
10302 * Description:
10303 * Copy out data from a kernel buffer into space in the
10304 * destination map. The space may be optionally dynamically
10305 * allocated.
10306 *
10307 * If successful, consumes the copy object.
10308 * Otherwise, the caller is responsible for it.
10309 */
10310 static int vm_map_copyout_kernel_buffer_failures = 0;
10311 static kern_return_t
10312 vm_map_copyout_kernel_buffer(
10313 vm_map_t map,
10314 vm_map_address_t *addr, /* IN/OUT */
10315 vm_map_copy_t copy,
10316 vm_map_size_t copy_size,
10317 boolean_t overwrite,
10318 boolean_t consume_on_success)
10319 {
10320 kern_return_t kr = KERN_SUCCESS;
10321 thread_t thread = current_thread();
10322
10323 assert(copy->size == copy_size);
10324
10325 /*
10326 * check for corrupted vm_map_copy structure
10327 */
10328 if (copy_size > msg_ool_size_small || copy->offset) {
10329 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10330 (long long)copy->size, (long long)copy->offset);
10331 }
10332
10333 if (!overwrite) {
10334 /*
10335 * Allocate space in the target map for the data
10336 */
10337 *addr = 0;
10338 kr = vm_map_enter(map,
10339 addr,
10340 vm_map_round_page(copy_size,
10341 VM_MAP_PAGE_MASK(map)),
10342 (vm_map_offset_t) 0,
10343 VM_FLAGS_ANYWHERE,
10344 VM_MAP_KERNEL_FLAGS_NONE,
10345 VM_KERN_MEMORY_NONE,
10346 VM_OBJECT_NULL,
10347 (vm_object_offset_t) 0,
10348 FALSE,
10349 VM_PROT_DEFAULT,
10350 VM_PROT_ALL,
10351 VM_INHERIT_DEFAULT);
10352 if (kr != KERN_SUCCESS) {
10353 return kr;
10354 }
10355 #if KASAN
10356 if (map->pmap == kernel_pmap) {
10357 kasan_notify_address(*addr, copy->size);
10358 }
10359 #endif
10360 }
10361
10362 /*
10363 * Copyout the data from the kernel buffer to the target map.
10364 */
10365 if (thread->map == map) {
10366 /*
10367 * If the target map is the current map, just do
10368 * the copy.
10369 */
10370 assert((vm_size_t)copy_size == copy_size);
10371 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10372 kr = KERN_INVALID_ADDRESS;
10373 }
10374 } else {
10375 vm_map_t oldmap;
10376
10377 /*
10378 * If the target map is another map, assume the
10379 * target's address space identity for the duration
10380 * of the copy.
10381 */
10382 vm_map_reference(map);
10383 oldmap = vm_map_switch(map);
10384
10385 assert((vm_size_t)copy_size == copy_size);
10386 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10387 vm_map_copyout_kernel_buffer_failures++;
10388 kr = KERN_INVALID_ADDRESS;
10389 }
10390
10391 (void) vm_map_switch(oldmap);
10392 vm_map_deallocate(map);
10393 }
10394
10395 if (kr != KERN_SUCCESS) {
10396 /* the copy failed, clean up */
10397 if (!overwrite) {
10398 /*
10399 * Deallocate the space we allocated in the target map.
10400 */
10401 (void) vm_map_remove(
10402 map,
10403 vm_map_trunc_page(*addr,
10404 VM_MAP_PAGE_MASK(map)),
10405 vm_map_round_page((*addr +
10406 vm_map_round_page(copy_size,
10407 VM_MAP_PAGE_MASK(map))),
10408 VM_MAP_PAGE_MASK(map)),
10409 VM_MAP_REMOVE_NO_FLAGS);
10410 *addr = 0;
10411 }
10412 } else {
10413 /* copy was successful, discard the copy structure */
10414 if (consume_on_success) {
10415 kfree(copy, copy_size + cpy_kdata_hdr_sz);
10416 }
10417 }
10418
10419 return kr;
10420 }
10421
10422 /*
10423 * Routine: vm_map_copy_insert [internal use only]
10424 *
10425 * Description:
10426 * Link a copy chain ("copy") into a map at the
10427 * specified location (after "where").
10428 * Side effects:
10429 * The copy chain is destroyed.
10430 */
10431 static void
10432 vm_map_copy_insert(
10433 vm_map_t map,
10434 vm_map_entry_t after_where,
10435 vm_map_copy_t copy)
10436 {
10437 vm_map_entry_t entry;
10438
10439 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10440 entry = vm_map_copy_first_entry(copy);
10441 vm_map_copy_entry_unlink(copy, entry);
10442 vm_map_store_entry_link(map, after_where, entry,
10443 VM_MAP_KERNEL_FLAGS_NONE);
10444 after_where = entry;
10445 }
10446 zfree(vm_map_copy_zone, copy);
10447 }
10448
10449 void
10450 vm_map_copy_remap(
10451 vm_map_t map,
10452 vm_map_entry_t where,
10453 vm_map_copy_t copy,
10454 vm_map_offset_t adjustment,
10455 vm_prot_t cur_prot,
10456 vm_prot_t max_prot,
10457 vm_inherit_t inheritance)
10458 {
10459 vm_map_entry_t copy_entry, new_entry;
10460
10461 for (copy_entry = vm_map_copy_first_entry(copy);
10462 copy_entry != vm_map_copy_to_entry(copy);
10463 copy_entry = copy_entry->vme_next) {
10464 /* get a new VM map entry for the map */
10465 new_entry = vm_map_entry_create(map,
10466 !map->hdr.entries_pageable);
10467 /* copy the "copy entry" to the new entry */
10468 vm_map_entry_copy(new_entry, copy_entry);
10469 /* adjust "start" and "end" */
10470 new_entry->vme_start += adjustment;
10471 new_entry->vme_end += adjustment;
10472 /* clear some attributes */
10473 new_entry->inheritance = inheritance;
10474 new_entry->protection = cur_prot;
10475 new_entry->max_protection = max_prot;
10476 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10477 /* take an extra reference on the entry's "object" */
10478 if (new_entry->is_sub_map) {
10479 assert(!new_entry->use_pmap); /* not nested */
10480 vm_map_lock(VME_SUBMAP(new_entry));
10481 vm_map_reference(VME_SUBMAP(new_entry));
10482 vm_map_unlock(VME_SUBMAP(new_entry));
10483 } else {
10484 vm_object_reference(VME_OBJECT(new_entry));
10485 }
10486 /* insert the new entry in the map */
10487 vm_map_store_entry_link(map, where, new_entry,
10488 VM_MAP_KERNEL_FLAGS_NONE);
10489 /* continue inserting the "copy entries" after the new entry */
10490 where = new_entry;
10491 }
10492 }
10493
10494
10495 /*
10496 * Returns true if *size matches (or is in the range of) copy->size.
10497 * Upon returning true, the *size field is updated with the actual size of the
10498 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10499 */
10500 boolean_t
10501 vm_map_copy_validate_size(
10502 vm_map_t dst_map,
10503 vm_map_copy_t copy,
10504 vm_map_size_t *size)
10505 {
10506 if (copy == VM_MAP_COPY_NULL) {
10507 return FALSE;
10508 }
10509 vm_map_size_t copy_sz = copy->size;
10510 vm_map_size_t sz = *size;
10511 switch (copy->type) {
10512 case VM_MAP_COPY_OBJECT:
10513 case VM_MAP_COPY_KERNEL_BUFFER:
10514 if (sz == copy_sz) {
10515 return TRUE;
10516 }
10517 break;
10518 case VM_MAP_COPY_ENTRY_LIST:
10519 /*
10520 * potential page-size rounding prevents us from exactly
10521 * validating this flavor of vm_map_copy, but we can at least
10522 * assert that it's within a range.
10523 */
10524 if (copy_sz >= sz &&
10525 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10526 *size = copy_sz;
10527 return TRUE;
10528 }
10529 break;
10530 default:
10531 break;
10532 }
10533 return FALSE;
10534 }
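
/*
 * A sketch (kept out of the build with #if 0) of how a caller might
 * pair vm_map_copy_validate_size() with vm_map_copyout_size(): the
 * claimed size is checked against the copy object first, and the
 * possibly page-rounded actual size is then used for the copyout.
 * The helper name is hypothetical.
 */
#if 0
static kern_return_t
copyout_with_claimed_size(
	vm_map_t                dst_map,
	vm_map_copy_t           copy,
	vm_map_size_t           claimed_size,
	vm_map_address_t        *dst_addr)      /* OUT */
{
	vm_map_size_t size = claimed_size;

	if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
		/* claimed size doesn't match the copy object */
		return KERN_INVALID_ARGUMENT;
	}
	/* "size" now holds the actual size of the copy object */
	return vm_map_copyout_size(dst_map, dst_addr, copy, size);
}
#endif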
10535
10536 /*
10537 * Routine: vm_map_copyout_size
10538 *
10539 * Description:
10540 * Copy out a copy chain ("copy") into newly-allocated
10541 * space in the destination map. Uses a prevalidated
10542 * size for the copy object (vm_map_copy_validate_size).
10543 *
10544 * If successful, consumes the copy object.
10545 * Otherwise, the caller is responsible for it.
10546 */
10547 kern_return_t
10548 vm_map_copyout_size(
10549 vm_map_t dst_map,
10550 vm_map_address_t *dst_addr, /* OUT */
10551 vm_map_copy_t copy,
10552 vm_map_size_t copy_size)
10553 {
10554 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10555 TRUE, /* consume_on_success */
10556 VM_PROT_DEFAULT,
10557 VM_PROT_ALL,
10558 VM_INHERIT_DEFAULT);
10559 }
10560
10561 /*
10562 * Routine: vm_map_copyout
10563 *
10564 * Description:
10565 * Copy out a copy chain ("copy") into newly-allocated
10566 * space in the destination map.
10567 *
10568 * If successful, consumes the copy object.
10569 * Otherwise, the caller is responsible for it.
10570 */
10571 kern_return_t
10572 vm_map_copyout(
10573 vm_map_t dst_map,
10574 vm_map_address_t *dst_addr, /* OUT */
10575 vm_map_copy_t copy)
10576 {
10577 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10578 TRUE, /* consume_on_success */
10579 VM_PROT_DEFAULT,
10580 VM_PROT_ALL,
10581 VM_INHERIT_DEFAULT);
10582 }
10583
10584 kern_return_t
10585 vm_map_copyout_internal(
10586 vm_map_t dst_map,
10587 vm_map_address_t *dst_addr, /* OUT */
10588 vm_map_copy_t copy,
10589 vm_map_size_t copy_size,
10590 boolean_t consume_on_success,
10591 vm_prot_t cur_protection,
10592 vm_prot_t max_protection,
10593 vm_inherit_t inheritance)
10594 {
10595 vm_map_size_t size;
10596 vm_map_size_t adjustment;
10597 vm_map_offset_t start;
10598 vm_object_offset_t vm_copy_start;
10599 vm_map_entry_t last;
10600 vm_map_entry_t entry;
10601 vm_map_entry_t hole_entry;
10602
10603 /*
10604 * Check for null copy object.
10605 */
10606
10607 if (copy == VM_MAP_COPY_NULL) {
10608 *dst_addr = 0;
10609 return KERN_SUCCESS;
10610 }
10611
10612 if (copy->size != copy_size) {
10613 *dst_addr = 0;
10614 return KERN_FAILURE;
10615 }
10616
10617 /*
10618 * Check for special copy object, created
10619 * by vm_map_copyin_object.
10620 */
10621
10622 if (copy->type == VM_MAP_COPY_OBJECT) {
10623 vm_object_t object = copy->cpy_object;
10624 kern_return_t kr;
10625 vm_object_offset_t offset;
10626
10627 offset = vm_object_trunc_page(copy->offset);
10628 size = vm_map_round_page((copy_size +
10629 (vm_map_size_t)(copy->offset -
10630 offset)),
10631 VM_MAP_PAGE_MASK(dst_map));
10632 *dst_addr = 0;
10633 kr = vm_map_enter(dst_map, dst_addr, size,
10634 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10635 VM_MAP_KERNEL_FLAGS_NONE,
10636 VM_KERN_MEMORY_NONE,
10637 object, offset, FALSE,
10638 VM_PROT_DEFAULT, VM_PROT_ALL,
10639 VM_INHERIT_DEFAULT);
10640 if (kr != KERN_SUCCESS) {
10641 return kr;
10642 }
10643 /* Account for non-pagealigned copy object */
10644 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10645 if (consume_on_success) {
10646 zfree(vm_map_copy_zone, copy);
10647 }
10648 return KERN_SUCCESS;
10649 }
10650
10651 /*
10652 * Check for special kernel buffer allocated
10653 * by new_ipc_kmsg_copyin.
10654 */
10655
10656 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10657 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10658 copy, copy_size, FALSE,
10659 consume_on_success);
10660 }
10661
10662
10663 /*
10664 * Find space for the data
10665 */
10666
10667 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10668 VM_MAP_COPY_PAGE_MASK(copy));
10669 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10670 VM_MAP_COPY_PAGE_MASK(copy))
10671 - vm_copy_start;
10672
10673
10674 StartAgain:;
10675
10676 vm_map_lock(dst_map);
10677 if (dst_map->disable_vmentry_reuse == TRUE) {
10678 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10679 last = entry;
10680 } else {
10681 if (dst_map->holelistenabled) {
10682 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10683
10684 if (hole_entry == NULL) {
10685 /*
10686 * No more space in the map?
10687 */
10688 vm_map_unlock(dst_map);
10689 return KERN_NO_SPACE;
10690 }
10691
10692 last = hole_entry;
10693 start = last->vme_start;
10694 } else {
10695 assert(first_free_is_valid(dst_map));
10696 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10697 vm_map_min(dst_map) : last->vme_end;
10698 }
10699 start = vm_map_round_page(start,
10700 VM_MAP_PAGE_MASK(dst_map));
10701 }
10702
10703 while (TRUE) {
10704 vm_map_entry_t next = last->vme_next;
10705 vm_map_offset_t end = start + size;
10706
10707 if ((end > dst_map->max_offset) || (end < start)) {
10708 if (dst_map->wait_for_space) {
10709 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10710 assert_wait((event_t) dst_map,
10711 THREAD_INTERRUPTIBLE);
10712 vm_map_unlock(dst_map);
10713 thread_block(THREAD_CONTINUE_NULL);
10714 goto StartAgain;
10715 }
10716 }
10717 vm_map_unlock(dst_map);
10718 return KERN_NO_SPACE;
10719 }
10720
10721 if (dst_map->holelistenabled) {
10722 if (last->vme_end >= end) {
10723 break;
10724 }
10725 } else {
10726 /*
10727 * If there are no more entries, we must win.
10728 *
10729 * OR
10730 *
10731 * If there is another entry, it must be
10732 * after the end of the potential new region.
10733 */
10734
10735 if (next == vm_map_to_entry(dst_map)) {
10736 break;
10737 }
10738
10739 if (next->vme_start >= end) {
10740 break;
10741 }
10742 }
10743
10744 last = next;
10745
10746 if (dst_map->holelistenabled) {
10747 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10748 /*
10749 * Wrapped around
10750 */
10751 vm_map_unlock(dst_map);
10752 return KERN_NO_SPACE;
10753 }
10754 start = last->vme_start;
10755 } else {
10756 start = last->vme_end;
10757 }
10758 start = vm_map_round_page(start,
10759 VM_MAP_PAGE_MASK(dst_map));
10760 }
10761
10762 if (dst_map->holelistenabled) {
10763 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10764 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10765 }
10766 }
10767
10768
10769 adjustment = start - vm_copy_start;
10770 if (!consume_on_success) {
10771 /*
10772 * We're not allowed to consume "copy", so we'll have to
10773 * copy its map entries into the destination map below.
10774 * No need to re-allocate map entries from the correct
10775 * (pageable or not) zone, since we'll get new map entries
10776 * during the transfer.
10777 * We'll also adjust the map entries' "start" and "end"
10778 * during the transfer, to keep "copy"'s entries consistent
10779 * with its "offset".
10780 */
10781 goto after_adjustments;
10782 }
10783
10784 /*
10785 * Since we're going to just drop the map
10786 * entries from the copy into the destination
10787 * map, they must come from the same pool.
10788 */
10789
10790 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10791 /*
10792 * Mismatches occur when dealing with the default
10793 * pager.
10794 */
10795 zone_t old_zone;
10796 vm_map_entry_t next, new;
10797
10798 /*
10799 * Find the zone that the copies were allocated from
10800 */
10801
10802 entry = vm_map_copy_first_entry(copy);
10803
10804 /*
10805 * Reinitialize the copy so that vm_map_copy_entry_link
10806 * will work.
10807 */
10808 vm_map_store_copy_reset(copy, entry);
10809 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10810
10811 /*
10812 * Copy each entry.
10813 */
10814 while (entry != vm_map_copy_to_entry(copy)) {
10815 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10816 vm_map_entry_copy_full(new, entry);
10817 new->vme_no_copy_on_read = FALSE;
10818 assert(!new->iokit_acct);
10819 if (new->is_sub_map) {
10820 /* clr address space specifics */
10821 new->use_pmap = FALSE;
10822 }
10823 vm_map_copy_entry_link(copy,
10824 vm_map_copy_last_entry(copy),
10825 new);
10826 next = entry->vme_next;
10827 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10828 zfree(old_zone, entry);
10829 entry = next;
10830 }
10831 }
10832
10833 /*
10834 * Adjust the addresses in the copy chain, and
10835 * reset the region attributes.
10836 */
10837
10838 for (entry = vm_map_copy_first_entry(copy);
10839 entry != vm_map_copy_to_entry(copy);
10840 entry = entry->vme_next) {
10841 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10842 /*
10843 * We're injecting this copy entry into a map that
10844 * has the standard page alignment, so clear
10845 * "map_aligned" (which might have been inherited
10846 * from the original map entry).
10847 */
10848 entry->map_aligned = FALSE;
10849 }
10850
10851 entry->vme_start += adjustment;
10852 entry->vme_end += adjustment;
10853
10854 if (entry->map_aligned) {
10855 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10856 VM_MAP_PAGE_MASK(dst_map)));
10857 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10858 VM_MAP_PAGE_MASK(dst_map)));
10859 }
10860
10861 entry->inheritance = VM_INHERIT_DEFAULT;
10862 entry->protection = VM_PROT_DEFAULT;
10863 entry->max_protection = VM_PROT_ALL;
10864 entry->behavior = VM_BEHAVIOR_DEFAULT;
10865
10866 /*
10867 * If the entry is now wired,
10868 * map the pages into the destination map.
10869 */
10870 if (entry->wired_count != 0) {
10871 vm_map_offset_t va;
10872 vm_object_offset_t offset;
10873 vm_object_t object;
10874 vm_prot_t prot;
10875 int type_of_fault;
10876
10877 object = VME_OBJECT(entry);
10878 offset = VME_OFFSET(entry);
10879 va = entry->vme_start;
10880
10881 pmap_pageable(dst_map->pmap,
10882 entry->vme_start,
10883 entry->vme_end,
10884 TRUE);
10885
10886 while (va < entry->vme_end) {
10887 vm_page_t m;
10888 struct vm_object_fault_info fault_info = {};
10889
10890 /*
10891 * Look up the page in the object.
10892 * Assert that the page will be found in the
10893 * top object:
10894 * either
10895 * the object was newly created by
10896 * vm_object_copy_slowly, and has
10897 * copies of all of the pages from
10898 * the source object
10899 * or
10900 * the object was moved from the old
10901 * map entry; because the old map
10902 * entry was wired, all of the pages
10903 * were in the top-level object.
10904 * (XXX not true if we wire pages for
10905 * reading)
10906 */
10907 vm_object_lock(object);
10908
10909 m = vm_page_lookup(object, offset);
10910 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10911 m->vmp_absent) {
10912 panic("vm_map_copyout: wiring %p", m);
10913 }
10914
10915 prot = entry->protection;
10916
10917 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10918 prot) {
10919 prot |= VM_PROT_EXECUTE;
10920 }
10921
10922 type_of_fault = DBG_CACHE_HIT_FAULT;
10923
10924 fault_info.user_tag = VME_ALIAS(entry);
10925 fault_info.pmap_options = 0;
10926 if (entry->iokit_acct ||
10927 (!entry->is_sub_map && !entry->use_pmap)) {
10928 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10929 }
10930
10931 vm_fault_enter(m,
10932 dst_map->pmap,
10933 va,
10934 prot,
10935 prot,
10936 VM_PAGE_WIRED(m),
10937 FALSE, /* change_wiring */
10938 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10939 &fault_info,
10940 NULL, /* need_retry */
10941 &type_of_fault);
10942
10943 vm_object_unlock(object);
10944
10945 offset += PAGE_SIZE_64;
10946 va += PAGE_SIZE;
10947 }
10948 }
10949 }
10950
10951 after_adjustments:
10952
10953 /*
10954 * Correct the page alignment for the result
10955 */
10956
10957 *dst_addr = start + (copy->offset - vm_copy_start);
10958
10959 #if KASAN
10960 kasan_notify_address(*dst_addr, size);
10961 #endif
10962
10963 /*
10964 * Update the hints and the map size
10965 */
10966
10967 if (consume_on_success) {
10968 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10969 } else {
10970 SAVE_HINT_MAP_WRITE(dst_map, last);
10971 }
10972
10973 dst_map->size += size;
10974
10975 /*
10976 * Link in the copy
10977 */
10978
10979 if (consume_on_success) {
10980 vm_map_copy_insert(dst_map, last, copy);
10981 } else {
10982 vm_map_copy_remap(dst_map, last, copy, adjustment,
10983 cur_protection, max_protection,
10984 inheritance);
10985 }
10986
10987 vm_map_unlock(dst_map);
10988
10989 /*
10990 * XXX If wiring_required, call vm_map_pageable
10991 */
10992
10993 return KERN_SUCCESS;
10994 }
10995
10996 /*
10997 * Routine: vm_map_copyin
10998 *
10999 * Description:
11000 * see vm_map_copyin_common. Exported via Unsupported.exports.
11001 *
11002 */
11003
11004 #undef vm_map_copyin
11005
11006 kern_return_t
11007 vm_map_copyin(
11008 vm_map_t src_map,
11009 vm_map_address_t src_addr,
11010 vm_map_size_t len,
11011 boolean_t src_destroy,
11012 vm_map_copy_t *copy_result) /* OUT */
11013 {
11014 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11015 FALSE, copy_result, FALSE);
11016 }
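
/*
 * A sketch (kept out of the build with #if 0) of the usual
 * copyin/copyout pairing: capture "len" bytes at "src_addr" in
 * "src_map" as a vm_map_copy_t, then place them at a newly allocated
 * address in "dst_map".  A successful vm_map_copyout() consumes the
 * copy; on failure the caller must discard it.  The helper name is
 * hypothetical.
 */
#if 0
static kern_return_t
copy_region_between_maps(
	vm_map_t                src_map,
	vm_map_address_t        src_addr,
	vm_map_size_t           len,
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr)      /* OUT */
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(src_map, src_addr, len,
	    FALSE,              /* src_destroy: leave the source mapped */
	    &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* copyout did not consume the copy object: discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif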
11017
11018 /*
11019 * Routine: vm_map_copyin_common
11020 *
11021 * Description:
11022 * Copy the specified region (src_addr, len) from the
11023 * source address space (src_map), possibly removing
11024 * the region from the source address space (src_destroy).
11025 *
11026 * Returns:
11027 * A vm_map_copy_t object (copy_result), suitable for
11028 * insertion into another address space (using vm_map_copyout),
11029 * copying over another address space region (using
11030 * vm_map_copy_overwrite). If the copy is unused, it
11031 * should be destroyed (using vm_map_copy_discard).
11032 *
11033 * In/out conditions:
11034 * The source map should not be locked on entry.
11035 */
11036
11037 typedef struct submap_map {
11038 vm_map_t parent_map;
11039 vm_map_offset_t base_start;
11040 vm_map_offset_t base_end;
11041 vm_map_size_t base_len;
11042 struct submap_map *next;
11043 } submap_map_t;
11044
11045 kern_return_t
11046 vm_map_copyin_common(
11047 vm_map_t src_map,
11048 vm_map_address_t src_addr,
11049 vm_map_size_t len,
11050 boolean_t src_destroy,
11051 __unused boolean_t src_volatile,
11052 vm_map_copy_t *copy_result, /* OUT */
11053 boolean_t use_maxprot)
11054 {
11055 int flags;
11056
11057 flags = 0;
11058 if (src_destroy) {
11059 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11060 }
11061 if (use_maxprot) {
11062 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11063 }
11064 return vm_map_copyin_internal(src_map,
11065 src_addr,
11066 len,
11067 flags,
11068 copy_result);
11069 }
11070 kern_return_t
11071 vm_map_copyin_internal(
11072 vm_map_t src_map,
11073 vm_map_address_t src_addr,
11074 vm_map_size_t len,
11075 int flags,
11076 vm_map_copy_t *copy_result) /* OUT */
11077 {
11078 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11079 * in multi-level lookup, this
11080 * entry contains the actual
11081 * vm_object/offset.
11082 */
11083 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11084
11085 vm_map_offset_t src_start; /* Start of current entry --
11086 * where copy is taking place now
11087 */
11088 vm_map_offset_t src_end; /* End of entire region to be
11089 * copied */
11090 vm_map_offset_t src_base;
11091 vm_map_t base_map = src_map;
11092 boolean_t map_share = FALSE;
11093 submap_map_t *parent_maps = NULL;
11094
11095 vm_map_copy_t copy; /* Resulting copy */
11096 vm_map_address_t copy_addr;
11097 vm_map_size_t copy_size;
11098 boolean_t src_destroy;
11099 boolean_t use_maxprot;
11100 boolean_t preserve_purgeable;
11101 boolean_t entry_was_shared;
11102 vm_map_entry_t saved_src_entry;
11103
11104 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11105 return KERN_INVALID_ARGUMENT;
11106 }
11107
11108 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11109 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11110 preserve_purgeable =
11111 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11112
11113 /*
11114 * Check for copies of zero bytes.
11115 */
11116
11117 if (len == 0) {
11118 *copy_result = VM_MAP_COPY_NULL;
11119 return KERN_SUCCESS;
11120 }
11121
11122 /*
11123 * Check that the end address doesn't overflow
11124 */
11125 src_end = src_addr + len;
11126 if (src_end < src_addr) {
11127 return KERN_INVALID_ADDRESS;
11128 }
11129
11130 /*
11131 * Compute (page aligned) start and end of region
11132 */
11133 src_start = vm_map_trunc_page(src_addr,
11134 VM_MAP_PAGE_MASK(src_map));
11135 src_end = vm_map_round_page(src_end,
11136 VM_MAP_PAGE_MASK(src_map));
11137
11138 /*
11139 * If the copy is sufficiently small, use a kernel buffer instead
11140 * of making a virtual copy. The theory being that the cost of
11141 * setting up VM (and taking C-O-W faults) dominates the copy costs
11142 * for small regions.
11143 */
11144 if ((len < msg_ool_size_small) &&
11145 !use_maxprot &&
11146 !preserve_purgeable &&
11147 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11148 /*
11149 * Since the "msg_ool_size_small" threshold was increased and
11150 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11151 * address space limits, we revert to doing a virtual copy if the
11152 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11153 * of the commpage would now fail when it used to work.
11154 */
11155 (src_start >= vm_map_min(src_map) &&
11156 src_start < vm_map_max(src_map) &&
11157 src_end >= vm_map_min(src_map) &&
11158 src_end < vm_map_max(src_map))) {
11159 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11160 src_destroy, copy_result);
11161 }
11162
11163 /*
11164 * Allocate a header element for the list.
11165 *
11166 * Use the start and end in the header to
11167 * remember the endpoints prior to rounding.
11168 */
11169
11170 copy = vm_map_copy_allocate();
11171 copy->type = VM_MAP_COPY_ENTRY_LIST;
11172 copy->cpy_hdr.entries_pageable = TRUE;
11173 #if 00
11174 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11175 #else
11176 /*
11177 * The copy entries can be broken down for a variety of reasons,
11178 * so we can't guarantee that they will remain map-aligned...
11179 * Will need to adjust the first copy_entry's "vme_start" and
11180 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11181 * rather than the original map's alignment.
11182 */
11183 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11184 #endif
11185
11186 vm_map_store_init( &(copy->cpy_hdr));
11187
11188 copy->offset = src_addr;
11189 copy->size = len;
11190
11191 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11192
11193 #define RETURN(x) \
11194 MACRO_BEGIN \
11195 vm_map_unlock(src_map); \
11196 if(src_map != base_map) \
11197 vm_map_deallocate(src_map); \
11198 if (new_entry != VM_MAP_ENTRY_NULL) \
11199 vm_map_copy_entry_dispose(copy,new_entry); \
11200 vm_map_copy_discard(copy); \
11201 { \
11202 submap_map_t *_ptr; \
11203 \
11204 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11205 parent_maps=parent_maps->next; \
11206 if (_ptr->parent_map != base_map) \
11207 vm_map_deallocate(_ptr->parent_map); \
11208 kfree(_ptr, sizeof(submap_map_t)); \
11209 } \
11210 } \
11211 MACRO_RETURN(x); \
11212 MACRO_END
11213
11214 /*
11215 * Find the beginning of the region.
11216 */
11217
11218 vm_map_lock(src_map);
11219
11220 /*
11221 * Lookup the original "src_addr" rather than the truncated
11222 * "src_start", in case "src_start" falls in a non-map-aligned
11223 * map entry *before* the map entry that contains "src_addr"...
11224 */
11225 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11226 RETURN(KERN_INVALID_ADDRESS);
11227 }
11228 if (!tmp_entry->is_sub_map) {
11229 /*
11230 * ... but clip to the map-rounded "src_start" rather than
11231 * "src_addr" to preserve map-alignment. We'll adjust the
11232 * first copy entry at the end, if needed.
11233 */
11234 vm_map_clip_start(src_map, tmp_entry, src_start);
11235 }
11236 if (src_start < tmp_entry->vme_start) {
11237 /*
11238 * Move "src_start" up to the start of the
11239 * first map entry to copy.
11240 */
11241 src_start = tmp_entry->vme_start;
11242 }
11243 /* set for later submap fix-up */
11244 copy_addr = src_start;
11245
11246 /*
11247 * Go through entries until we get to the end.
11248 */
11249
11250 while (TRUE) {
11251 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11252 vm_map_size_t src_size; /* Size of source
11253 * map entry (in both
11254 * maps)
11255 */
11256
11257 vm_object_t src_object; /* Object to copy */
11258 vm_object_offset_t src_offset;
11259
11260 boolean_t src_needs_copy; /* Should source map
11261 * be made read-only
11262 * for copy-on-write?
11263 */
11264
11265 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11266
11267 boolean_t was_wired; /* Was source wired? */
11268 vm_map_version_t version; /* Version before locks
11269 * dropped to make copy
11270 */
11271 kern_return_t result; /* Return value from
11272 * copy_strategically.
11273 */
11274 while (tmp_entry->is_sub_map) {
11275 vm_map_size_t submap_len;
11276 submap_map_t *ptr;
11277
11278 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11279 ptr->next = parent_maps;
11280 parent_maps = ptr;
11281 ptr->parent_map = src_map;
11282 ptr->base_start = src_start;
11283 ptr->base_end = src_end;
11284 submap_len = tmp_entry->vme_end - src_start;
11285 if (submap_len > (src_end - src_start)) {
11286 submap_len = src_end - src_start;
11287 }
11288 ptr->base_len = submap_len;
11289
11290 src_start -= tmp_entry->vme_start;
11291 src_start += VME_OFFSET(tmp_entry);
11292 src_end = src_start + submap_len;
11293 src_map = VME_SUBMAP(tmp_entry);
11294 vm_map_lock(src_map);
11295 /* keep an outstanding reference for all maps in */
11296 /* the tree of parent maps, except the base map */
11297 vm_map_reference(src_map);
11298 vm_map_unlock(ptr->parent_map);
11299 if (!vm_map_lookup_entry(
11300 src_map, src_start, &tmp_entry)) {
11301 RETURN(KERN_INVALID_ADDRESS);
11302 }
11303 map_share = TRUE;
11304 if (!tmp_entry->is_sub_map) {
11305 vm_map_clip_start(src_map, tmp_entry, src_start);
11306 }
11307 src_entry = tmp_entry;
11308 }
11309 /* we are now in the lowest level submap... */
11310
11311 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11312 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11313 /* This is not supported for now. In future */
11314 /* we will need to detect the phys_contig */
11315 /* condition and then upgrade copy_slowly */
11316 /* to do a physical copy from the device mem */
11317 /* based object. We can piggy-back off of */
11318 /* the "was_wired" boolean to set up the */
11319 /* proper handling */
11320 RETURN(KERN_PROTECTION_FAILURE);
11321 }
11322 /*
11323 * Create a new address map entry to hold the result.
11324 * Fill in the fields from the appropriate source entries.
11325 * We must unlock the source map to do this if we need
11326 * to allocate a map entry.
11327 */
11328 if (new_entry == VM_MAP_ENTRY_NULL) {
11329 version.main_timestamp = src_map->timestamp;
11330 vm_map_unlock(src_map);
11331
11332 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11333
11334 vm_map_lock(src_map);
11335 if ((version.main_timestamp + 1) != src_map->timestamp) {
11336 if (!vm_map_lookup_entry(src_map, src_start,
11337 &tmp_entry)) {
11338 RETURN(KERN_INVALID_ADDRESS);
11339 }
11340 if (!tmp_entry->is_sub_map) {
11341 vm_map_clip_start(src_map, tmp_entry, src_start);
11342 }
11343 continue; /* restart w/ new tmp_entry */
11344 }
11345 }
11346
11347 /*
11348 * Verify that the region can be read.
11349 */
11350 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11351 !use_maxprot) ||
11352 (src_entry->max_protection & VM_PROT_READ) == 0) {
11353 RETURN(KERN_PROTECTION_FAILURE);
11354 }
11355
11356 /*
11357 * Clip against the endpoints of the entire region.
11358 */
11359
11360 vm_map_clip_end(src_map, src_entry, src_end);
11361
11362 src_size = src_entry->vme_end - src_start;
11363 src_object = VME_OBJECT(src_entry);
11364 src_offset = VME_OFFSET(src_entry);
11365 was_wired = (src_entry->wired_count != 0);
11366
11367 vm_map_entry_copy(new_entry, src_entry);
11368 if (new_entry->is_sub_map) {
11369 /* clr address space specifics */
11370 new_entry->use_pmap = FALSE;
11371 } else {
11372 /*
11373 * We're dealing with a copy-on-write operation,
11374 * so the resulting mapping should not inherit the
11375 * original mapping's accounting settings.
11376 * "iokit_acct" should have been cleared in
11377 * vm_map_entry_copy().
11378 * "use_pmap" should be reset to its default (TRUE)
11379 * so that the new mapping gets accounted for in
11380 * the task's memory footprint.
11381 */
11382 assert(!new_entry->iokit_acct);
11383 new_entry->use_pmap = TRUE;
11384 }
11385
11386 /*
11387 * Attempt non-blocking copy-on-write optimizations.
11388 */
11389
11390 /*
11391 * If we are destroying the source, and the object
11392 * is internal, we could move the object reference
11393 * from the source to the copy. The copy is
11394 * copy-on-write only if the source is.
11395 * We make another reference to the object, because
11396 * destroying the source entry will deallocate it.
11397 *
11398 * This memory transfer has to be atomic (to prevent
11399 * the VM object from being shared or copied while
11400 * it's being moved here), so we can only do this
11401 * if we won't have to unlock the VM map until the
11402 * original mapping has been fully removed.
11403 */
11404
11405 RestartCopy:
11406 if ((src_object == VM_OBJECT_NULL ||
11407 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11408 vm_object_copy_quickly(
11409 VME_OBJECT_PTR(new_entry),
11410 src_offset,
11411 src_size,
11412 &src_needs_copy,
11413 &new_entry_needs_copy)) {
11414 new_entry->needs_copy = new_entry_needs_copy;
11415
11416 /*
11417 * Handle copy-on-write obligations
11418 */
11419
11420 if (src_needs_copy && !tmp_entry->needs_copy) {
11421 vm_prot_t prot;
11422
11423 prot = src_entry->protection & ~VM_PROT_WRITE;
11424
11425 if (override_nx(src_map, VME_ALIAS(src_entry))
11426 && prot) {
11427 prot |= VM_PROT_EXECUTE;
11428 }
11429
11430 vm_object_pmap_protect(
11431 src_object,
11432 src_offset,
11433 src_size,
11434 (src_entry->is_shared ?
11435 PMAP_NULL
11436 : src_map->pmap),
11437 src_entry->vme_start,
11438 prot);
11439
11440 assert(tmp_entry->wired_count == 0);
11441 tmp_entry->needs_copy = TRUE;
11442 }
11443
11444 /*
11445 * The map has never been unlocked, so it's safe
11446 * to move to the next entry rather than doing
11447 * another lookup.
11448 */
11449
11450 goto CopySuccessful;
11451 }
11452
11453 entry_was_shared = tmp_entry->is_shared;
11454
11455 /*
11456 * Take an object reference, so that we may
11457 * release the map lock(s).
11458 */
11459
11460 assert(src_object != VM_OBJECT_NULL);
11461 vm_object_reference(src_object);
11462
11463 /*
11464 * Record the timestamp for later verification.
11465 * Unlock the map.
11466 */
11467
11468 version.main_timestamp = src_map->timestamp;
11469 vm_map_unlock(src_map); /* Increments timestamp once! */
11470 saved_src_entry = src_entry;
11471 tmp_entry = VM_MAP_ENTRY_NULL;
11472 src_entry = VM_MAP_ENTRY_NULL;
11473
11474 /*
11475 * Perform the copy
11476 */
11477
11478 if (was_wired) {
11479 CopySlowly:
11480 vm_object_lock(src_object);
11481 result = vm_object_copy_slowly(
11482 src_object,
11483 src_offset,
11484 src_size,
11485 THREAD_UNINT,
11486 VME_OBJECT_PTR(new_entry));
11487 VME_OFFSET_SET(new_entry, 0);
11488 new_entry->needs_copy = FALSE;
11489 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11490 (entry_was_shared || map_share)) {
11491 vm_object_t new_object;
11492
11493 vm_object_lock_shared(src_object);
11494 new_object = vm_object_copy_delayed(
11495 src_object,
11496 src_offset,
11497 src_size,
11498 TRUE);
11499 if (new_object == VM_OBJECT_NULL) {
11500 goto CopySlowly;
11501 }
11502
11503 VME_OBJECT_SET(new_entry, new_object);
11504 assert(new_entry->wired_count == 0);
11505 new_entry->needs_copy = TRUE;
11506 assert(!new_entry->iokit_acct);
11507 assert(new_object->purgable == VM_PURGABLE_DENY);
11508 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11509 result = KERN_SUCCESS;
11510 } else {
11511 vm_object_offset_t new_offset;
11512 new_offset = VME_OFFSET(new_entry);
11513 result = vm_object_copy_strategically(src_object,
11514 src_offset,
11515 src_size,
11516 VME_OBJECT_PTR(new_entry),
11517 &new_offset,
11518 &new_entry_needs_copy);
11519 if (new_offset != VME_OFFSET(new_entry)) {
11520 VME_OFFSET_SET(new_entry, new_offset);
11521 }
11522
11523 new_entry->needs_copy = new_entry_needs_copy;
11524 }
11525
11526 if (result == KERN_SUCCESS &&
11527 preserve_purgeable &&
11528 src_object->purgable != VM_PURGABLE_DENY) {
11529 vm_object_t new_object;
11530
11531 new_object = VME_OBJECT(new_entry);
11532 assert(new_object != src_object);
11533 vm_object_lock(new_object);
11534 assert(new_object->ref_count == 1);
11535 assert(new_object->shadow == VM_OBJECT_NULL);
11536 assert(new_object->copy == VM_OBJECT_NULL);
11537 assert(new_object->vo_owner == NULL);
11538
11539 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11540 new_object->true_share = TRUE;
11541 /* start as non-volatile with no owner... */
11542 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11543 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11544 /* ... and move to src_object's purgeable state */
11545 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11546 int state;
11547 state = src_object->purgable;
11548 vm_object_purgable_control(
11549 new_object,
11550 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11551 &state);
11552 }
11553 vm_object_unlock(new_object);
11554 new_object = VM_OBJECT_NULL;
11555 /* no pmap accounting for purgeable objects */
11556 new_entry->use_pmap = FALSE;
11557 }
11558
11559 if (result != KERN_SUCCESS &&
11560 result != KERN_MEMORY_RESTART_COPY) {
11561 vm_map_lock(src_map);
11562 RETURN(result);
11563 }
11564
11565 /*
11566 * Throw away the extra reference
11567 */
11568
11569 vm_object_deallocate(src_object);
11570
11571 /*
11572 * Verify that the map has not substantially
11573 * changed while the copy was being made.
11574 */
11575
11576 vm_map_lock(src_map);
11577
11578 if ((version.main_timestamp + 1) == src_map->timestamp) {
11579 /* src_map hasn't changed: src_entry is still valid */
11580 src_entry = saved_src_entry;
11581 goto VerificationSuccessful;
11582 }
11583
11584 /*
11585 * Simple version comparison failed.
11586 *
11587 * Retry the lookup and verify that the
11588 * same object/offset are still present.
11589 *
11590 * [Note: a memory manager that colludes with
11591 * the calling task can detect that we have
11592 * cheated. While the map was unlocked, the
11593 * mapping could have been changed and restored.]
11594 */
11595
11596 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11597 if (result != KERN_MEMORY_RESTART_COPY) {
11598 vm_object_deallocate(VME_OBJECT(new_entry));
11599 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11600 /* reset accounting state */
11601 new_entry->iokit_acct = FALSE;
11602 new_entry->use_pmap = TRUE;
11603 }
11604 RETURN(KERN_INVALID_ADDRESS);
11605 }
11606
11607 src_entry = tmp_entry;
11608 vm_map_clip_start(src_map, src_entry, src_start);
11609
11610 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11611 !use_maxprot) ||
11612 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
11613 goto VerificationFailed;
11614 }
11615
11616 if (src_entry->vme_end < new_entry->vme_end) {
11617 /*
11618 * This entry might have been shortened
11619 * (vm_map_clip_end) or been replaced with
11620 * an entry that ends closer to "src_start"
11621 * than before.
11622 * Adjust "new_entry" accordingly; copying
11623 * less memory would be correct but we also
11624 * redo the copy (see below) if the new entry
11625 * no longer points at the same object/offset.
11626 */
11627 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11628 VM_MAP_COPY_PAGE_MASK(copy)));
11629 new_entry->vme_end = src_entry->vme_end;
11630 src_size = new_entry->vme_end - src_start;
11631 } else if (src_entry->vme_end > new_entry->vme_end) {
11632 /*
11633 * This entry might have been extended
11634 * (vm_map_entry_simplify() or coalesce)
11635 * or been replaced with an entry that ends farther
11636 * from "src_start" than before.
11637 *
11638 * We've called vm_object_copy_*() only on
11639 * the previous <start:end> range, so we can't
11640 * just extend new_entry. We have to re-do
11641 * the copy based on the new entry as if it was
11642 * pointing at a different object/offset (see
11643 * "Verification failed" below).
11644 */
11645 }
11646
11647 if ((VME_OBJECT(src_entry) != src_object) ||
11648 (VME_OFFSET(src_entry) != src_offset) ||
11649 (src_entry->vme_end > new_entry->vme_end)) {
11650 /*
11651 * Verification failed.
11652 *
11653 * Start over with this top-level entry.
11654 */
11655
11656 VerificationFailed: ;
11657
11658 vm_object_deallocate(VME_OBJECT(new_entry));
11659 tmp_entry = src_entry;
11660 continue;
11661 }
11662
11663 /*
11664 * Verification succeeded.
11665 */
11666
11667 VerificationSuccessful:;
11668
11669 if (result == KERN_MEMORY_RESTART_COPY) {
11670 goto RestartCopy;
11671 }
11672
11673 /*
11674 * Copy succeeded.
11675 */
11676
11677 CopySuccessful: ;
11678
11679 /*
11680 * Link in the new copy entry.
11681 */
11682
11683 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11684 new_entry);
11685
11686 /*
11687 * Determine whether the entire region
11688 * has been copied.
11689 */
11690 src_base = src_start;
11691 src_start = new_entry->vme_end;
11692 new_entry = VM_MAP_ENTRY_NULL;
11693 while ((src_start >= src_end) && (src_end != 0)) {
11694 submap_map_t *ptr;
11695
11696 if (src_map == base_map) {
11697 /* back to the top */
11698 break;
11699 }
11700
11701 ptr = parent_maps;
11702 assert(ptr != NULL);
11703 parent_maps = parent_maps->next;
11704
11705 /* fix up the damage we did in that submap */
11706 vm_map_simplify_range(src_map,
11707 src_base,
11708 src_end);
11709
11710 vm_map_unlock(src_map);
11711 vm_map_deallocate(src_map);
11712 vm_map_lock(ptr->parent_map);
11713 src_map = ptr->parent_map;
11714 src_base = ptr->base_start;
11715 src_start = ptr->base_start + ptr->base_len;
11716 src_end = ptr->base_end;
11717 if (!vm_map_lookup_entry(src_map,
11718 src_start,
11719 &tmp_entry) &&
11720 (src_end > src_start)) {
11721 RETURN(KERN_INVALID_ADDRESS);
11722 }
11723 kfree(ptr, sizeof(submap_map_t));
11724 if (parent_maps == NULL) {
11725 map_share = FALSE;
11726 }
11727 src_entry = tmp_entry->vme_prev;
11728 }
11729
11730 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11731 (src_start >= src_addr + len) &&
11732 (src_addr + len != 0)) {
11733 /*
11734 * Stop copying now, even though we haven't reached
11735 * "src_end". We'll adjust the end of the last copy
11736 * entry at the end, if needed.
11737 *
11738 * If src_map's alignment is different from the
11739 * system's page-alignment, there could be
11740 * extra non-map-aligned map entries between
11741 * the original (non-rounded) "src_addr + len"
11742 * and the rounded "src_end".
11743 * We do not want to copy those map entries since
11744 * they're not part of the copied range.
11745 */
11746 break;
11747 }
11748
11749 if ((src_start >= src_end) && (src_end != 0)) {
11750 break;
11751 }
11752
11753 /*
11754 * Verify that there are no gaps in the region
11755 */
11756
11757 tmp_entry = src_entry->vme_next;
11758 if ((tmp_entry->vme_start != src_start) ||
11759 (tmp_entry == vm_map_to_entry(src_map))) {
11760 RETURN(KERN_INVALID_ADDRESS);
11761 }
11762 }
11763
11764 /*
11765 * If the source should be destroyed, do it now, since the
11766 * copy was successful.
11767 */
11768 if (src_destroy) {
11769 (void) vm_map_delete(
11770 src_map,
11771 vm_map_trunc_page(src_addr,
11772 VM_MAP_PAGE_MASK(src_map)),
11773 src_end,
11774 ((src_map == kernel_map) ?
11775 VM_MAP_REMOVE_KUNWIRE :
11776 VM_MAP_REMOVE_NO_FLAGS),
11777 VM_MAP_NULL);
11778 } else {
11779 /* fix up the damage we did in the base map */
11780 vm_map_simplify_range(
11781 src_map,
11782 vm_map_trunc_page(src_addr,
11783 VM_MAP_PAGE_MASK(src_map)),
11784 vm_map_round_page(src_end,
11785 VM_MAP_PAGE_MASK(src_map)));
11786 }
11787
11788 vm_map_unlock(src_map);
11789 tmp_entry = VM_MAP_ENTRY_NULL;
11790
11791 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11792 vm_map_offset_t original_start, original_offset, original_end;
11793
11794 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11795
11796 /* adjust alignment of first copy_entry's "vme_start" */
11797 tmp_entry = vm_map_copy_first_entry(copy);
11798 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11799 vm_map_offset_t adjustment;
11800
11801 original_start = tmp_entry->vme_start;
11802 original_offset = VME_OFFSET(tmp_entry);
11803
11804 /* map-align the start of the first copy entry... */
11805 adjustment = (tmp_entry->vme_start -
11806 vm_map_trunc_page(
11807 tmp_entry->vme_start,
11808 VM_MAP_PAGE_MASK(src_map)));
11809 tmp_entry->vme_start -= adjustment;
11810 VME_OFFSET_SET(tmp_entry,
11811 VME_OFFSET(tmp_entry) - adjustment);
11812 copy_addr -= adjustment;
11813 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11814 /* ... adjust for mis-aligned start of copy range */
11815 adjustment =
11816 (vm_map_trunc_page(copy->offset,
11817 PAGE_MASK) -
11818 vm_map_trunc_page(copy->offset,
11819 VM_MAP_PAGE_MASK(src_map)));
11820 if (adjustment) {
11821 assert(page_aligned(adjustment));
11822 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11823 tmp_entry->vme_start += adjustment;
11824 VME_OFFSET_SET(tmp_entry,
11825 (VME_OFFSET(tmp_entry) +
11826 adjustment));
11827 copy_addr += adjustment;
11828 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11829 }
11830
11831 /*
11832 * Assert that the adjustments haven't exposed
11833 * more than was originally copied...
11834 */
11835 assert(tmp_entry->vme_start >= original_start);
11836 assert(VME_OFFSET(tmp_entry) >= original_offset);
11837 /*
11838 * ... and that it did not adjust outside of
11839 * a single 16K page.
11840 */
11841 assert(vm_map_trunc_page(tmp_entry->vme_start,
11842 VM_MAP_PAGE_MASK(src_map)) ==
11843 vm_map_trunc_page(original_start,
11844 VM_MAP_PAGE_MASK(src_map)));
11845 }
11846
11847 /* adjust alignment of last copy_entry's "vme_end" */
11848 tmp_entry = vm_map_copy_last_entry(copy);
11849 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11850 vm_map_offset_t adjustment;
11851
11852 original_end = tmp_entry->vme_end;
11853
11854 /* map-align the end of the last copy entry... */
11855 tmp_entry->vme_end =
11856 vm_map_round_page(tmp_entry->vme_end,
11857 VM_MAP_PAGE_MASK(src_map));
11858 /* ... adjust for mis-aligned end of copy range */
11859 adjustment =
11860 (vm_map_round_page((copy->offset +
11861 copy->size),
11862 VM_MAP_PAGE_MASK(src_map)) -
11863 vm_map_round_page((copy->offset +
11864 copy->size),
11865 PAGE_MASK));
11866 if (adjustment) {
11867 assert(page_aligned(adjustment));
11868 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11869 tmp_entry->vme_end -= adjustment;
11870 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11871 }
11872
11873 /*
11874 * Assert that the adjustments haven't exposed
11875 * more than was originally copied...
11876 */
11877 assert(tmp_entry->vme_end <= original_end);
11878 /*
11879 * ... and that it did not adjust outside of
11880 * a single 16K page.
11881 */
11882 assert(vm_map_round_page(tmp_entry->vme_end,
11883 VM_MAP_PAGE_MASK(src_map)) ==
11884 vm_map_round_page(original_end,
11885 VM_MAP_PAGE_MASK(src_map)));
11886 }
11887 }
11888
11889 /* Fix-up start and end points in copy. This is necessary */
11890 /* when the various entries in the copy object were picked */
11891 /* up from different sub-maps */
11892
11893 tmp_entry = vm_map_copy_first_entry(copy);
11894 copy_size = 0; /* compute actual size */
11895 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11896 assert(VM_MAP_PAGE_ALIGNED(
11897 copy_addr + (tmp_entry->vme_end -
11898 tmp_entry->vme_start),
11899 VM_MAP_COPY_PAGE_MASK(copy)));
11900 assert(VM_MAP_PAGE_ALIGNED(
11901 copy_addr,
11902 VM_MAP_COPY_PAGE_MASK(copy)));
11903
11904 /*
11905 * The copy_entries will be injected directly into the
11906 * destination map and might not be "map aligned" there...
11907 */
11908 tmp_entry->map_aligned = FALSE;
11909
11910 tmp_entry->vme_end = copy_addr +
11911 (tmp_entry->vme_end - tmp_entry->vme_start);
11912 tmp_entry->vme_start = copy_addr;
11913 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11914 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11915 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11916 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11917 }
11918
11919 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11920 copy_size < copy->size) {
11921 /*
11922 * The actual size of the VM map copy is smaller than what
11923 * was requested by the caller. This must be because some
11924 * PAGE_SIZE-sized pages are missing at the end of the last
11925 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11926 * The caller might not have been aware of those missing
11927 * pages and might not want to be aware of it, which is
11928 * fine as long as they don't try to access (and crash on)
11929 * those missing pages.
11930 * Let's adjust the size of the "copy", to avoid failing
11931 * in vm_map_copyout() or vm_map_copy_overwrite().
11932 */
11933 assert(vm_map_round_page(copy_size,
11934 VM_MAP_PAGE_MASK(src_map)) ==
11935 vm_map_round_page(copy->size,
11936 VM_MAP_PAGE_MASK(src_map)));
11937 copy->size = copy_size;
11938 }
11939
11940 *copy_result = copy;
11941 return KERN_SUCCESS;
11942
11943 #undef RETURN
11944 }
11945
11946 kern_return_t
11947 vm_map_copy_extract(
11948 vm_map_t src_map,
11949 vm_map_address_t src_addr,
11950 vm_map_size_t len,
11951 vm_map_copy_t *copy_result, /* OUT */
11952 vm_prot_t *cur_prot, /* OUT */
11953 vm_prot_t *max_prot)
11954 {
11955 vm_map_offset_t src_start, src_end;
11956 vm_map_copy_t copy;
11957 kern_return_t kr;
11958
11959 /*
11960 * Check for copies of zero bytes.
11961 */
11962
11963 if (len == 0) {
11964 *copy_result = VM_MAP_COPY_NULL;
11965 return KERN_SUCCESS;
11966 }
11967
11968 /*
11969 * Check that the end address doesn't overflow
11970 */
11971 src_end = src_addr + len;
11972 if (src_end < src_addr) {
11973 return KERN_INVALID_ADDRESS;
11974 }
11975
11976 /*
11977 * Compute (page aligned) start and end of region
11978 */
11979 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11980 src_end = vm_map_round_page(src_end, PAGE_MASK);
11981
11982 /*
11983 * Allocate a header element for the list.
11984 *
11985 * Use the start and end in the header to
11986 * remember the endpoints prior to rounding.
11987 */
11988
11989 copy = vm_map_copy_allocate();
11990 copy->type = VM_MAP_COPY_ENTRY_LIST;
11991 copy->cpy_hdr.entries_pageable = TRUE;
11992
11993 vm_map_store_init(&copy->cpy_hdr);
11994
11995 copy->offset = 0;
11996 copy->size = len;
11997
11998 kr = vm_map_remap_extract(src_map,
11999 src_addr,
12000 len,
12001 FALSE, /* copy */
12002 &copy->cpy_hdr,
12003 cur_prot,
12004 max_prot,
12005 VM_INHERIT_SHARE,
12006 TRUE, /* pageable */
12007 FALSE, /* same_map */
12008 VM_MAP_KERNEL_FLAGS_NONE);
12009 if (kr != KERN_SUCCESS) {
12010 vm_map_copy_discard(copy);
12011 return kr;
12012 }
12013
12014 *copy_result = copy;
12015 return KERN_SUCCESS;
12016 }
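/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * in-kernel caller of vm_map_copy_extract() receives an entry-list copy
 * plus the narrowest current/maximum protections of the extracted range,
 * and would typically reject ranges that are not readable. "src_map",
 * "addr" and "size" are assumptions for the example only.
 *
 *	vm_map_copy_t copy;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copy_extract(src_map, addr, size,
 *	    &copy, &cur_prot, &max_prot);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;	// copy already discarded on failure
 *	}
 *	if (!(cur_prot & VM_PROT_READ)) {
 *		vm_map_copy_discard(copy);
 *		return KERN_PROTECTION_FAILURE;
 *	}
 *	... map or copy out "copy", then discard it ...
 */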
12017
12018 /*
12019 * vm_map_copyin_object:
12020 *
12021 * Create a copy object from an object.
12022 * Our caller donates an object reference.
12023 */
12024
12025 kern_return_t
12026 vm_map_copyin_object(
12027 vm_object_t object,
12028 vm_object_offset_t offset, /* offset of region in object */
12029 vm_object_size_t size, /* size of region in object */
12030 vm_map_copy_t *copy_result) /* OUT */
12031 {
12032 vm_map_copy_t copy; /* Resulting copy */
12033
12034 /*
12035 * We drop the object into a special copy object
12036 * that contains the object directly.
12037 */
12038
12039 copy = vm_map_copy_allocate();
12040 copy->type = VM_MAP_COPY_OBJECT;
12041 copy->cpy_object = object;
12042 copy->offset = offset;
12043 copy->size = size;
12044
12045 *copy_result = copy;
12046 return KERN_SUCCESS;
12047 }
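/*
 * Illustrative sketch, not part of the original source: because the
 * caller donates its object reference, a hypothetical user of
 * vm_map_copyin_object() must not release the object afterwards; the
 * reference now belongs to the copy and is consumed when the copy is
 * copied out or discarded. "obj_size" is an assumption for the example.
 *
 *	vm_object_t object;
 *	vm_map_copy_t copy;
 *
 *	object = vm_object_allocate(obj_size);	// one reference, donated below
 *	(void) vm_map_copyin_object(object, 0, obj_size, &copy);
 *	// do NOT vm_object_deallocate(object) here: "copy" owns the reference
 *	vm_map_copy_discard(copy);		// drops the donated reference
 */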
12048
12049 static void
12050 vm_map_fork_share(
12051 vm_map_t old_map,
12052 vm_map_entry_t old_entry,
12053 vm_map_t new_map)
12054 {
12055 vm_object_t object;
12056 vm_map_entry_t new_entry;
12057
12058 /*
12059 * New sharing code. New map entry
12060 * references original object. Internal
12061 * objects use asynchronous copy algorithm for
12062 * future copies. First make sure we have
12063 * the right object. If we need a shadow,
12064 * or someone else already has one, then
12065 * make a new shadow and share it.
12066 */
12067
12068 object = VME_OBJECT(old_entry);
12069 if (old_entry->is_sub_map) {
12070 assert(old_entry->wired_count == 0);
12071 #ifndef NO_NESTED_PMAP
12072 if (old_entry->use_pmap) {
12073 kern_return_t result;
12074
12075 result = pmap_nest(new_map->pmap,
12076 (VME_SUBMAP(old_entry))->pmap,
12077 (addr64_t)old_entry->vme_start,
12078 (addr64_t)old_entry->vme_start,
12079 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12080 if (result) {
12081 panic("vm_map_fork_share: pmap_nest failed!");
12082 }
12083 }
12084 #endif /* NO_NESTED_PMAP */
12085 } else if (object == VM_OBJECT_NULL) {
12086 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12087 old_entry->vme_start));
12088 VME_OFFSET_SET(old_entry, 0);
12089 VME_OBJECT_SET(old_entry, object);
12090 old_entry->use_pmap = TRUE;
12091 // assert(!old_entry->needs_copy);
12092 } else if (object->copy_strategy !=
12093 MEMORY_OBJECT_COPY_SYMMETRIC) {
12094 /*
12095 * We are already using an asymmetric
12096 * copy, and therefore we already have
12097 * the right object.
12098 */
12099
12100 assert(!old_entry->needs_copy);
12101 } else if (old_entry->needs_copy || /* case 1 */
12102 object->shadowed || /* case 2 */
12103 (!object->true_share && /* case 3 */
12104 !old_entry->is_shared &&
12105 (object->vo_size >
12106 (vm_map_size_t)(old_entry->vme_end -
12107 old_entry->vme_start)))) {
12108 /*
12109 * We need to create a shadow.
12110 * There are three cases here.
12111 * In the first case, we need to
12112 * complete a deferred symmetrical
12113 * copy that we participated in.
12114 * In the second and third cases,
12115 * we need to create the shadow so
12116 * that changes that we make to the
12117 * object do not interfere with
12118 * any symmetrical copies which
12119 * have occurred (case 2) or which
12120 * might occur (case 3).
12121 *
12122 * The first case is when we had
12123 * deferred shadow object creation
12124 * via the entry->needs_copy mechanism.
12125 * This mechanism only works when
12126 * only one entry points to the source
12127 * object, and we are about to create
12128 * a second entry pointing to the
12129 * same object. The problem is that
12130 * there is no way of mapping from
12131 * an object to the entries pointing
12132 * to it. (Deferred shadow creation
12133 * works with one entry because it occurs
12134 * at fault time, and we walk from the
12135 * entry to the object when handling
12136 * the fault.)
12137 *
12138 * The second case is when the object
12139 * to be shared has already been copied
12140 * with a symmetric copy, but we point
12141 * directly to the object without
12142 * needs_copy set in our entry. (This
12143 * can happen because different ranges
12144 * of an object can be pointed to by
12145 * different entries. In particular,
12146 * a single entry pointing to an object
12147 * can be split by a call to vm_inherit,
12148 * which, combined with task_create, can
12149 * result in the different entries
12150 * having different needs_copy values.)
12151 * The shadowed flag in the object allows
12152 * us to detect this case. The problem
12153 * with this case is that if this object
12154 * has or will have shadows, then we
12155 * must not perform an asymmetric copy
12156 * of this object, since such a copy
12157 * allows the object to be changed, which
12158 * will break the previous symmetrical
12159 * copies (which rely upon the object
12160 * not changing). In a sense, the shadowed
12161 * flag says "don't change this object".
12162 * We fix this by creating a shadow
12163 * object for this object, and sharing
12164 * that. This works because we are free
12165 * to change the shadow object (and thus
12166 * to use an asymmetric copy strategy);
12167 * this is also semantically correct,
12168 * since this object is temporary, and
12169 * therefore a copy of the object is
12170 * as good as the object itself. (This
12171 * is not true for permanent objects,
12172 * since the pager needs to see changes,
12173 * which won't happen if the changes
12174 * are made to a copy.)
12175 *
12176 * The third case is when the object
12177 * to be shared has parts sticking
12178 * outside of the entry we're working
12179 * with, and thus may in the future
12180 * be subject to a symmetrical copy.
12181 * (This is a preemptive version of
12182 * case 2.)
12183 */
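/*
 * Illustrative restatement, not part of the original source: the three
 * cases above correspond to the branch condition taken to get here,
 * which (with "entry_size" standing for vme_end - vme_start) reads as:
 *
 *	need_shadow = old_entry->needs_copy		// case 1
 *	    || object->shadowed				// case 2
 *	    || (!object->true_share			// case 3
 *	        && !old_entry->is_shared
 *	        && object->vo_size > entry_size);
 */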
12184 VME_OBJECT_SHADOW(old_entry,
12185 (vm_map_size_t) (old_entry->vme_end -
12186 old_entry->vme_start));
12187
12188 /*
12189 * If we're making a shadow for other than
12190 * copy on write reasons, then we have
12191 * to remove write permission.
12192 */
12193
12194 if (!old_entry->needs_copy &&
12195 (old_entry->protection & VM_PROT_WRITE)) {
12196 vm_prot_t prot;
12197
12198 assert(!pmap_has_prot_policy(old_entry->protection));
12199
12200 prot = old_entry->protection & ~VM_PROT_WRITE;
12201
12202 assert(!pmap_has_prot_policy(prot));
12203
12204 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12205 prot |= VM_PROT_EXECUTE;
12206 }
12207
12208
12209 if (old_map->mapped_in_other_pmaps) {
12210 vm_object_pmap_protect(
12211 VME_OBJECT(old_entry),
12212 VME_OFFSET(old_entry),
12213 (old_entry->vme_end -
12214 old_entry->vme_start),
12215 PMAP_NULL,
12216 old_entry->vme_start,
12217 prot);
12218 } else {
12219 pmap_protect(old_map->pmap,
12220 old_entry->vme_start,
12221 old_entry->vme_end,
12222 prot);
12223 }
12224 }
12225
12226 old_entry->needs_copy = FALSE;
12227 object = VME_OBJECT(old_entry);
12228 }
12229
12230
12231 /*
12232 * If object was using a symmetric copy strategy,
12233 * change its copy strategy to the default
12234 * asymmetric copy strategy, which is copy_delay
12235 * in the non-norma case and copy_call in the
12236 * norma case. Bump the reference count for the
12237 * new entry.
12238 */
12239
12240 if (old_entry->is_sub_map) {
12241 vm_map_lock(VME_SUBMAP(old_entry));
12242 vm_map_reference(VME_SUBMAP(old_entry));
12243 vm_map_unlock(VME_SUBMAP(old_entry));
12244 } else {
12245 vm_object_lock(object);
12246 vm_object_reference_locked(object);
12247 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12248 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12249 }
12250 vm_object_unlock(object);
12251 }
12252
12253 /*
12254 * Clone the entry, using object ref from above.
12255 * Mark both entries as shared.
12256 */
12257
12258 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12259 * map or descendants */
12260 vm_map_entry_copy(new_entry, old_entry);
12261 old_entry->is_shared = TRUE;
12262 new_entry->is_shared = TRUE;
12263
12264 /*
12265 * We're dealing with a shared mapping, so the resulting mapping
12266 * should inherit some of the original mapping's accounting settings.
12267 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12268 * "use_pmap" should stay the same as before (if it hasn't been reset
12269 * to TRUE when we cleared "iokit_acct").
12270 */
12271 assert(!new_entry->iokit_acct);
12272
12273 /*
12274 * If the old entry's inheritance is VM_INHERIT_NONE,
12275 * the new entry is for a corpse fork, so remove the
12276 * write permission from the new entry.
12277 */
12278 if (old_entry->inheritance == VM_INHERIT_NONE) {
12279 new_entry->protection &= ~VM_PROT_WRITE;
12280 new_entry->max_protection &= ~VM_PROT_WRITE;
12281 }
12282
12283 /*
12284 * Insert the entry into the new map -- we
12285 * know we're inserting at the end of the new
12286 * map.
12287 */
12288
12289 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12290 VM_MAP_KERNEL_FLAGS_NONE);
12291
12292 /*
12293 * Update the physical map
12294 */
12295
12296 if (old_entry->is_sub_map) {
12297 /* Bill Angell pmap support goes here */
12298 } else {
12299 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12300 old_entry->vme_end - old_entry->vme_start,
12301 old_entry->vme_start);
12302 }
12303 }
12304
12305 static boolean_t
12306 vm_map_fork_copy(
12307 vm_map_t old_map,
12308 vm_map_entry_t *old_entry_p,
12309 vm_map_t new_map,
12310 int vm_map_copyin_flags)
12311 {
12312 vm_map_entry_t old_entry = *old_entry_p;
12313 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12314 vm_map_offset_t start = old_entry->vme_start;
12315 vm_map_copy_t copy;
12316 vm_map_entry_t last = vm_map_last_entry(new_map);
12317
12318 vm_map_unlock(old_map);
12319 /*
12320 * Use maxprot version of copyin because we
12321 * care about whether this memory can ever
12322 * be accessed, not just whether it's accessible
12323 * right now.
12324 */
12325 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12326 if (vm_map_copyin_internal(old_map, start, entry_size,
12327 vm_map_copyin_flags, &copy)
12328 != KERN_SUCCESS) {
12329 /*
12330 * The map might have changed while it
12331 * was unlocked; check it again. Skip
12332 * any blank space or permanently
12333 * unreadable region.
12334 */
12335 vm_map_lock(old_map);
12336 if (!vm_map_lookup_entry(old_map, start, &last) ||
12337 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12338 last = last->vme_next;
12339 }
12340 *old_entry_p = last;
12341
12342 /*
12343 * XXX For some error returns, want to
12344 * XXX skip to the next element. Note
12345 * that INVALID_ADDRESS and
12346 * PROTECTION_FAILURE are handled above.
12347 */
12348
12349 return FALSE;
12350 }
12351
12352 /*
12353 * Insert the copy into the new map
12354 */
12355
12356 vm_map_copy_insert(new_map, last, copy);
12357
12358 /*
12359 * Pick up the traversal at the end of
12360 * the copied region.
12361 */
12362
12363 vm_map_lock(old_map);
12364 start += entry_size;
12365 if (!vm_map_lookup_entry(old_map, start, &last)) {
12366 last = last->vme_next;
12367 } else {
12368 if (last->vme_start == start) {
12369 /*
12370 * No need to clip here and we don't
12371 * want to cause any unnecessary
12372 * unnesting...
12373 */
12374 } else {
12375 vm_map_clip_start(old_map, last, start);
12376 }
12377 }
12378 *old_entry_p = last;
12379
12380 return TRUE;
12381 }
12382
12383 /*
12384 * vm_map_fork:
12385 *
12386 * Create and return a new map based on the old
12387 * map, according to the inheritance values on the
12388 * regions in that map and the options.
12389 *
12390 * The source map must not be locked.
12391 */
12392 vm_map_t
12393 vm_map_fork(
12394 ledger_t ledger,
12395 vm_map_t old_map,
12396 int options)
12397 {
12398 pmap_t new_pmap;
12399 vm_map_t new_map;
12400 vm_map_entry_t old_entry;
12401 vm_map_size_t new_size = 0, entry_size;
12402 vm_map_entry_t new_entry;
12403 boolean_t src_needs_copy;
12404 boolean_t new_entry_needs_copy;
12405 boolean_t pmap_is64bit;
12406 int vm_map_copyin_flags;
12407 vm_inherit_t old_entry_inheritance;
12408 int map_create_options;
12409 kern_return_t footprint_collect_kr;
12410
12411 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12412 VM_MAP_FORK_PRESERVE_PURGEABLE |
12413 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12414 /* unsupported option */
12415 return VM_MAP_NULL;
12416 }
12417
12418 pmap_is64bit =
12419 #if defined(__i386__) || defined(__x86_64__)
12420 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12421 #elif defined(__arm64__)
12422 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12423 #elif defined(__arm__)
12424 FALSE;
12425 #else
12426 #error Unknown architecture.
12427 #endif
12428
12429 unsigned int pmap_flags = 0;
12430 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12431 #if defined(HAS_APPLE_PAC)
12432 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12433 #endif
12434 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12435
12436 vm_map_reference_swap(old_map);
12437 vm_map_lock(old_map);
12438
12439 map_create_options = 0;
12440 if (old_map->hdr.entries_pageable) {
12441 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12442 }
12443 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12444 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12445 footprint_collect_kr = KERN_SUCCESS;
12446 }
12447 new_map = vm_map_create_options(new_pmap,
12448 old_map->min_offset,
12449 old_map->max_offset,
12450 map_create_options);
12451 vm_map_lock(new_map);
12452 vm_commit_pagezero_status(new_map);
12453 /* inherit the parent map's page size */
12454 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12455 for (
12456 old_entry = vm_map_first_entry(old_map);
12457 old_entry != vm_map_to_entry(old_map);
12458 ) {
12459 entry_size = old_entry->vme_end - old_entry->vme_start;
12460
12461 old_entry_inheritance = old_entry->inheritance;
12462 /*
12463 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12464 * share VM_INHERIT_NONE entries that are not backed by a
12465 * device pager.
12466 */
12467 if (old_entry_inheritance == VM_INHERIT_NONE &&
12468 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12469 !(!old_entry->is_sub_map &&
12470 VME_OBJECT(old_entry) != NULL &&
12471 VME_OBJECT(old_entry)->pager != NULL &&
12472 is_device_pager_ops(
12473 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12474 old_entry_inheritance = VM_INHERIT_SHARE;
12475 }
12476
12477 if (old_entry_inheritance != VM_INHERIT_NONE &&
12478 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12479 footprint_collect_kr == KERN_SUCCESS) {
12480 /*
12481 * The corpse won't have old_map->pmap to query
12482 * footprint information, so collect that data now
12483 * and store it in new_map->vmmap_corpse_footprint
12484 * for later autopsy.
12485 */
12486 footprint_collect_kr =
12487 vm_map_corpse_footprint_collect(old_map,
12488 old_entry,
12489 new_map);
12490 }
12491
12492 switch (old_entry_inheritance) {
12493 case VM_INHERIT_NONE:
12494 break;
12495
12496 case VM_INHERIT_SHARE:
12497 vm_map_fork_share(old_map, old_entry, new_map);
12498 new_size += entry_size;
12499 break;
12500
12501 case VM_INHERIT_COPY:
12502
12503 /*
12504 * Inline the copy_quickly case;
12505 * upon failure, fall back on call
12506 * to vm_map_fork_copy.
12507 */
12508
12509 if (old_entry->is_sub_map) {
12510 break;
12511 }
12512 if ((old_entry->wired_count != 0) ||
12513 ((VME_OBJECT(old_entry) != NULL) &&
12514 (VME_OBJECT(old_entry)->true_share))) {
12515 goto slow_vm_map_fork_copy;
12516 }
12517
12518 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12519 vm_map_entry_copy(new_entry, old_entry);
12520 if (new_entry->is_sub_map) {
12521 /* clear address space specifics */
12522 new_entry->use_pmap = FALSE;
12523 } else {
12524 /*
12525 * We're dealing with a copy-on-write operation,
12526 * so the resulting mapping should not inherit
12527 * the original mapping's accounting settings.
12528 * "iokit_acct" should have been cleared in
12529 * vm_map_entry_copy().
12530 * "use_pmap" should be reset to its default
12531 * (TRUE) so that the new mapping gets
12532 * accounted for in the task's memory footprint.
12533 */
12534 assert(!new_entry->iokit_acct);
12535 new_entry->use_pmap = TRUE;
12536 }
12537
12538 if (!vm_object_copy_quickly(
12539 VME_OBJECT_PTR(new_entry),
12540 VME_OFFSET(old_entry),
12541 (old_entry->vme_end -
12542 old_entry->vme_start),
12543 &src_needs_copy,
12544 &new_entry_needs_copy)) {
12545 vm_map_entry_dispose(new_map, new_entry);
12546 goto slow_vm_map_fork_copy;
12547 }
12548
12549 /*
12550 * Handle copy-on-write obligations
12551 */
12552
12553 if (src_needs_copy && !old_entry->needs_copy) {
12554 vm_prot_t prot;
12555
12556 assert(!pmap_has_prot_policy(old_entry->protection));
12557
12558 prot = old_entry->protection & ~VM_PROT_WRITE;
12559
12560 if (override_nx(old_map, VME_ALIAS(old_entry))
12561 && prot) {
12562 prot |= VM_PROT_EXECUTE;
12563 }
12564
12565 assert(!pmap_has_prot_policy(prot));
12566
12567 vm_object_pmap_protect(
12568 VME_OBJECT(old_entry),
12569 VME_OFFSET(old_entry),
12570 (old_entry->vme_end -
12571 old_entry->vme_start),
12572 ((old_entry->is_shared
12573 || old_map->mapped_in_other_pmaps)
12574 ? PMAP_NULL :
12575 old_map->pmap),
12576 old_entry->vme_start,
12577 prot);
12578
12579 assert(old_entry->wired_count == 0);
12580 old_entry->needs_copy = TRUE;
12581 }
12582 new_entry->needs_copy = new_entry_needs_copy;
12583
12584 /*
12585 * Insert the entry at the end
12586 * of the map.
12587 */
12588
12589 vm_map_store_entry_link(new_map,
12590 vm_map_last_entry(new_map),
12591 new_entry,
12592 VM_MAP_KERNEL_FLAGS_NONE);
12593 new_size += entry_size;
12594 break;
12595
12596 slow_vm_map_fork_copy:
12597 vm_map_copyin_flags = 0;
12598 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12599 vm_map_copyin_flags |=
12600 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12601 }
12602 if (vm_map_fork_copy(old_map,
12603 &old_entry,
12604 new_map,
12605 vm_map_copyin_flags)) {
12606 new_size += entry_size;
12607 }
12608 continue;
12609 }
12610 old_entry = old_entry->vme_next;
12611 }
12612
12613 #if defined(__arm64__)
12614 pmap_insert_sharedpage(new_map->pmap);
12615 #endif
12616
12617 new_map->size = new_size;
12618
12619 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12620 vm_map_corpse_footprint_collect_done(new_map);
12621 }
12622
12623 vm_map_unlock(new_map);
12624 vm_map_unlock(old_map);
12625 vm_map_deallocate(old_map);
12626
12627 return new_map;
12628 }
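/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * caller forking an address space for a corpse would combine the option
 * bits handled above; any unsupported option makes vm_map_fork() return
 * VM_MAP_NULL. "task_ledger" and "parent_map" are assumptions for the
 * example only.
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_fork(task_ledger, parent_map,
 *	    VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
 *	    VM_MAP_FORK_CORPSE_FOOTPRINT);
 *	if (new_map == VM_MAP_NULL) {
 *		return KERN_FAILURE;	// unsupported option combination
 *	}
 */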
12629
12630 /*
12631 * vm_map_exec:
12632 *
12633 * Setup the "new_map" with the proper execution environment according
12634 * to the type of executable (platform, 64bit, chroot environment).
12635 * Map the comm page and shared region, etc...
12636 */
12637 kern_return_t
12638 vm_map_exec(
12639 vm_map_t new_map,
12640 task_t task,
12641 boolean_t is64bit,
12642 void *fsroot,
12643 cpu_type_t cpu,
12644 cpu_subtype_t cpu_subtype)
12645 {
12646 SHARED_REGION_TRACE_DEBUG(
12647 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12648 (void *)VM_KERNEL_ADDRPERM(current_task()),
12649 (void *)VM_KERNEL_ADDRPERM(new_map),
12650 (void *)VM_KERNEL_ADDRPERM(task),
12651 (void *)VM_KERNEL_ADDRPERM(fsroot),
12652 cpu,
12653 cpu_subtype));
12654 (void) vm_commpage_enter(new_map, task, is64bit);
12655 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12656 SHARED_REGION_TRACE_DEBUG(
12657 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12658 (void *)VM_KERNEL_ADDRPERM(current_task()),
12659 (void *)VM_KERNEL_ADDRPERM(new_map),
12660 (void *)VM_KERNEL_ADDRPERM(task),
12661 (void *)VM_KERNEL_ADDRPERM(fsroot),
12662 cpu,
12663 cpu_subtype));
12664 return KERN_SUCCESS;
12665 }
12666
12667 /*
12668 * vm_map_lookup_locked:
12669 *
12670 * Finds the VM object, offset, and
12671 * protection for a given virtual address in the
12672 * specified map, assuming a page fault of the
12673 * type specified.
12674 *
12675 * Returns the (object, offset, protection) for
12676 * this address, whether it is wired down, and whether
12677 * this map has the only reference to the data in question.
12678 * In order to later verify this lookup, a "version"
12679 * is returned.
12680 *
12681 * The map MUST be locked by the caller and WILL be
12682 * locked on exit. In order to guarantee the
12683 * existence of the returned object, it is returned
12684 * locked.
12685 *
12686 * If a lookup is requested with "write protection"
12687 * specified, the map may be changed to perform virtual
12688 * copying operations, although the data referenced will
12689 * remain the same.
12690 */
12691 kern_return_t
12692 vm_map_lookup_locked(
12693 vm_map_t *var_map, /* IN/OUT */
12694 vm_map_offset_t vaddr,
12695 vm_prot_t fault_type,
12696 int object_lock_type,
12697 vm_map_version_t *out_version, /* OUT */
12698 vm_object_t *object, /* OUT */
12699 vm_object_offset_t *offset, /* OUT */
12700 vm_prot_t *out_prot, /* OUT */
12701 boolean_t *wired, /* OUT */
12702 vm_object_fault_info_t fault_info, /* OUT */
12703 vm_map_t *real_map)
12704 {
12705 vm_map_entry_t entry;
12706 vm_map_t map = *var_map;
12707 vm_map_t old_map = *var_map;
12708 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12709 vm_map_offset_t cow_parent_vaddr = 0;
12710 vm_map_offset_t old_start = 0;
12711 vm_map_offset_t old_end = 0;
12712 vm_prot_t prot;
12713 boolean_t mask_protections;
12714 boolean_t force_copy;
12715 vm_prot_t original_fault_type;
12716
12717 /*
12718 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12719 * as a mask against the mapping's actual protections, not as an
12720 * absolute value.
12721 */
12722 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12723 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12724 fault_type &= VM_PROT_ALL;
12725 original_fault_type = fault_type;
12726
12727 *real_map = map;
12728
12729 RetryLookup:
12730 fault_type = original_fault_type;
12731
12732 /*
12733 * If the map has an interesting hint, try it before calling
12734 * full blown lookup routine.
12735 */
12736 entry = map->hint;
12737
12738 if ((entry == vm_map_to_entry(map)) ||
12739 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12740 vm_map_entry_t tmp_entry;
12741
12742 /*
12743 * Entry was either not a valid hint, or the vaddr
12744 * was not contained in the entry, so do a full lookup.
12745 */
12746 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12747 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12748 vm_map_unlock(cow_sub_map_parent);
12749 }
12750 if ((*real_map != map)
12751 && (*real_map != cow_sub_map_parent)) {
12752 vm_map_unlock(*real_map);
12753 }
12754 return KERN_INVALID_ADDRESS;
12755 }
12756
12757 entry = tmp_entry;
12758 }
12759 if (map == old_map) {
12760 old_start = entry->vme_start;
12761 old_end = entry->vme_end;
12762 }
12763
12764 /*
12765 * Handle submaps. Drop lock on upper map, submap is
12766 * returned locked.
12767 */
12768
12769 submap_recurse:
12770 if (entry->is_sub_map) {
12771 vm_map_offset_t local_vaddr;
12772 vm_map_offset_t end_delta;
12773 vm_map_offset_t start_delta;
12774 vm_map_entry_t submap_entry;
12775 vm_prot_t subentry_protection;
12776 vm_prot_t subentry_max_protection;
12777 boolean_t subentry_no_copy_on_read;
12778 boolean_t mapped_needs_copy = FALSE;
12779
12780 local_vaddr = vaddr;
12781
12782 if ((entry->use_pmap &&
12783 !((fault_type & VM_PROT_WRITE) ||
12784 force_copy))) {
12785 /* if real_map equals map we unlock below */
12786 if ((*real_map != map) &&
12787 (*real_map != cow_sub_map_parent)) {
12788 vm_map_unlock(*real_map);
12789 }
12790 *real_map = VME_SUBMAP(entry);
12791 }
12792
12793 if (entry->needs_copy &&
12794 ((fault_type & VM_PROT_WRITE) ||
12795 force_copy)) {
12796 if (!mapped_needs_copy) {
12797 if (vm_map_lock_read_to_write(map)) {
12798 vm_map_lock_read(map);
12799 *real_map = map;
12800 goto RetryLookup;
12801 }
12802 vm_map_lock_read(VME_SUBMAP(entry));
12803 *var_map = VME_SUBMAP(entry);
12804 cow_sub_map_parent = map;
12805 /* reset base to map before cow object */
12806 /* this is the map which will accept */
12807 /* the new cow object */
12808 old_start = entry->vme_start;
12809 old_end = entry->vme_end;
12810 cow_parent_vaddr = vaddr;
12811 mapped_needs_copy = TRUE;
12812 } else {
12813 vm_map_lock_read(VME_SUBMAP(entry));
12814 *var_map = VME_SUBMAP(entry);
12815 if ((cow_sub_map_parent != map) &&
12816 (*real_map != map)) {
12817 vm_map_unlock(map);
12818 }
12819 }
12820 } else {
12821 vm_map_lock_read(VME_SUBMAP(entry));
12822 *var_map = VME_SUBMAP(entry);
12823 /* leave map locked if it is a target */
12824 /* cow sub_map above otherwise, just */
12825 /* follow the maps down to the object */
12826 /* here we unlock knowing we are not */
12827 /* revisiting the map. */
12828 if ((*real_map != map) && (map != cow_sub_map_parent)) {
12829 vm_map_unlock_read(map);
12830 }
12831 }
12832
12833 map = *var_map;
12834
12835 /* calculate the offset in the submap for vaddr */
12836 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12837
12838 RetrySubMap:
12839 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12840 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12841 vm_map_unlock(cow_sub_map_parent);
12842 }
12843 if ((*real_map != map)
12844 && (*real_map != cow_sub_map_parent)) {
12845 vm_map_unlock(*real_map);
12846 }
12847 *real_map = map;
12848 return KERN_INVALID_ADDRESS;
12849 }
12850
12851 /* find the attenuated shadow of the underlying object */
12852 /* on our target map */
12853
12854 /* In English: the submap object may extend beyond the */
12855 /* region mapped by the entry, or may only fill a portion */
12856 /* of it. For our purposes, we only care if the object */
12857 /* doesn't fill. In this case the area which will */
12858 /* ultimately be clipped in the top map will only need */
12859 /* to be as big as the portion of the underlying entry */
12860 /* which is mapped */
12861 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12862 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12863
12864 end_delta =
12865 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12866 submap_entry->vme_end ?
12867 0 : (VME_OFFSET(entry) +
12868 (old_end - old_start))
12869 - submap_entry->vme_end;
12870
12871 old_start += start_delta;
12872 old_end -= end_delta;
12873
12874 if (submap_entry->is_sub_map) {
12875 entry = submap_entry;
12876 vaddr = local_vaddr;
12877 goto submap_recurse;
12878 }
12879
12880 if (((fault_type & VM_PROT_WRITE) ||
12881 force_copy)
12882 && cow_sub_map_parent) {
12883 vm_object_t sub_object, copy_object;
12884 vm_object_offset_t copy_offset;
12885 vm_map_offset_t local_start;
12886 vm_map_offset_t local_end;
12887 boolean_t copied_slowly = FALSE;
12888
12889 if (vm_map_lock_read_to_write(map)) {
12890 vm_map_lock_read(map);
12891 old_start -= start_delta;
12892 old_end += end_delta;
12893 goto RetrySubMap;
12894 }
12895
12896
12897 sub_object = VME_OBJECT(submap_entry);
12898 if (sub_object == VM_OBJECT_NULL) {
12899 sub_object =
12900 vm_object_allocate(
12901 (vm_map_size_t)
12902 (submap_entry->vme_end -
12903 submap_entry->vme_start));
12904 VME_OBJECT_SET(submap_entry, sub_object);
12905 VME_OFFSET_SET(submap_entry, 0);
12906 assert(!submap_entry->is_sub_map);
12907 assert(submap_entry->use_pmap);
12908 }
12909 local_start = local_vaddr -
12910 (cow_parent_vaddr - old_start);
12911 local_end = local_vaddr +
12912 (old_end - cow_parent_vaddr);
12913 vm_map_clip_start(map, submap_entry, local_start);
12914 vm_map_clip_end(map, submap_entry, local_end);
12915 if (submap_entry->is_sub_map) {
12916 /* unnesting was done when clipping */
12917 assert(!submap_entry->use_pmap);
12918 }
12919
12920 /* This is the COW case; let's connect */
12921 /* an entry in our space to the underlying */
12922 /* object in the submap, bypassing the */
12923 /* submap. */
12924
12925
12926 if (submap_entry->wired_count != 0 ||
12927 (sub_object->copy_strategy ==
12928 MEMORY_OBJECT_COPY_NONE)) {
12929 vm_object_lock(sub_object);
12930 vm_object_copy_slowly(sub_object,
12931 VME_OFFSET(submap_entry),
12932 (submap_entry->vme_end -
12933 submap_entry->vme_start),
12934 FALSE,
12935 &copy_object);
12936 copied_slowly = TRUE;
12937 } else {
12938 /* set up shadow object */
12939 copy_object = sub_object;
12940 vm_object_lock(sub_object);
12941 vm_object_reference_locked(sub_object);
12942 sub_object->shadowed = TRUE;
12943 vm_object_unlock(sub_object);
12944
12945 assert(submap_entry->wired_count == 0);
12946 submap_entry->needs_copy = TRUE;
12947
12948 prot = submap_entry->protection;
12949 assert(!pmap_has_prot_policy(prot));
12950 prot = prot & ~VM_PROT_WRITE;
12951 assert(!pmap_has_prot_policy(prot));
12952
12953 if (override_nx(old_map,
12954 VME_ALIAS(submap_entry))
12955 && prot) {
12956 prot |= VM_PROT_EXECUTE;
12957 }
12958
12959 vm_object_pmap_protect(
12960 sub_object,
12961 VME_OFFSET(submap_entry),
12962 submap_entry->vme_end -
12963 submap_entry->vme_start,
12964 (submap_entry->is_shared
12965 || map->mapped_in_other_pmaps) ?
12966 PMAP_NULL : map->pmap,
12967 submap_entry->vme_start,
12968 prot);
12969 }
12970
12971 /*
12972 * Adjust the fault offset to the submap entry.
12973 */
12974 copy_offset = (local_vaddr -
12975 submap_entry->vme_start +
12976 VME_OFFSET(submap_entry));
12977
12978 /* This works differently from the */
12979 /* normal submap case. We go back */
12980 /* to the parent of the cow map and */
12981 /* clip out the target portion of */
12982 /* the sub_map, substituting the */
12983 /* new copy object. */
12984
12985 subentry_protection = submap_entry->protection;
12986 subentry_max_protection = submap_entry->max_protection;
12987 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
12988 vm_map_unlock(map);
12989 submap_entry = NULL; /* not valid after map unlock */
12990
12991 local_start = old_start;
12992 local_end = old_end;
12993 map = cow_sub_map_parent;
12994 *var_map = cow_sub_map_parent;
12995 vaddr = cow_parent_vaddr;
12996 cow_sub_map_parent = NULL;
12997
12998 if (!vm_map_lookup_entry(map,
12999 vaddr, &entry)) {
13000 vm_object_deallocate(
13001 copy_object);
13002 vm_map_lock_write_to_read(map);
13003 return KERN_INVALID_ADDRESS;
13004 }
13005
13006 /* clip out the portion of space */
13007 /* mapped by the sub map which */
13008 /* corresponds to the underlying */
13009 /* object */
13010
13011 /*
13012 * Clip (and unnest) the smallest nested chunk
13013 * possible around the faulting address...
13014 */
13015 local_start = vaddr & ~(pmap_nesting_size_min - 1);
13016 local_end = local_start + pmap_nesting_size_min;
13017 /*
13018 * ... but don't go beyond the "old_start" to "old_end"
13019 * range, to avoid spanning over another VM region
13020 * with a possibly different VM object and/or offset.
13021 */
13022 if (local_start < old_start) {
13023 local_start = old_start;
13024 }
13025 if (local_end > old_end) {
13026 local_end = old_end;
13027 }
13028 /*
13029 * Adjust copy_offset to the start of the range.
13030 */
13031 copy_offset -= (vaddr - local_start);
13032
13033 vm_map_clip_start(map, entry, local_start);
13034 vm_map_clip_end(map, entry, local_end);
13035 if (entry->is_sub_map) {
13036 /* unnesting was done when clipping */
13037 assert(!entry->use_pmap);
13038 }
13039
13040 /* substitute copy object for */
13041 /* shared map entry */
13042 vm_map_deallocate(VME_SUBMAP(entry));
13043 assert(!entry->iokit_acct);
13044 entry->is_sub_map = FALSE;
13045 entry->use_pmap = TRUE;
13046 VME_OBJECT_SET(entry, copy_object);
13047
13048 /* propagate the submap entry's protections */
13049 if (entry->protection != VM_PROT_READ) {
13050 /*
13051 * Someone has already altered the top entry's
13052 * protections via vm_protect(VM_PROT_COPY).
13053 * Respect these new values and ignore the
13054 * submap entry's protections.
13055 */
13056 } else {
13057 /*
13058 * Regular copy-on-write: propagate the submap
13059 * entry's protections to the top map entry.
13060 */
13061 entry->protection |= subentry_protection;
13062 }
13063 entry->max_protection |= subentry_max_protection;
13064 /* propagate no_copy_on_read */
13065 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13066
13067 if ((entry->protection & VM_PROT_WRITE) &&
13068 (entry->protection & VM_PROT_EXECUTE) &&
13069 #if !CONFIG_EMBEDDED
13070 map != kernel_map &&
13071 cs_process_enforcement(NULL) &&
13072 #endif /* !CONFIG_EMBEDDED */
13073 !(entry->used_for_jit)) {
13074 DTRACE_VM3(cs_wx,
13075 uint64_t, (uint64_t)entry->vme_start,
13076 uint64_t, (uint64_t)entry->vme_end,
13077 vm_prot_t, entry->protection);
13078 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13079 proc_selfpid(),
13080 (current_task()->bsd_info
13081 ? proc_name_address(current_task()->bsd_info)
13082 : "?"),
13083 __FUNCTION__);
13084 entry->protection &= ~VM_PROT_EXECUTE;
13085 }
13086
13087 if (copied_slowly) {
13088 VME_OFFSET_SET(entry, local_start - old_start);
13089 entry->needs_copy = FALSE;
13090 entry->is_shared = FALSE;
13091 } else {
13092 VME_OFFSET_SET(entry, copy_offset);
13093 assert(entry->wired_count == 0);
13094 entry->needs_copy = TRUE;
13095 if (entry->inheritance == VM_INHERIT_SHARE) {
13096 entry->inheritance = VM_INHERIT_COPY;
13097 }
13098 if (map != old_map) {
13099 entry->is_shared = TRUE;
13100 }
13101 }
13102 if (entry->inheritance == VM_INHERIT_SHARE) {
13103 entry->inheritance = VM_INHERIT_COPY;
13104 }
13105
13106 vm_map_lock_write_to_read(map);
13107 } else {
13108 if ((cow_sub_map_parent)
13109 && (cow_sub_map_parent != *real_map)
13110 && (cow_sub_map_parent != map)) {
13111 vm_map_unlock(cow_sub_map_parent);
13112 }
13113 entry = submap_entry;
13114 vaddr = local_vaddr;
13115 }
13116 }
13117
13118 /*
13119 * Check whether this task is allowed to have
13120 * this page.
13121 */
13122
13123 prot = entry->protection;
13124
13125 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13126 /*
13127 * HACK -- if not a stack, then allow execution
13128 */
13129 prot |= VM_PROT_EXECUTE;
13130 }
13131
13132 if (mask_protections) {
13133 fault_type &= prot;
13134 if (fault_type == VM_PROT_NONE) {
13135 goto protection_failure;
13136 }
13137 }
13138 if (((fault_type & prot) != fault_type)
13139 #if __arm64__
13140 /* prefetch abort in execute-only page */
13141 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13142 #endif
13143 ) {
13144 protection_failure:
13145 if (*real_map != map) {
13146 vm_map_unlock(*real_map);
13147 }
13148 *real_map = map;
13149
13150 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13151 log_stack_execution_failure((addr64_t)vaddr, prot);
13152 }
13153
13154 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13155 return KERN_PROTECTION_FAILURE;
13156 }
13157
13158 /*
13159 * If this page is not pageable, we have to get
13160 * it for all possible accesses.
13161 */
13162
13163 *wired = (entry->wired_count != 0);
13164 if (*wired) {
13165 fault_type = prot;
13166 }
13167
13168 /*
13169 * If the entry was copy-on-write, we either ...
13170 */
13171
13172 if (entry->needs_copy) {
13173 /*
13174 * If we want to write the page, we may as well
13175 * handle that now since we've got the map locked.
13176 *
13177 * If we don't need to write the page, we just
13178 * demote the permissions allowed.
13179 */
13180
13181 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13182 /*
13183 * Make a new object, and place it in the
13184 * object chain. Note that no new references
13185 * have appeared -- one just moved from the
13186 * map to the new object.
13187 */
13188
13189 if (vm_map_lock_read_to_write(map)) {
13190 vm_map_lock_read(map);
13191 goto RetryLookup;
13192 }
13193
13194 if (VME_OBJECT(entry)->shadowed == FALSE) {
13195 vm_object_lock(VME_OBJECT(entry));
13196 VME_OBJECT(entry)->shadowed = TRUE;
13197 vm_object_unlock(VME_OBJECT(entry));
13198 }
13199 VME_OBJECT_SHADOW(entry,
13200 (vm_map_size_t) (entry->vme_end -
13201 entry->vme_start));
13202 entry->needs_copy = FALSE;
13203
13204 vm_map_lock_write_to_read(map);
13205 }
13206 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13207 /*
13208 * We're attempting to read a copy-on-write
13209 * page -- don't allow writes.
13210 */
13211
13212 prot &= (~VM_PROT_WRITE);
13213 }
13214 }
13215
13216 /*
13217 * Create an object if necessary.
13218 */
13219 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13220 if (vm_map_lock_read_to_write(map)) {
13221 vm_map_lock_read(map);
13222 goto RetryLookup;
13223 }
13224
13225 VME_OBJECT_SET(entry,
13226 vm_object_allocate(
13227 (vm_map_size_t)(entry->vme_end -
13228 entry->vme_start)));
13229 VME_OFFSET_SET(entry, 0);
13230 assert(entry->use_pmap);
13231 vm_map_lock_write_to_read(map);
13232 }
13233
13234 /*
13235 * Return the object/offset from this entry. If the entry
13236 * was copy-on-write or empty, it has been fixed up. Also
13237 * return the protection.
13238 */
13239
13240 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13241 *object = VME_OBJECT(entry);
13242 *out_prot = prot;
13243 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
13244
13245 if (fault_info) {
13246 fault_info->interruptible = THREAD_UNINT; /* for now... */
13247 /* ... the caller will change "interruptible" if needed */
13248 fault_info->cluster_size = 0;
13249 fault_info->user_tag = VME_ALIAS(entry);
13250 fault_info->pmap_options = 0;
13251 if (entry->iokit_acct ||
13252 (!entry->is_sub_map && !entry->use_pmap)) {
13253 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13254 }
13255 fault_info->behavior = entry->behavior;
13256 fault_info->lo_offset = VME_OFFSET(entry);
13257 fault_info->hi_offset =
13258 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13259 fault_info->no_cache = entry->no_cache;
13260 fault_info->stealth = FALSE;
13261 fault_info->io_sync = FALSE;
13262 if (entry->used_for_jit ||
13263 entry->vme_resilient_codesign) {
13264 fault_info->cs_bypass = TRUE;
13265 } else {
13266 fault_info->cs_bypass = FALSE;
13267 }
13268 fault_info->pmap_cs_associated = FALSE;
13269 #if CONFIG_PMAP_CS
13270 if (entry->pmap_cs_associated) {
13271 /*
13272 * The pmap layer will validate this page
13273 * before allowing it to be executed from.
13274 */
13275 fault_info->pmap_cs_associated = TRUE;
13276 }
13277 #endif /* CONFIG_PMAP_CS */
13278 fault_info->mark_zf_absent = FALSE;
13279 fault_info->batch_pmap_op = FALSE;
13280 fault_info->resilient_media = entry->vme_resilient_media;
13281 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13282 }
13283
13284 /*
13285 * Lock the object to prevent it from disappearing
13286 */
13287 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13288 vm_object_lock(*object);
13289 } else {
13290 vm_object_lock_shared(*object);
13291 }
13292
13293 /*
13294 * Save the version number
13295 */
13296
13297 out_version->main_timestamp = map->timestamp;
13298
13299 return KERN_SUCCESS;
13300 }
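/*
 * Illustrative sketch, not part of the original source: the contract
 * documented above means a hypothetical caller enters with the map
 * locked, gets the object back locked, and must also release "real_map"
 * if it differs from the map it ends up with. "map" and "vaddr" are
 * assumptions for the example only.
 *
 *	vm_map_version_t version;
 *	vm_object_t object;
 *	vm_object_offset_t offset;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *	vm_map_t real_map;
 *	kern_return_t kr;
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
 *	    &prot, &wired, NULL, &real_map);
 *	if (kr == KERN_SUCCESS) {
 *		... consult object/offset while the object is locked ...
 *		vm_object_unlock(object);
 *		if (real_map != map) {
 *			vm_map_unlock(real_map);
 *		}
 *	}
 *	vm_map_unlock_read(map);	// "map" may now be a submap
 */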
13301
13302
13303 /*
13304 * vm_map_verify:
13305 *
13306 * Verifies that the map in question has not changed
13307 * since the given version. The map has to be locked
13308 * ("shared" mode is fine) before calling this function
13309 * and it will be returned locked too.
13310 */
13311 boolean_t
13312 vm_map_verify(
13313 vm_map_t map,
13314 vm_map_version_t *version) /* REF */
13315 {
13316 boolean_t result;
13317
13318 vm_map_lock_assert_held(map);
13319 result = (map->timestamp == version->main_timestamp);
13320
13321 return result;
13322 }
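/*
 * Illustrative sketch, not part of the original source: the version
 * obtained from vm_map_lookup_locked() is typically rechecked after the
 * map lock has been dropped and retaken; if the timestamp no longer
 * matches, the lookup must be redone. "RetryFault" is a hypothetical
 * label for the example.
 *
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		vm_map_unlock_read(map);
 *		goto RetryFault;	// map changed: redo the lookup
 *	}
 *	... the mapping seen at lookup time is still valid ...
 */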
13323
13324 /*
13325 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13326 * Goes away after regular vm_region_recurse function migrates to
13327 * 64 bits
13328 * vm_region_recurse: A form of vm_region which follows the
13329 * submaps in a target map
13330 *
13331 */
13332
13333 kern_return_t
13334 vm_map_region_recurse_64(
13335 vm_map_t map,
13336 vm_map_offset_t *address, /* IN/OUT */
13337 vm_map_size_t *size, /* OUT */
13338 natural_t *nesting_depth, /* IN/OUT */
13339 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13340 mach_msg_type_number_t *count) /* IN/OUT */
13341 {
13342 mach_msg_type_number_t original_count;
13343 vm_region_extended_info_data_t extended;
13344 vm_map_entry_t tmp_entry;
13345 vm_map_offset_t user_address;
13346 unsigned int user_max_depth;
13347
13348 /*
13349 * "curr_entry" is the VM map entry preceding or including the
13350 * address we're looking for.
13351 * "curr_map" is the map or sub-map containing "curr_entry".
13352 * "curr_address" is the equivalent of the top map's "user_address"
13353 * in the current map.
13354 * "curr_offset" is the cumulated offset of "curr_map" in the
13355 * target task's address space.
13356 * "curr_depth" is the depth of "curr_map" in the chain of
13357 * sub-maps.
13358 *
13359 * "curr_max_below" and "curr_max_above" limit the range (around
13360 * "curr_address") we should take into account in the current (sub)map.
13361 * They limit the range to what's visible through the map entries
13362 * we've traversed from the top map to the current map.
13363 *
13364 */
13365 vm_map_entry_t curr_entry;
13366 vm_map_address_t curr_address;
13367 vm_map_offset_t curr_offset;
13368 vm_map_t curr_map;
13369 unsigned int curr_depth;
13370 vm_map_offset_t curr_max_below, curr_max_above;
13371 vm_map_offset_t curr_skip;
13372
13373 /*
13374 * "next_" is the same as "curr_" but for the VM region immediately
13375 * after the address we're looking for. We need to keep track of this
13376 * too because we want to return info about that region if the
13377 * address we're looking for is not mapped.
13378 */
13379 vm_map_entry_t next_entry;
13380 vm_map_offset_t next_offset;
13381 vm_map_offset_t next_address;
13382 vm_map_t next_map;
13383 unsigned int next_depth;
13384 vm_map_offset_t next_max_below, next_max_above;
13385 vm_map_offset_t next_skip;
13386
13387 boolean_t look_for_pages;
13388 vm_region_submap_short_info_64_t short_info;
13389 boolean_t do_region_footprint;
13390
13391 if (map == VM_MAP_NULL) {
13392 /* no address space to work on */
13393 return KERN_INVALID_ARGUMENT;
13394 }
13395
13396
13397 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13398 /*
13399 * "info" structure is not big enough and
13400 * would overflow
13401 */
13402 return KERN_INVALID_ARGUMENT;
13403 }
13404
13405 do_region_footprint = task_self_region_footprint();
13406 original_count = *count;
13407
13408 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13409 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13410 look_for_pages = FALSE;
13411 short_info = (vm_region_submap_short_info_64_t) submap_info;
13412 submap_info = NULL;
13413 } else {
13414 look_for_pages = TRUE;
13415 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13416 short_info = NULL;
13417
13418 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13419 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13420 }
13421 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13422 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13423 }
13424 }
13425
13426 user_address = *address;
13427 user_max_depth = *nesting_depth;
13428
13429 if (not_in_kdp) {
13430 vm_map_lock_read(map);
13431 }
13432
13433 recurse_again:
13434 curr_entry = NULL;
13435 curr_map = map;
13436 curr_address = user_address;
13437 curr_offset = 0;
13438 curr_skip = 0;
13439 curr_depth = 0;
13440 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13441 curr_max_below = curr_address;
13442
13443 next_entry = NULL;
13444 next_map = NULL;
13445 next_address = 0;
13446 next_offset = 0;
13447 next_skip = 0;
13448 next_depth = 0;
13449 next_max_above = (vm_map_offset_t) -1;
13450 next_max_below = (vm_map_offset_t) -1;
13451
13452 for (;;) {
13453 if (vm_map_lookup_entry(curr_map,
13454 curr_address,
13455 &tmp_entry)) {
13456 /* tmp_entry contains the address we're looking for */
13457 curr_entry = tmp_entry;
13458 } else {
13459 vm_map_offset_t skip;
13460 /*
13461 * The address is not mapped. "tmp_entry" is the
13462 * map entry preceding the address. We want the next
13463 * one, if it exists.
13464 */
13465 curr_entry = tmp_entry->vme_next;
13466
13467 if (curr_entry == vm_map_to_entry(curr_map) ||
13468 (curr_entry->vme_start >=
13469 curr_address + curr_max_above)) {
13470 /* no next entry at this level: stop looking */
13471 if (not_in_kdp) {
13472 vm_map_unlock_read(curr_map);
13473 }
13474 curr_entry = NULL;
13475 curr_map = NULL;
13476 curr_skip = 0;
13477 curr_offset = 0;
13478 curr_depth = 0;
13479 curr_max_above = 0;
13480 curr_max_below = 0;
13481 break;
13482 }
13483
13484 /* adjust current address and offset */
13485 skip = curr_entry->vme_start - curr_address;
13486 curr_address = curr_entry->vme_start;
13487 curr_skip += skip;
13488 curr_offset += skip;
13489 curr_max_above -= skip;
13490 curr_max_below = 0;
13491 }
13492
13493 /*
13494 * Is the next entry at this level closer to the address (or
13495 * deeper in the submap chain) than the one we had
13496 * so far ?
13497 */
13498 tmp_entry = curr_entry->vme_next;
13499 if (tmp_entry == vm_map_to_entry(curr_map)) {
13500 /* no next entry at this level */
13501 } else if (tmp_entry->vme_start >=
13502 curr_address + curr_max_above) {
13503 /*
13504 * tmp_entry is beyond the scope of what we mapped of
13505 * this submap in the upper level: ignore it.
13506 */
13507 } else if ((next_entry == NULL) ||
13508 (tmp_entry->vme_start + curr_offset <=
13509 next_entry->vme_start + next_offset)) {
13510 /*
13511 * We didn't have a "next_entry" or this one is
13512 * closer to the address we're looking for:
13513 * use this "tmp_entry" as the new "next_entry".
13514 */
13515 if (next_entry != NULL) {
13516 /* unlock the last "next_map" */
13517 if (next_map != curr_map && not_in_kdp) {
13518 vm_map_unlock_read(next_map);
13519 }
13520 }
13521 next_entry = tmp_entry;
13522 next_map = curr_map;
13523 next_depth = curr_depth;
13524 next_address = next_entry->vme_start;
13525 next_skip = curr_skip;
13526 next_skip += (next_address - curr_address);
13527 next_offset = curr_offset;
13528 next_offset += (next_address - curr_address);
13529 next_max_above = MIN(next_max_above, curr_max_above);
13530 next_max_above = MIN(next_max_above,
13531 next_entry->vme_end - next_address);
13532 next_max_below = MIN(next_max_below, curr_max_below);
13533 next_max_below = MIN(next_max_below,
13534 next_address - next_entry->vme_start);
13535 }
13536
13537 /*
13538 * "curr_max_{above,below}" allow us to keep track of the
13539 * portion of the submap that is actually mapped at this level:
13540 * the rest of that submap is irrelevant to us, since it's not
13541 * mapped here.
13542 * The relevant portion of the map starts at
13543 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13544 */
13545 curr_max_above = MIN(curr_max_above,
13546 curr_entry->vme_end - curr_address);
13547 curr_max_below = MIN(curr_max_below,
13548 curr_address - curr_entry->vme_start);
13549
13550 if (!curr_entry->is_sub_map ||
13551 curr_depth >= user_max_depth) {
13552 /*
13553 * We hit a leaf map or we reached the maximum depth
13554 * we could, so stop looking. Keep the current map
13555 * locked.
13556 */
13557 break;
13558 }
13559
13560 /*
13561 * Get down to the next submap level.
13562 */
13563
13564 /*
13565 * Lock the next level and unlock the current level,
13566 * unless we need to keep it locked to access the "next_entry"
13567 * later.
13568 */
13569 if (not_in_kdp) {
13570 vm_map_lock_read(VME_SUBMAP(curr_entry));
13571 }
13572 if (curr_map == next_map) {
13573 /* keep "next_map" locked in case we need it */
13574 } else {
13575 /* release this map */
13576 if (not_in_kdp) {
13577 vm_map_unlock_read(curr_map);
13578 }
13579 }
13580
13581 /*
13582 * Adjust the offset. "curr_entry" maps the submap
13583 * at relative address "curr_entry->vme_start" in the
13584 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13585 * bytes of the submap.
13586 * "curr_offset" always represents the offset of a virtual
13587 * address in the curr_map relative to the absolute address
13588 * space (i.e. the top-level VM map).
13589 */
13590 curr_offset +=
13591 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13592 curr_address = user_address + curr_offset;
13593 /* switch to the submap */
13594 curr_map = VME_SUBMAP(curr_entry);
13595 curr_depth++;
13596 curr_entry = NULL;
13597 }
13598
13599 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13600 // so probably should be a real 32b ID vs. ptr.
13601 // Current users just check for equality
13602
13603 if (curr_entry == NULL) {
13604 /* no VM region contains the address... */
13605
13606 if (do_region_footprint && /* we want footprint numbers */
13607 next_entry == NULL && /* & there are no more regions */
13608 /* & we haven't already provided our fake region: */
13609 user_address <= vm_map_last_entry(map)->vme_end) {
13610 ledger_amount_t ledger_resident, ledger_compressed;
13611
13612 /*
13613 * Add a fake memory region to account for
13614 * purgeable and/or ledger-tagged memory that
13615 * counts towards this task's memory footprint,
13616 * i.e. the resident/compressed pages of non-volatile
13617 * objects owned by that task.
13618 */
13619 task_ledgers_footprint(map->pmap->ledger,
13620 &ledger_resident,
13621 &ledger_compressed);
13622 if (ledger_resident + ledger_compressed == 0) {
13623 /* no purgeable memory usage to report */
13624 return KERN_INVALID_ADDRESS;
13625 }
13626 /* fake region to show nonvolatile footprint */
13627 if (look_for_pages) {
13628 submap_info->protection = VM_PROT_DEFAULT;
13629 submap_info->max_protection = VM_PROT_DEFAULT;
13630 submap_info->inheritance = VM_INHERIT_DEFAULT;
13631 submap_info->offset = 0;
13632 submap_info->user_tag = -1;
13633 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
13634 submap_info->pages_shared_now_private = 0;
13635 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
13636 submap_info->pages_dirtied = submap_info->pages_resident;
13637 submap_info->ref_count = 1;
13638 submap_info->shadow_depth = 0;
13639 submap_info->external_pager = 0;
13640 submap_info->share_mode = SM_PRIVATE;
13641 submap_info->is_submap = 0;
13642 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13643 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13644 submap_info->user_wired_count = 0;
13645 submap_info->pages_reusable = 0;
13646 } else {
13647 short_info->user_tag = -1;
13648 short_info->offset = 0;
13649 short_info->protection = VM_PROT_DEFAULT;
13650 short_info->inheritance = VM_INHERIT_DEFAULT;
13651 short_info->max_protection = VM_PROT_DEFAULT;
13652 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13653 short_info->user_wired_count = 0;
13654 short_info->is_submap = 0;
13655 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13656 short_info->external_pager = 0;
13657 short_info->shadow_depth = 0;
13658 short_info->share_mode = SM_PRIVATE;
13659 short_info->ref_count = 1;
13660 }
13661 *nesting_depth = 0;
13662 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
13663 // *address = user_address;
13664 *address = vm_map_last_entry(map)->vme_end;
13665 return KERN_SUCCESS;
13666 }
13667
13668 if (next_entry == NULL) {
13669 /* ... and no VM region follows it either */
13670 return KERN_INVALID_ADDRESS;
13671 }
13672 /* ... gather info about the next VM region */
13673 curr_entry = next_entry;
13674 curr_map = next_map; /* still locked ... */
13675 curr_address = next_address;
13676 curr_skip = next_skip;
13677 curr_offset = next_offset;
13678 curr_depth = next_depth;
13679 curr_max_above = next_max_above;
13680 curr_max_below = next_max_below;
13681 } else {
13682 /* we won't need "next_entry" after all */
13683 if (next_entry != NULL) {
13684 /* release "next_map" */
13685 if (next_map != curr_map && not_in_kdp) {
13686 vm_map_unlock_read(next_map);
13687 }
13688 }
13689 }
13690 next_entry = NULL;
13691 next_map = NULL;
13692 next_offset = 0;
13693 next_skip = 0;
13694 next_depth = 0;
13695 next_max_below = -1;
13696 next_max_above = -1;
13697
13698 if (curr_entry->is_sub_map &&
13699 curr_depth < user_max_depth) {
13700 /*
13701 * We're not as deep as we could be: we must have
13702 * gone back up after not finding anything mapped
13703 * below the original top-level map entry's address range.
13704 * Let's move "curr_address" forward and recurse again.
13705 */
13706 user_address = curr_address;
13707 goto recurse_again;
13708 }
13709
13710 *nesting_depth = curr_depth;
13711 *size = curr_max_above + curr_max_below;
13712 *address = user_address + curr_skip - curr_max_below;
13713
13714 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13715 // so probably should be a real 32b ID vs. ptr.
13716 // Current users just check for equality
13717 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13718
13719 if (look_for_pages) {
13720 submap_info->user_tag = VME_ALIAS(curr_entry);
13721 submap_info->offset = VME_OFFSET(curr_entry);
13722 submap_info->protection = curr_entry->protection;
13723 submap_info->inheritance = curr_entry->inheritance;
13724 submap_info->max_protection = curr_entry->max_protection;
13725 submap_info->behavior = curr_entry->behavior;
13726 submap_info->user_wired_count = curr_entry->user_wired_count;
13727 submap_info->is_submap = curr_entry->is_sub_map;
13728 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13729 } else {
13730 short_info->user_tag = VME_ALIAS(curr_entry);
13731 short_info->offset = VME_OFFSET(curr_entry);
13732 short_info->protection = curr_entry->protection;
13733 short_info->inheritance = curr_entry->inheritance;
13734 short_info->max_protection = curr_entry->max_protection;
13735 short_info->behavior = curr_entry->behavior;
13736 short_info->user_wired_count = curr_entry->user_wired_count;
13737 short_info->is_submap = curr_entry->is_sub_map;
13738 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13739 }
13740
13741 extended.pages_resident = 0;
13742 extended.pages_swapped_out = 0;
13743 extended.pages_shared_now_private = 0;
13744 extended.pages_dirtied = 0;
13745 extended.pages_reusable = 0;
13746 extended.external_pager = 0;
13747 extended.shadow_depth = 0;
13748 extended.share_mode = SM_EMPTY;
13749 extended.ref_count = 0;
13750
13751 if (not_in_kdp) {
13752 if (!curr_entry->is_sub_map) {
13753 vm_map_offset_t range_start, range_end;
13754 range_start = MAX((curr_address - curr_max_below),
13755 curr_entry->vme_start);
13756 range_end = MIN((curr_address + curr_max_above),
13757 curr_entry->vme_end);
13758 vm_map_region_walk(curr_map,
13759 range_start,
13760 curr_entry,
13761 (VME_OFFSET(curr_entry) +
13762 (range_start -
13763 curr_entry->vme_start)),
13764 range_end - range_start,
13765 &extended,
13766 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13767 if (extended.external_pager &&
13768 extended.ref_count == 2 &&
13769 extended.share_mode == SM_SHARED) {
13770 extended.share_mode = SM_PRIVATE;
13771 }
13772 } else {
13773 if (curr_entry->use_pmap) {
13774 extended.share_mode = SM_TRUESHARED;
13775 } else {
13776 extended.share_mode = SM_PRIVATE;
13777 }
13778 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
13779 }
13780 }
13781
13782 if (look_for_pages) {
13783 submap_info->pages_resident = extended.pages_resident;
13784 submap_info->pages_swapped_out = extended.pages_swapped_out;
13785 submap_info->pages_shared_now_private =
13786 extended.pages_shared_now_private;
13787 submap_info->pages_dirtied = extended.pages_dirtied;
13788 submap_info->external_pager = extended.external_pager;
13789 submap_info->shadow_depth = extended.shadow_depth;
13790 submap_info->share_mode = extended.share_mode;
13791 submap_info->ref_count = extended.ref_count;
13792
13793 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13794 submap_info->pages_reusable = extended.pages_reusable;
13795 }
13796 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13797 submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13798 }
13799 } else {
13800 short_info->external_pager = extended.external_pager;
13801 short_info->shadow_depth = extended.shadow_depth;
13802 short_info->share_mode = extended.share_mode;
13803 short_info->ref_count = extended.ref_count;
13804 }
13805
13806 if (not_in_kdp) {
13807 vm_map_unlock_read(curr_map);
13808 }
13809
13810 return KERN_SUCCESS;
13811 }
13812
13813 /*
13814 * vm_region:
13815 *
13816 * User call to obtain information about a region in
13817 * a task's address map. Several flavors are supported:
13818 * basic (32-bit and 64-bit), extended and top info.
13819 *
13820 * XXX The reserved and behavior fields cannot be filled
13821 * in until the vm merge from the IK is completed, and
13822 * vm_reserve is implemented.
13823 */
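/*
 * Hedged usage sketch (not part of this file): one way a user-space caller
 * might reach vm_map_region() below, via the mach_vm_region() call with the
 * VM_REGION_BASIC_INFO_64 flavor handled here.  The header set and the
 * printf() reporting are illustrative assumptions, not taken from xnu.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_first_region(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	vm_region_basic_info_data_64_t info;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;
	kern_return_t kr;

	kr = mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_BASIC_INFO_64,
	    (vm_region_info_t)&info, &count, &object_name);
	if (kr == KERN_SUCCESS) {
		printf("region [0x%llx, 0x%llx) prot 0x%x max 0x%x shared %d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    (unsigned)info.protection,
		    (unsigned)info.max_protection,
		    (int)info.shared);
	}
}
#endif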
13824
13825 kern_return_t
13826 vm_map_region(
13827 vm_map_t map,
13828 vm_map_offset_t *address, /* IN/OUT */
13829 vm_map_size_t *size, /* OUT */
13830 vm_region_flavor_t flavor, /* IN */
13831 vm_region_info_t info, /* OUT */
13832 mach_msg_type_number_t *count, /* IN/OUT */
13833 mach_port_t *object_name) /* OUT */
13834 {
13835 vm_map_entry_t tmp_entry;
13836 vm_map_entry_t entry;
13837 vm_map_offset_t start;
13838
13839 if (map == VM_MAP_NULL) {
13840 return KERN_INVALID_ARGUMENT;
13841 }
13842
13843 switch (flavor) {
13844 case VM_REGION_BASIC_INFO:
13845 /* legacy for old 32-bit objects info */
13846 {
13847 vm_region_basic_info_t basic;
13848
13849 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13850 return KERN_INVALID_ARGUMENT;
13851 }
13852
13853 basic = (vm_region_basic_info_t) info;
13854 *count = VM_REGION_BASIC_INFO_COUNT;
13855
13856 vm_map_lock_read(map);
13857
13858 start = *address;
13859 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13860 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13861 vm_map_unlock_read(map);
13862 return KERN_INVALID_ADDRESS;
13863 }
13864 } else {
13865 entry = tmp_entry;
13866 }
13867
13868 start = entry->vme_start;
13869
13870 basic->offset = (uint32_t)VME_OFFSET(entry);
13871 basic->protection = entry->protection;
13872 basic->inheritance = entry->inheritance;
13873 basic->max_protection = entry->max_protection;
13874 basic->behavior = entry->behavior;
13875 basic->user_wired_count = entry->user_wired_count;
13876 basic->reserved = entry->is_sub_map;
13877 *address = start;
13878 *size = (entry->vme_end - start);
13879
13880 if (object_name) {
13881 *object_name = IP_NULL;
13882 }
13883 if (entry->is_sub_map) {
13884 basic->shared = FALSE;
13885 } else {
13886 basic->shared = entry->is_shared;
13887 }
13888
13889 vm_map_unlock_read(map);
13890 return KERN_SUCCESS;
13891 }
13892
13893 case VM_REGION_BASIC_INFO_64:
13894 {
13895 vm_region_basic_info_64_t basic;
13896
13897 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13898 return KERN_INVALID_ARGUMENT;
13899 }
13900
13901 basic = (vm_region_basic_info_64_t) info;
13902 *count = VM_REGION_BASIC_INFO_COUNT_64;
13903
13904 vm_map_lock_read(map);
13905
13906 start = *address;
13907 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13908 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13909 vm_map_unlock_read(map);
13910 return KERN_INVALID_ADDRESS;
13911 }
13912 } else {
13913 entry = tmp_entry;
13914 }
13915
13916 start = entry->vme_start;
13917
13918 basic->offset = VME_OFFSET(entry);
13919 basic->protection = entry->protection;
13920 basic->inheritance = entry->inheritance;
13921 basic->max_protection = entry->max_protection;
13922 basic->behavior = entry->behavior;
13923 basic->user_wired_count = entry->user_wired_count;
13924 basic->reserved = entry->is_sub_map;
13925 *address = start;
13926 *size = (entry->vme_end - start);
13927
13928 if (object_name) {
13929 *object_name = IP_NULL;
13930 }
13931 if (entry->is_sub_map) {
13932 basic->shared = FALSE;
13933 } else {
13934 basic->shared = entry->is_shared;
13935 }
13936
13937 vm_map_unlock_read(map);
13938 return KERN_SUCCESS;
13939 }
13940 case VM_REGION_EXTENDED_INFO:
13941 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13942 return KERN_INVALID_ARGUMENT;
13943 }
13944 /*fallthru*/
13945 case VM_REGION_EXTENDED_INFO__legacy:
13946 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
13947 return KERN_INVALID_ARGUMENT;
13948 }
13949
13950 {
13951 vm_region_extended_info_t extended;
13952 mach_msg_type_number_t original_count;
13953
13954 extended = (vm_region_extended_info_t) info;
13955
13956 vm_map_lock_read(map);
13957
13958 start = *address;
13959 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13960 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13961 vm_map_unlock_read(map);
13962 return KERN_INVALID_ADDRESS;
13963 }
13964 } else {
13965 entry = tmp_entry;
13966 }
13967 start = entry->vme_start;
13968
13969 extended->protection = entry->protection;
13970 extended->user_tag = VME_ALIAS(entry);
13971 extended->pages_resident = 0;
13972 extended->pages_swapped_out = 0;
13973 extended->pages_shared_now_private = 0;
13974 extended->pages_dirtied = 0;
13975 extended->external_pager = 0;
13976 extended->shadow_depth = 0;
13977
13978 original_count = *count;
13979 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13980 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13981 } else {
13982 extended->pages_reusable = 0;
13983 *count = VM_REGION_EXTENDED_INFO_COUNT;
13984 }
13985
13986 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13987
13988 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
13989 extended->share_mode = SM_PRIVATE;
13990 }
13991
13992 if (object_name) {
13993 *object_name = IP_NULL;
13994 }
13995 *address = start;
13996 *size = (entry->vme_end - start);
13997
13998 vm_map_unlock_read(map);
13999 return KERN_SUCCESS;
14000 }
14001 case VM_REGION_TOP_INFO:
14002 {
14003 vm_region_top_info_t top;
14004
14005 if (*count < VM_REGION_TOP_INFO_COUNT) {
14006 return KERN_INVALID_ARGUMENT;
14007 }
14008
14009 top = (vm_region_top_info_t) info;
14010 *count = VM_REGION_TOP_INFO_COUNT;
14011
14012 vm_map_lock_read(map);
14013
14014 start = *address;
14015 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14016 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14017 vm_map_unlock_read(map);
14018 return KERN_INVALID_ADDRESS;
14019 }
14020 } else {
14021 entry = tmp_entry;
14022 }
14023 start = entry->vme_start;
14024
14025 top->private_pages_resident = 0;
14026 top->shared_pages_resident = 0;
14027
14028 vm_map_region_top_walk(entry, top);
14029
14030 if (object_name) {
14031 *object_name = IP_NULL;
14032 }
14033 *address = start;
14034 *size = (entry->vme_end - start);
14035
14036 vm_map_unlock_read(map);
14037 return KERN_SUCCESS;
14038 }
14039 default:
14040 return KERN_INVALID_ARGUMENT;
14041 }
14042 }
14043
14044 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14045 MIN((entry_size), \
14046 ((obj)->all_reusable ? \
14047 (obj)->wired_page_count : \
14048 (obj)->resident_page_count - (obj)->reusable_page_count))
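/*
 * Worked illustration of OBJ_RESIDENT_COUNT() with hypothetical numbers:
 * for an object with 100 resident pages, 30 of them reusable, and
 * all_reusable not set, a 50-page map entry contributes
 * MIN(50, 100 - 30) = 50 pages, while a 200-page entry over the same
 * object would contribute MIN(200, 70) = 70.  If all_reusable were set,
 * only the wired_page_count would be counted against the entry size.
 */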
14049
14050 void
14051 vm_map_region_top_walk(
14052 vm_map_entry_t entry,
14053 vm_region_top_info_t top)
14054 {
14055 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14056 top->share_mode = SM_EMPTY;
14057 top->ref_count = 0;
14058 top->obj_id = 0;
14059 return;
14060 }
14061
14062 {
14063 struct vm_object *obj, *tmp_obj;
14064 int ref_count;
14065 uint32_t entry_size;
14066
14067 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14068
14069 obj = VME_OBJECT(entry);
14070
14071 vm_object_lock(obj);
14072
14073 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14074 ref_count--;
14075 }
14076
14077 assert(obj->reusable_page_count <= obj->resident_page_count);
14078 if (obj->shadow) {
14079 if (ref_count == 1) {
14080 top->private_pages_resident =
14081 OBJ_RESIDENT_COUNT(obj, entry_size);
14082 } else {
14083 top->shared_pages_resident =
14084 OBJ_RESIDENT_COUNT(obj, entry_size);
14085 }
14086 top->ref_count = ref_count;
14087 top->share_mode = SM_COW;
14088
14089 while ((tmp_obj = obj->shadow)) {
14090 vm_object_lock(tmp_obj);
14091 vm_object_unlock(obj);
14092 obj = tmp_obj;
14093
14094 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14095 ref_count--;
14096 }
14097
14098 assert(obj->reusable_page_count <= obj->resident_page_count);
14099 top->shared_pages_resident +=
14100 OBJ_RESIDENT_COUNT(obj, entry_size);
14101 top->ref_count += ref_count - 1;
14102 }
14103 } else {
14104 if (entry->superpage_size) {
14105 top->share_mode = SM_LARGE_PAGE;
14106 top->shared_pages_resident = 0;
14107 top->private_pages_resident = entry_size;
14108 } else if (entry->needs_copy) {
14109 top->share_mode = SM_COW;
14110 top->shared_pages_resident =
14111 OBJ_RESIDENT_COUNT(obj, entry_size);
14112 } else {
14113 if (ref_count == 1 ||
14114 (ref_count == 2 && obj->named)) {
14115 top->share_mode = SM_PRIVATE;
14116 top->private_pages_resident =
14117 OBJ_RESIDENT_COUNT(obj,
14118 entry_size);
14119 } else {
14120 top->share_mode = SM_SHARED;
14121 top->shared_pages_resident =
14122 OBJ_RESIDENT_COUNT(obj,
14123 entry_size);
14124 }
14125 }
14126 top->ref_count = ref_count;
14127 }
14128 /* XXX K64: obj_id will be truncated */
14129 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14130
14131 vm_object_unlock(obj);
14132 }
14133 }
14134
14135 void
14136 vm_map_region_walk(
14137 vm_map_t map,
14138 vm_map_offset_t va,
14139 vm_map_entry_t entry,
14140 vm_object_offset_t offset,
14141 vm_object_size_t range,
14142 vm_region_extended_info_t extended,
14143 boolean_t look_for_pages,
14144 mach_msg_type_number_t count)
14145 {
14146 struct vm_object *obj, *tmp_obj;
14147 vm_map_offset_t last_offset;
14148 int i;
14149 int ref_count;
14150 struct vm_object *shadow_object;
14151 int shadow_depth;
14152 boolean_t do_region_footprint;
14153
14154 do_region_footprint = task_self_region_footprint();
14155
14156 if ((VME_OBJECT(entry) == 0) ||
14157 (entry->is_sub_map) ||
14158 (VME_OBJECT(entry)->phys_contiguous &&
14159 !entry->superpage_size)) {
14160 extended->share_mode = SM_EMPTY;
14161 extended->ref_count = 0;
14162 return;
14163 }
14164
14165 if (entry->superpage_size) {
14166 extended->shadow_depth = 0;
14167 extended->share_mode = SM_LARGE_PAGE;
14168 extended->ref_count = 1;
14169 extended->external_pager = 0;
14170 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14172 return;
14173 }
14174
14175 obj = VME_OBJECT(entry);
14176
14177 vm_object_lock(obj);
14178
14179 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14180 ref_count--;
14181 }
14182
14183 if (look_for_pages) {
14184 for (last_offset = offset + range;
14185 offset < last_offset;
14186 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
14187 if (do_region_footprint) {
14188 int disp;
14189
14190 disp = 0;
14191 if (map->has_corpse_footprint) {
14192 /*
14193 * Query the page info data we saved
14194 * while forking the corpse.
14195 */
14196 vm_map_corpse_footprint_query_page_info(
14197 map,
14198 va,
14199 &disp);
14200 } else {
14201 /*
14202 * Query the pmap.
14203 */
14204 pmap_query_page_info(map->pmap,
14205 va,
14206 &disp);
14207 }
14208 if (disp & PMAP_QUERY_PAGE_PRESENT) {
14209 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14210 extended->pages_resident++;
14211 }
14212 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14213 extended->pages_reusable++;
14214 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
14215 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
14216 /* alternate accounting */
14217 } else {
14218 extended->pages_dirtied++;
14219 }
14220 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14221 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14222 /* alternate accounting */
14223 } else {
14224 extended->pages_swapped_out++;
14225 }
14226 }
14227 /* deal with alternate accounting */
14228 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14229 /* && not tagged as no-footprint? */
14230 VM_OBJECT_OWNER(obj) != NULL &&
14231 VM_OBJECT_OWNER(obj)->map == map) {
14232 if ((((va
14233 - entry->vme_start
14234 + VME_OFFSET(entry))
14235 / PAGE_SIZE) <
14236 (obj->resident_page_count +
14237 vm_compressor_pager_get_count(obj->pager)))) {
14238 /*
14239 * Non-volatile purgeable object owned
14240 * by this task: report the first
14241 * "#resident + #compressed" pages as
14242 * "resident" (to show that they
14243 * contribute to the footprint) but not
14244 * "dirty" (to avoid double-counting
14245 * with the fake "non-volatile" region
14246 * we'll report at the end of the
14247 * address space to account for all
14248 * (mapped or not) non-volatile memory
14249 * owned by this task).
14250 */
14251 extended->pages_resident++;
14252 }
14253 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14254 obj->purgable == VM_PURGABLE_EMPTY) &&
14255 /* && not tagged as no-footprint? */
14256 VM_OBJECT_OWNER(obj) != NULL &&
14257 VM_OBJECT_OWNER(obj)->map == map) {
14258 if ((((va
14259 - entry->vme_start
14260 + VME_OFFSET(entry))
14261 / PAGE_SIZE) <
14262 obj->wired_page_count)) {
14263 /*
14264 * Volatile|empty purgeable object owned
14265 * by this task: report the first
14266 * "#wired" pages as "resident" (to
14267 * show that they contribute to the
14268 * footprint) but not "dirty" (to avoid
14269 * double-counting with the fake
14270 * "non-volatile" region we'll report
14271 * at the end of the address space to
14272 * account for all (mapped or not)
14273 * non-volatile memory owned by this
14274 * task).
14275 */
14276 extended->pages_resident++;
14277 }
14278 } else if (obj->purgable != VM_PURGABLE_DENY) {
14279 /*
14280 * Pages from purgeable objects
14281 * will be reported as dirty
14282 * appropriately in an extra
14283 * fake memory region at the end of
14284 * the address space.
14285 */
14286 } else if (entry->iokit_acct) {
14287 /*
14288 * IOKit mappings are considered
14289 * as fully dirty for footprint's
14290 * sake.
14291 */
14292 extended->pages_dirtied++;
14293 }
14294 continue;
14295 }
14296
14297 vm_map_region_look_for_page(map, va, obj,
14298 offset, ref_count,
14299 0, extended, count);
14300 }
14301
14302 if (do_region_footprint) {
14303 goto collect_object_info;
14304 }
14305 } else {
14306 collect_object_info:
14307 shadow_object = obj->shadow;
14308 shadow_depth = 0;
14309
14310 if (!(obj->internal)) {
14311 extended->external_pager = 1;
14312 }
14313
14314 if (shadow_object != VM_OBJECT_NULL) {
14315 vm_object_lock(shadow_object);
14316 for (;
14317 shadow_object != VM_OBJECT_NULL;
14318 shadow_depth++) {
14319 vm_object_t next_shadow;
14320
14321 if (!(shadow_object->internal)) {
14322 extended->external_pager = 1;
14323 }
14324
14325 next_shadow = shadow_object->shadow;
14326 if (next_shadow) {
14327 vm_object_lock(next_shadow);
14328 }
14329 vm_object_unlock(shadow_object);
14330 shadow_object = next_shadow;
14331 }
14332 }
14333 extended->shadow_depth = shadow_depth;
14334 }
14335
14336 if (extended->shadow_depth || entry->needs_copy) {
14337 extended->share_mode = SM_COW;
14338 } else {
14339 if (ref_count == 1) {
14340 extended->share_mode = SM_PRIVATE;
14341 } else {
14342 if (obj->true_share) {
14343 extended->share_mode = SM_TRUESHARED;
14344 } else {
14345 extended->share_mode = SM_SHARED;
14346 }
14347 }
14348 }
14349 extended->ref_count = ref_count - extended->shadow_depth;
14350
14351 for (i = 0; i < extended->shadow_depth; i++) {
14352 if ((tmp_obj = obj->shadow) == 0) {
14353 break;
14354 }
14355 vm_object_lock(tmp_obj);
14356 vm_object_unlock(obj);
14357
14358 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14359 ref_count--;
14360 }
14361
14362 extended->ref_count += ref_count;
14363 obj = tmp_obj;
14364 }
14365 vm_object_unlock(obj);
14366
14367 if (extended->share_mode == SM_SHARED) {
14368 vm_map_entry_t cur;
14369 vm_map_entry_t last;
14370 int my_refs;
14371
14372 obj = VME_OBJECT(entry);
14373 last = vm_map_to_entry(map);
14374 my_refs = 0;
14375
14376 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14377 ref_count--;
14378 }
14379 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14380 my_refs += vm_map_region_count_obj_refs(cur, obj);
14381 }
14382
14383 if (my_refs == ref_count) {
14384 extended->share_mode = SM_PRIVATE_ALIASED;
14385 } else if (my_refs > 1) {
14386 extended->share_mode = SM_SHARED_ALIASED;
14387 }
14388 }
14389 }
14390
14391
14392 /* object is locked on entry and locked on return */
14393
14394
14395 static void
14396 vm_map_region_look_for_page(
14397 __unused vm_map_t map,
14398 __unused vm_map_offset_t va,
14399 vm_object_t object,
14400 vm_object_offset_t offset,
14401 int max_refcnt,
14402 int depth,
14403 vm_region_extended_info_t extended,
14404 mach_msg_type_number_t count)
14405 {
14406 vm_page_t p;
14407 vm_object_t shadow;
14408 int ref_count;
14409 vm_object_t caller_object;
14410
14411 shadow = object->shadow;
14412 caller_object = object;
14413
14414
14415 while (TRUE) {
14416 if (!(object->internal)) {
14417 extended->external_pager = 1;
14418 }
14419
14420 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14421 if (shadow && (max_refcnt == 1)) {
14422 extended->pages_shared_now_private++;
14423 }
14424
14425 if (!p->vmp_fictitious &&
14426 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14427 extended->pages_dirtied++;
14428 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14429 if (p->vmp_reusable || object->all_reusable) {
14430 extended->pages_reusable++;
14431 }
14432 }
14433
14434 extended->pages_resident++;
14435
14436 if (object != caller_object) {
14437 vm_object_unlock(object);
14438 }
14439
14440 return;
14441 }
14442 if (object->internal &&
14443 object->alive &&
14444 !object->terminating &&
14445 object->pager_ready) {
14446 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14447 == VM_EXTERNAL_STATE_EXISTS) {
14448 /* the pager has that page */
14449 extended->pages_swapped_out++;
14450 if (object != caller_object) {
14451 vm_object_unlock(object);
14452 }
14453 return;
14454 }
14455 }
14456
14457 if (shadow) {
14458 vm_object_lock(shadow);
14459
14460 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14461 ref_count--;
14462 }
14463
14464 if (++depth > extended->shadow_depth) {
14465 extended->shadow_depth = depth;
14466 }
14467
14468 if (ref_count > max_refcnt) {
14469 max_refcnt = ref_count;
14470 }
14471
14472 if (object != caller_object) {
14473 vm_object_unlock(object);
14474 }
14475
14476 offset = offset + object->vo_shadow_offset;
14477 object = shadow;
14478 shadow = object->shadow;
14479 continue;
14480 }
14481 if (object != caller_object) {
14482 vm_object_unlock(object);
14483 }
14484 break;
14485 }
14486 }
14487
14488 static int
14489 vm_map_region_count_obj_refs(
14490 vm_map_entry_t entry,
14491 vm_object_t object)
14492 {
14493 int ref_count;
14494 vm_object_t chk_obj;
14495 vm_object_t tmp_obj;
14496
14497 if (VME_OBJECT(entry) == 0) {
14498 return 0;
14499 }
14500
14501 if (entry->is_sub_map) {
14502 return 0;
14503 } else {
14504 ref_count = 0;
14505
14506 chk_obj = VME_OBJECT(entry);
14507 vm_object_lock(chk_obj);
14508
14509 while (chk_obj) {
14510 if (chk_obj == object) {
14511 ref_count++;
14512 }
14513 tmp_obj = chk_obj->shadow;
14514 if (tmp_obj) {
14515 vm_object_lock(tmp_obj);
14516 }
14517 vm_object_unlock(chk_obj);
14518
14519 chk_obj = tmp_obj;
14520 }
14521 }
14522 return ref_count;
14523 }
14524
14525
14526 /*
14527 * Routine: vm_map_simplify
14528 *
14529 * Description:
14530 * Attempt to simplify the map representation in
14531 * the vicinity of the given starting address.
14532 * Note:
14533 * This routine is intended primarily to keep the
14534 * kernel maps more compact -- they generally don't
14535 * benefit from the "expand a map entry" technology
14536 * at allocation time because the adjacent entry
14537 * is often wired down.
14538 */
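/*
 * Hedged illustration (hypothetical addresses): two adjacent entries
 * [0x1000, 0x2000) and [0x2000, 0x3000) that reference the same VM object
 * at consecutive offsets, with identical protections, inheritance and
 * flags, satisfy the checks in vm_map_simplify_entry() below and collapse
 * into a single entry [0x1000, 0x3000).
 */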
14539 void
14540 vm_map_simplify_entry(
14541 vm_map_t map,
14542 vm_map_entry_t this_entry)
14543 {
14544 vm_map_entry_t prev_entry;
14545
14546 counter(c_vm_map_simplify_entry_called++);
14547
14548 prev_entry = this_entry->vme_prev;
14549
14550 if ((this_entry != vm_map_to_entry(map)) &&
14551 (prev_entry != vm_map_to_entry(map)) &&
14552
14553 (prev_entry->vme_end == this_entry->vme_start) &&
14554
14555 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14556 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14557 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14558 prev_entry->vme_start))
14559 == VME_OFFSET(this_entry)) &&
14560
14561 (prev_entry->behavior == this_entry->behavior) &&
14562 (prev_entry->needs_copy == this_entry->needs_copy) &&
14563 (prev_entry->protection == this_entry->protection) &&
14564 (prev_entry->max_protection == this_entry->max_protection) &&
14565 (prev_entry->inheritance == this_entry->inheritance) &&
14566 (prev_entry->use_pmap == this_entry->use_pmap) &&
14567 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14568 (prev_entry->no_cache == this_entry->no_cache) &&
14569 (prev_entry->permanent == this_entry->permanent) &&
14570 (prev_entry->map_aligned == this_entry->map_aligned) &&
14571 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14572 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14573 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14574 /* from_reserved_zone: OK if that field doesn't match */
14575 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14576 (prev_entry->vme_resilient_codesign ==
14577 this_entry->vme_resilient_codesign) &&
14578 (prev_entry->vme_resilient_media ==
14579 this_entry->vme_resilient_media) &&
14580 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
14581
14582 (prev_entry->wired_count == this_entry->wired_count) &&
14583 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14584
14585 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14586 (prev_entry->in_transition == FALSE) &&
14587 (this_entry->in_transition == FALSE) &&
14588 (prev_entry->needs_wakeup == FALSE) &&
14589 (this_entry->needs_wakeup == FALSE) &&
14590 (prev_entry->is_shared == FALSE) &&
14591 (this_entry->is_shared == FALSE) &&
14592 (prev_entry->superpage_size == FALSE) &&
14593 (this_entry->superpage_size == FALSE)
14594 ) {
14595 vm_map_store_entry_unlink(map, prev_entry);
14596 assert(prev_entry->vme_start < this_entry->vme_end);
14597 if (prev_entry->map_aligned) {
14598 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14599 VM_MAP_PAGE_MASK(map)));
14600 }
14601 this_entry->vme_start = prev_entry->vme_start;
14602 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14603
14604 if (map->holelistenabled) {
14605 vm_map_store_update_first_free(map, this_entry, TRUE);
14606 }
14607
14608 if (prev_entry->is_sub_map) {
14609 vm_map_deallocate(VME_SUBMAP(prev_entry));
14610 } else {
14611 vm_object_deallocate(VME_OBJECT(prev_entry));
14612 }
14613 vm_map_entry_dispose(map, prev_entry);
14614 SAVE_HINT_MAP_WRITE(map, this_entry);
14615 counter(c_vm_map_simplified++);
14616 }
14617 }
14618
14619 void
14620 vm_map_simplify(
14621 vm_map_t map,
14622 vm_map_offset_t start)
14623 {
14624 vm_map_entry_t this_entry;
14625
14626 vm_map_lock(map);
14627 if (vm_map_lookup_entry(map, start, &this_entry)) {
14628 vm_map_simplify_entry(map, this_entry);
14629 vm_map_simplify_entry(map, this_entry->vme_next);
14630 }
14631 counter(c_vm_map_simplify_called++);
14632 vm_map_unlock(map);
14633 }
14634
14635 static void
14636 vm_map_simplify_range(
14637 vm_map_t map,
14638 vm_map_offset_t start,
14639 vm_map_offset_t end)
14640 {
14641 vm_map_entry_t entry;
14642
14643 /*
14644 * The map should be locked (for "write") by the caller.
14645 */
14646
14647 if (start >= end) {
14648 /* invalid address range */
14649 return;
14650 }
14651
14652 start = vm_map_trunc_page(start,
14653 VM_MAP_PAGE_MASK(map));
14654 end = vm_map_round_page(end,
14655 VM_MAP_PAGE_MASK(map));
14656
14657 if (!vm_map_lookup_entry(map, start, &entry)) {
14658 /* "start" is not mapped and "entry" ends before "start" */
14659 if (entry == vm_map_to_entry(map)) {
14660 /* start with first entry in the map */
14661 entry = vm_map_first_entry(map);
14662 } else {
14663 /* start with next entry */
14664 entry = entry->vme_next;
14665 }
14666 }
14667
14668 while (entry != vm_map_to_entry(map) &&
14669 entry->vme_start <= end) {
14670 /* try and coalesce "entry" with its previous entry */
14671 vm_map_simplify_entry(map, entry);
14672 entry = entry->vme_next;
14673 }
14674 }
14675
14676
14677 /*
14678 * Routine: vm_map_machine_attribute
14679 * Purpose:
14680 * Provide machine-specific attributes to mappings,
14681 * such as cacheability etc. for machines that provide
14682 * them. NUMA architectures and machines with big/strange
14683 * caches will use this.
14684 * Note:
14685 * Responsibilities for locking and checking are handled here,
14686 * everything else in the pmap module. If any non-volatile
14687 * information must be kept, the pmap module should handle
14688 * it itself. [This assumes that attributes do not
14689 * need to be inherited, which seems ok to me]
14690 */
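/*
 * Hedged usage sketch (not part of this file): how a user-space caller
 * might reach this routine through the vm_machine_attribute() MIG call,
 * requesting a cache flush for a range it owns.  The header set and the
 * wrapper are illustrative assumptions.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_range_cache(vm_address_t addr, vm_size_t size)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	/* MATTR_CACHE takes the locked traversal path implemented below */
	return vm_machine_attribute(mach_task_self(), addr, size,
	           MATTR_CACHE, &value);
}
#endif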
14691 kern_return_t
14692 vm_map_machine_attribute(
14693 vm_map_t map,
14694 vm_map_offset_t start,
14695 vm_map_offset_t end,
14696 vm_machine_attribute_t attribute,
14697 vm_machine_attribute_val_t* value) /* IN/OUT */
14698 {
14699 kern_return_t ret;
14700 vm_map_size_t sync_size;
14701 vm_map_entry_t entry;
14702
14703 if (start < vm_map_min(map) || end > vm_map_max(map)) {
14704 return KERN_INVALID_ADDRESS;
14705 }
14706
14707 /* Figure how much memory we need to flush (in page increments) */
14708 sync_size = end - start;
14709
14710 vm_map_lock(map);
14711
14712 if (attribute != MATTR_CACHE) {
14713 /* If we don't have to find physical addresses, we */
14714 /* don't have to do an explicit traversal here. */
14715 ret = pmap_attribute(map->pmap, start, end - start,
14716 attribute, value);
14717 vm_map_unlock(map);
14718 return ret;
14719 }
14720
14721 ret = KERN_SUCCESS; /* Assume it all worked */
14722
14723 while (sync_size) {
14724 if (vm_map_lookup_entry(map, start, &entry)) {
14725 vm_map_size_t sub_size;
14726 if ((entry->vme_end - start) > sync_size) {
14727 sub_size = sync_size;
14728 sync_size = 0;
14729 } else {
14730 sub_size = entry->vme_end - start;
14731 sync_size -= sub_size;
14732 }
14733 if (entry->is_sub_map) {
14734 vm_map_offset_t sub_start;
14735 vm_map_offset_t sub_end;
14736
14737 sub_start = (start - entry->vme_start)
14738 + VME_OFFSET(entry);
14739 sub_end = sub_start + sub_size;
14740 vm_map_machine_attribute(
14741 VME_SUBMAP(entry),
14742 sub_start,
14743 sub_end,
14744 attribute, value);
14745 } else {
14746 if (VME_OBJECT(entry)) {
14747 vm_page_t m;
14748 vm_object_t object;
14749 vm_object_t base_object;
14750 vm_object_t last_object;
14751 vm_object_offset_t offset;
14752 vm_object_offset_t base_offset;
14753 vm_map_size_t range;
14754 range = sub_size;
14755 offset = (start - entry->vme_start)
14756 + VME_OFFSET(entry);
14757 base_offset = offset;
14758 object = VME_OBJECT(entry);
14759 base_object = object;
14760 last_object = NULL;
14761
14762 vm_object_lock(object);
14763
14764 while (range) {
14765 m = vm_page_lookup(
14766 object, offset);
14767
14768 if (m && !m->vmp_fictitious) {
14769 ret =
14770 pmap_attribute_cache_sync(
14771 VM_PAGE_GET_PHYS_PAGE(m),
14772 PAGE_SIZE,
14773 attribute, value);
14774 } else if (object->shadow) {
14775 offset = offset + object->vo_shadow_offset;
14776 last_object = object;
14777 object = object->shadow;
14778 vm_object_lock(last_object->shadow);
14779 vm_object_unlock(last_object);
14780 continue;
14781 }
14782 range -= PAGE_SIZE;
14783
14784 if (base_object != object) {
14785 vm_object_unlock(object);
14786 vm_object_lock(base_object);
14787 object = base_object;
14788 }
14789 /* Bump to the next page */
14790 base_offset += PAGE_SIZE;
14791 offset = base_offset;
14792 }
14793 vm_object_unlock(object);
14794 }
14795 }
14796 start += sub_size;
14797 } else {
14798 vm_map_unlock(map);
14799 return KERN_FAILURE;
14800 }
14801 }
14802
14803 vm_map_unlock(map);
14804
14805 return ret;
14806 }
14807
14808 /*
14809 * vm_map_behavior_set:
14810 *
14811 * Sets the paging reference behavior of the specified address
14812 * range in the target map. Paging reference behavior affects
14813 * how pagein operations resulting from faults on the map will be
14814 * clustered.
14815 */
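/*
 * Hedged usage sketch (not part of this file): the immediate-action
 * behaviors below are typically reached from user space through
 * madvise(2).  On Darwin, MADV_FREE_REUSABLE and MADV_FREE_REUSE are
 * assumed to translate to VM_BEHAVIOR_REUSABLE and VM_BEHAVIOR_REUSE;
 * that translation lives in the BSD layer, not in this file.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <sys/mman.h>

static void
recycle_buffer(void *buf, size_t len)
{
	/* tell the VM system the contents can be discarded ... */
	(void) madvise(buf, len, MADV_FREE_REUSABLE);
	/* ... and later, that the same range is about to be reused */
	(void) madvise(buf, len, MADV_FREE_REUSE);
}
#endif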
14816 kern_return_t
14817 vm_map_behavior_set(
14818 vm_map_t map,
14819 vm_map_offset_t start,
14820 vm_map_offset_t end,
14821 vm_behavior_t new_behavior)
14822 {
14823 vm_map_entry_t entry;
14824 vm_map_entry_t temp_entry;
14825
14826 if (start > end ||
14827 start < vm_map_min(map) ||
14828 end > vm_map_max(map)) {
14829 return KERN_NO_SPACE;
14830 }
14831
14832 switch (new_behavior) {
14833 /*
14834 * This first block of behaviors all set a persistent state on the specified
14835 * memory range. All we have to do here is to record the desired behavior
14836 * in the vm_map_entry_t's.
14837 */
14838
14839 case VM_BEHAVIOR_DEFAULT:
14840 case VM_BEHAVIOR_RANDOM:
14841 case VM_BEHAVIOR_SEQUENTIAL:
14842 case VM_BEHAVIOR_RSEQNTL:
14843 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14844 vm_map_lock(map);
14845
14846 /*
14847 * The entire address range must be valid for the map.
14848 * Note that vm_map_range_check() does a
14849 * vm_map_lookup_entry() internally and returns the
14850 * entry containing the start of the address range if
14851 * the entire range is valid.
14852 */
14853 if (vm_map_range_check(map, start, end, &temp_entry)) {
14854 entry = temp_entry;
14855 vm_map_clip_start(map, entry, start);
14856 } else {
14857 vm_map_unlock(map);
14858 return KERN_INVALID_ADDRESS;
14859 }
14860
14861 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14862 vm_map_clip_end(map, entry, end);
14863 if (entry->is_sub_map) {
14864 assert(!entry->use_pmap);
14865 }
14866
14867 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
14868 entry->zero_wired_pages = TRUE;
14869 } else {
14870 entry->behavior = new_behavior;
14871 }
14872 entry = entry->vme_next;
14873 }
14874
14875 vm_map_unlock(map);
14876 break;
14877
14878 /*
14879 * The rest of these are different from the above in that they cause
14880 * an immediate action to take place as opposed to setting a behavior that
14881 * affects future actions.
14882 */
14883
14884 case VM_BEHAVIOR_WILLNEED:
14885 return vm_map_willneed(map, start, end);
14886
14887 case VM_BEHAVIOR_DONTNEED:
14888 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14889
14890 case VM_BEHAVIOR_FREE:
14891 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14892
14893 case VM_BEHAVIOR_REUSABLE:
14894 return vm_map_reusable_pages(map, start, end);
14895
14896 case VM_BEHAVIOR_REUSE:
14897 return vm_map_reuse_pages(map, start, end);
14898
14899 case VM_BEHAVIOR_CAN_REUSE:
14900 return vm_map_can_reuse(map, start, end);
14901
14902 #if MACH_ASSERT
14903 case VM_BEHAVIOR_PAGEOUT:
14904 return vm_map_pageout(map, start, end);
14905 #endif /* MACH_ASSERT */
14906
14907 default:
14908 return KERN_INVALID_ARGUMENT;
14909 }
14910
14911 return KERN_SUCCESS;
14912 }
14913
14914
14915 /*
14916 * Internals for madvise(MADV_WILLNEED) system call.
14917 *
14918 * The implementation does one of the following:
14919 * a) read-ahead, if the mapping corresponds to a mapped regular file, or
14920 * b) fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping.
14921 */
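/*
 * Hedged usage sketch (not part of this file): a user-space caller
 * typically triggers this path by mmap()ing a file and then issuing
 * madvise(MADV_WILLNEED) over a range it is about to touch.  Error
 * handling and the wrapper name are illustrative assumptions.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t len)
{
	int fd = open(path, O_RDONLY);
	if (fd < 0) {
		return NULL;
	}
	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (p == MAP_FAILED) {
		return NULL;
	}
	/* request asynchronous read-ahead for the whole mapping */
	(void) madvise(p, len, MADV_WILLNEED);
	return p;
}
#endif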
14922
14923
14924 static kern_return_t
14925 vm_map_willneed(
14926 vm_map_t map,
14927 vm_map_offset_t start,
14928 vm_map_offset_t end
14929 )
14930 {
14931 vm_map_entry_t entry;
14932 vm_object_t object;
14933 memory_object_t pager;
14934 struct vm_object_fault_info fault_info = {};
14935 kern_return_t kr;
14936 vm_object_size_t len;
14937 vm_object_offset_t offset;
14938
14939 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14940 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14941 fault_info.stealth = TRUE;
14942
14943 /*
14944 * The MADV_WILLNEED operation doesn't require any changes to the
14945 * vm_map_entry_t's, so the read lock is sufficient.
14946 */
14947
14948 vm_map_lock_read(map);
14949
14950 /*
14951 * The madvise semantics require that the address range be fully
14952 * allocated with no holes. Otherwise, we're required to return
14953 * an error.
14954 */
14955
14956 if (!vm_map_range_check(map, start, end, &entry)) {
14957 vm_map_unlock_read(map);
14958 return KERN_INVALID_ADDRESS;
14959 }
14960
14961 /*
14962 * Examine each vm_map_entry_t in the range.
14963 */
14964 for (; entry != vm_map_to_entry(map) && start < end;) {
14965 /*
14966 * The first time through, the start address could be anywhere
14967 * within the vm_map_entry we found. So adjust the offset to
14968 * correspond. After that, the offset will always be zero to
14969 * correspond to the beginning of the current vm_map_entry.
14970 */
14971 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14972
14973 /*
14974 * Set the length so we don't go beyond the end of the
14975 * map_entry or beyond the end of the range we were given.
14976 * This range could span also multiple map entries all of which
14977 * map different files, so make sure we only do the right amount
14978 * of I/O for each object. Note that it's possible for there
14979 * to be multiple map entries all referring to the same object
14980 * but with different page permissions, but it's not worth
14981 * trying to optimize that case.
14982 */
14983 len = MIN(entry->vme_end - start, end - start);
14984
14985 if ((vm_size_t) len != len) {
14986 /* 32-bit overflow */
14987 len = (vm_size_t) (0 - PAGE_SIZE);
14988 }
14989 fault_info.cluster_size = (vm_size_t) len;
14990 fault_info.lo_offset = offset;
14991 fault_info.hi_offset = offset + len;
14992 fault_info.user_tag = VME_ALIAS(entry);
14993 fault_info.pmap_options = 0;
14994 if (entry->iokit_acct ||
14995 (!entry->is_sub_map && !entry->use_pmap)) {
14996 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14997 }
14998
14999 /*
15000 * If the entry is a submap OR there's no read permission
15001 * to this mapping, then just skip it.
15002 */
15003 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15004 entry = entry->vme_next;
15005 start = entry->vme_start;
15006 continue;
15007 }
15008
15009 object = VME_OBJECT(entry);
15010
15011 if (object == NULL ||
15012 object->internal) {
15013 /*
15014 * Memory range backed by anonymous memory.
15015 */
15016 vm_size_t region_size = 0, effective_page_size = 0;
15017 vm_map_offset_t addr = 0, effective_page_mask = 0;
15018
15019 region_size = len;
15020 addr = start;
15021
15022 effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15023 effective_page_size = effective_page_mask + 1;
15024
15025 vm_map_unlock_read(map);
15026
15027 while (region_size) {
15028 vm_pre_fault(
15029 vm_map_trunc_page(addr, effective_page_mask),
15030 VM_PROT_READ | VM_PROT_WRITE);
15031
15032 region_size -= effective_page_size;
15033 addr += effective_page_size;
15034 }
15035 } else {
15036 /*
15037 * Find the file object backing this map entry. If there is
15038 * none, then we simply ignore the "will need" advice for this
15039 * entry and go on to the next one.
15040 */
15041 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15042 entry = entry->vme_next;
15043 start = entry->vme_start;
15044 continue;
15045 }
15046
15047 vm_object_paging_begin(object);
15048 pager = object->pager;
15049 vm_object_unlock(object);
15050
15051 /*
15052 * The data_request() could take a long time, so let's
15053 * release the map lock to avoid blocking other threads.
15054 */
15055 vm_map_unlock_read(map);
15056
15057 /*
15058 * Get the data from the object asynchronously.
15059 *
15060 * Note that memory_object_data_request() places limits on the
15061 * amount of I/O it will do. Regardless of the len we
15062 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15063 * silently truncates the len to that size. This isn't
15064 * necessarily bad since madvise shouldn't really be used to
15065 * page in unlimited amounts of data. Other Unix variants
15066 * limit the willneed case as well. If this turns out to be an
15067 * issue for developers, then we can always adjust the policy
15068 * here and still be backwards compatible since this is all
15069 * just "advice".
15070 */
15071 kr = memory_object_data_request(
15072 pager,
15073 offset + object->paging_offset,
15074 0, /* ignored */
15075 VM_PROT_READ,
15076 (memory_object_fault_info_t)&fault_info);
15077
15078 vm_object_lock(object);
15079 vm_object_paging_end(object);
15080 vm_object_unlock(object);
15081
15082 /*
15083 * If we couldn't do the I/O for some reason, just give up on
15084 * the madvise. We still return success to the user since
15085 * madvise isn't supposed to fail when the advice can't be
15086 * taken.
15087 */
15088
15089 if (kr != KERN_SUCCESS) {
15090 return KERN_SUCCESS;
15091 }
15092 }
15093
15094 start += len;
15095 if (start >= end) {
15096 /* done */
15097 return KERN_SUCCESS;
15098 }
15099
15100 /* look up next entry */
15101 vm_map_lock_read(map);
15102 if (!vm_map_lookup_entry(map, start, &entry)) {
15103 /*
15104 * There's a new hole in the address range.
15105 */
15106 vm_map_unlock_read(map);
15107 return KERN_INVALID_ADDRESS;
15108 }
15109 }
15110
15111 vm_map_unlock_read(map);
15112 return KERN_SUCCESS;
15113 }
15114
15115 static boolean_t
15116 vm_map_entry_is_reusable(
15117 vm_map_entry_t entry)
15118 {
15119 /* Only user map entries */
15120
15121 vm_object_t object;
15122
15123 if (entry->is_sub_map) {
15124 return FALSE;
15125 }
15126
15127 switch (VME_ALIAS(entry)) {
15128 case VM_MEMORY_MALLOC:
15129 case VM_MEMORY_MALLOC_SMALL:
15130 case VM_MEMORY_MALLOC_LARGE:
15131 case VM_MEMORY_REALLOC:
15132 case VM_MEMORY_MALLOC_TINY:
15133 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15134 case VM_MEMORY_MALLOC_LARGE_REUSED:
15135 /*
15136 * This is a malloc() memory region: check if it's still
15137 * in its original state and can be re-used for more
15138 * malloc() allocations.
15139 */
15140 break;
15141 default:
15142 /*
15143 * Not a malloc() memory region: let the caller decide if
15144 * it's re-usable.
15145 */
15146 return TRUE;
15147 }
15148
15149 if (/*entry->is_shared ||*/
15150 entry->is_sub_map ||
15151 entry->in_transition ||
15152 entry->protection != VM_PROT_DEFAULT ||
15153 entry->max_protection != VM_PROT_ALL ||
15154 entry->inheritance != VM_INHERIT_DEFAULT ||
15155 entry->no_cache ||
15156 entry->permanent ||
15157 entry->superpage_size != FALSE ||
15158 entry->zero_wired_pages ||
15159 entry->wired_count != 0 ||
15160 entry->user_wired_count != 0) {
15161 return FALSE;
15162 }
15163
15164 object = VME_OBJECT(entry);
15165 if (object == VM_OBJECT_NULL) {
15166 return TRUE;
15167 }
15168 if (
15169 #if 0
15170 /*
15171 * Let's proceed even if the VM object is potentially
15172 * shared.
15173 * We check for this later when processing the actual
15174 * VM pages, so the contents will be safe if shared.
15175 *
15176 * But we can still mark this memory region as "reusable" to
15177 * acknowledge that the caller did let us know that the memory
15178 * could be re-used and should not be penalized for holding
15179 * on to it. This allows its "resident size" to not include
15180 * the reusable range.
15181 */
15182 object->ref_count == 1 &&
15183 #endif
15184 object->wired_page_count == 0 &&
15185 object->copy == VM_OBJECT_NULL &&
15186 object->shadow == VM_OBJECT_NULL &&
15187 object->internal &&
15188 object->purgable == VM_PURGABLE_DENY &&
15189 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15190 !object->true_share &&
15191 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15192 !object->code_signed) {
15193 return TRUE;
15194 }
15195 return FALSE;
15196 }
15197
15198 static kern_return_t
15199 vm_map_reuse_pages(
15200 vm_map_t map,
15201 vm_map_offset_t start,
15202 vm_map_offset_t end)
15203 {
15204 vm_map_entry_t entry;
15205 vm_object_t object;
15206 vm_object_offset_t start_offset, end_offset;
15207
15208 /*
15209 * The MADV_REUSE operation doesn't require any changes to the
15210 * vm_map_entry_t's, so the read lock is sufficient.
15211 */
15212
15213 vm_map_lock_read(map);
15214 assert(map->pmap != kernel_pmap); /* protect alias access */
15215
15216 /*
15217 * The madvise semantics require that the address range be fully
15218 * allocated with no holes. Otherwise, we're required to return
15219 * an error.
15220 */
15221
15222 if (!vm_map_range_check(map, start, end, &entry)) {
15223 vm_map_unlock_read(map);
15224 vm_page_stats_reusable.reuse_pages_failure++;
15225 return KERN_INVALID_ADDRESS;
15226 }
15227
15228 /*
15229 * Examine each vm_map_entry_t in the range.
15230 */
15231 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15232 entry = entry->vme_next) {
15233 /*
15234 * Sanity check on the VM map entry.
15235 */
15236 if (!vm_map_entry_is_reusable(entry)) {
15237 vm_map_unlock_read(map);
15238 vm_page_stats_reusable.reuse_pages_failure++;
15239 return KERN_INVALID_ADDRESS;
15240 }
15241
15242 /*
15243 * The first time through, the start address could be anywhere
15244 * within the vm_map_entry we found. So adjust the offset to
15245 * correspond.
15246 */
15247 if (entry->vme_start < start) {
15248 start_offset = start - entry->vme_start;
15249 } else {
15250 start_offset = 0;
15251 }
15252 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15253 start_offset += VME_OFFSET(entry);
15254 end_offset += VME_OFFSET(entry);
15255
15256 assert(!entry->is_sub_map);
15257 object = VME_OBJECT(entry);
15258 if (object != VM_OBJECT_NULL) {
15259 vm_object_lock(object);
15260 vm_object_reuse_pages(object, start_offset, end_offset,
15261 TRUE);
15262 vm_object_unlock(object);
15263 }
15264
15265 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15266 /*
15267 * XXX
15268 * We do not hold the VM map exclusively here.
15269 * The "alias" field is not that critical, so it's
15270 * safe to update it here, as long as it is the only
15271 * one that can be modified while holding the VM map
15272 * "shared".
15273 */
15274 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15275 }
15276 }
15277
15278 vm_map_unlock_read(map);
15279 vm_page_stats_reusable.reuse_pages_success++;
15280 return KERN_SUCCESS;
15281 }
15282
15283
15284 static kern_return_t
15285 vm_map_reusable_pages(
15286 vm_map_t map,
15287 vm_map_offset_t start,
15288 vm_map_offset_t end)
15289 {
15290 vm_map_entry_t entry;
15291 vm_object_t object;
15292 vm_object_offset_t start_offset, end_offset;
15293 vm_map_offset_t pmap_offset;
15294
15295 /*
15296 * The MADV_REUSABLE operation doesn't require any changes to the
15297 * vm_map_entry_t's, so the read lock is sufficient.
15298 */
15299
15300 vm_map_lock_read(map);
15301 assert(map->pmap != kernel_pmap); /* protect alias access */
15302
15303 /*
15304 * The madvise semantics require that the address range be fully
15305 * allocated with no holes. Otherwise, we're required to return
15306 * an error.
15307 */
15308
15309 if (!vm_map_range_check(map, start, end, &entry)) {
15310 vm_map_unlock_read(map);
15311 vm_page_stats_reusable.reusable_pages_failure++;
15312 return KERN_INVALID_ADDRESS;
15313 }
15314
15315 /*
15316 * Examine each vm_map_entry_t in the range.
15317 */
15318 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15319 entry = entry->vme_next) {
15320 int kill_pages = 0;
15321
15322 /*
15323 * Sanity check on the VM map entry.
15324 */
15325 if (!vm_map_entry_is_reusable(entry)) {
15326 vm_map_unlock_read(map);
15327 vm_page_stats_reusable.reusable_pages_failure++;
15328 return KERN_INVALID_ADDRESS;
15329 }
15330
15331 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15332 /* not writable: can't discard contents */
15333 vm_map_unlock_read(map);
15334 vm_page_stats_reusable.reusable_nonwritable++;
15335 vm_page_stats_reusable.reusable_pages_failure++;
15336 return KERN_PROTECTION_FAILURE;
15337 }
15338
15339 /*
15340 * The first time through, the start address could be anywhere
15341 * within the vm_map_entry we found. So adjust the offset to
15342 * correspond.
15343 */
15344 if (entry->vme_start < start) {
15345 start_offset = start - entry->vme_start;
15346 pmap_offset = start;
15347 } else {
15348 start_offset = 0;
15349 pmap_offset = entry->vme_start;
15350 }
15351 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15352 start_offset += VME_OFFSET(entry);
15353 end_offset += VME_OFFSET(entry);
15354
15355 assert(!entry->is_sub_map);
15356 object = VME_OBJECT(entry);
15357 if (object == VM_OBJECT_NULL) {
15358 continue;
15359 }
15360
15361
15362 vm_object_lock(object);
15363 if (((object->ref_count == 1) ||
15364 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15365 object->copy == VM_OBJECT_NULL)) &&
15366 object->shadow == VM_OBJECT_NULL &&
15367 /*
15368 * "iokit_acct" entries are billed for their virtual size
15369 * (rather than for their resident pages only), so they
15370 * wouldn't benefit from making pages reusable, and it
15371 * would be hard to keep track of pages that are both
15372 * "iokit_acct" and "reusable" in the pmap stats and
15373 * ledgers.
15374 */
15375 !(entry->iokit_acct ||
15376 (!entry->is_sub_map && !entry->use_pmap))) {
15377 if (object->ref_count != 1) {
15378 vm_page_stats_reusable.reusable_shared++;
15379 }
15380 kill_pages = 1;
15381 } else {
15382 kill_pages = -1;
15383 }
15384 if (kill_pages != -1) {
15385 vm_object_deactivate_pages(object,
15386 start_offset,
15387 end_offset - start_offset,
15388 kill_pages,
15389 TRUE /*reusable_pages*/,
15390 map->pmap,
15391 pmap_offset);
15392 } else {
15393 vm_page_stats_reusable.reusable_pages_shared++;
15394 }
15395 vm_object_unlock(object);
15396
15397 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15398 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15399 /*
15400 * XXX
15401 * We do not hold the VM map exclusively here.
15402 * The "alias" field is not that critical, so it's
15403 * safe to update it here, as long as it is the only
15404 * one that can be modified while holding the VM map
15405 * "shared".
15406 */
15407 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15408 }
15409 }
15410
15411 vm_map_unlock_read(map);
15412 vm_page_stats_reusable.reusable_pages_success++;
15413 return KERN_SUCCESS;
15414 }
15415
15416
15417 static kern_return_t
15418 vm_map_can_reuse(
15419 vm_map_t map,
15420 vm_map_offset_t start,
15421 vm_map_offset_t end)
15422 {
15423 vm_map_entry_t entry;
15424
15425 /*
15426 * The MADV_CAN_REUSE operation doesn't require any changes to the
15427 * vm_map_entry_t's, so the read lock is sufficient.
15428 */
15429
15430 vm_map_lock_read(map);
15431 assert(map->pmap != kernel_pmap); /* protect alias access */
15432
15433 /*
15434 * The madvise semantics require that the address range be fully
15435 * allocated with no holes. Otherwise, we're required to return
15436 * an error.
15437 */
15438
15439 if (!vm_map_range_check(map, start, end, &entry)) {
15440 vm_map_unlock_read(map);
15441 vm_page_stats_reusable.can_reuse_failure++;
15442 return KERN_INVALID_ADDRESS;
15443 }
15444
15445 /*
15446 * Examine each vm_map_entry_t in the range.
15447 */
15448 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15449 entry = entry->vme_next) {
15450 /*
15451 * Sanity check on the VM map entry.
15452 */
15453 if (!vm_map_entry_is_reusable(entry)) {
15454 vm_map_unlock_read(map);
15455 vm_page_stats_reusable.can_reuse_failure++;
15456 return KERN_INVALID_ADDRESS;
15457 }
15458 }
15459
15460 vm_map_unlock_read(map);
15461 vm_page_stats_reusable.can_reuse_success++;
15462 return KERN_SUCCESS;
15463 }
15464
15465
15466 #if MACH_ASSERT
15467 static kern_return_t
15468 vm_map_pageout(
15469 vm_map_t map,
15470 vm_map_offset_t start,
15471 vm_map_offset_t end)
15472 {
15473 vm_map_entry_t entry;
15474
15475 /*
15476 * The MADV_PAGEOUT operation doesn't require any changes to the
15477 * vm_map_entry_t's, so the read lock is sufficient.
15478 */
15479
15480 vm_map_lock_read(map);
15481
15482 /*
15483 * The madvise semantics require that the address range be fully
15484 * allocated with no holes. Otherwise, we're required to return
15485 * an error.
15486 */
15487
15488 if (!vm_map_range_check(map, start, end, &entry)) {
15489 vm_map_unlock_read(map);
15490 return KERN_INVALID_ADDRESS;
15491 }
15492
15493 /*
15494 * Examine each vm_map_entry_t in the range.
15495 */
15496 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15497 entry = entry->vme_next) {
15498 vm_object_t object;
15499
15500 /*
15501 * Sanity check on the VM map entry.
15502 */
15503 if (entry->is_sub_map) {
15504 vm_map_t submap;
15505 vm_map_offset_t submap_start;
15506 vm_map_offset_t submap_end;
15507 vm_map_entry_t submap_entry;
15508
15509 submap = VME_SUBMAP(entry);
15510 submap_start = VME_OFFSET(entry);
15511 submap_end = submap_start + (entry->vme_end -
15512 entry->vme_start);
15513
15514 vm_map_lock_read(submap);
15515
15516 if (!vm_map_range_check(submap,
15517 submap_start,
15518 submap_end,
15519 &submap_entry)) {
15520 vm_map_unlock_read(submap);
15521 vm_map_unlock_read(map);
15522 return KERN_INVALID_ADDRESS;
15523 }
15524
15525 object = VME_OBJECT(submap_entry);
15526 if (submap_entry->is_sub_map ||
15527 object == VM_OBJECT_NULL ||
15528 !object->internal) {
15529 vm_map_unlock_read(submap);
15530 continue;
15531 }
15532
15533 vm_object_pageout(object);
15534
15535 vm_map_unlock_read(submap);
15536 submap = VM_MAP_NULL;
15537 submap_entry = VM_MAP_ENTRY_NULL;
15538 continue;
15539 }
15540
15541 object = VME_OBJECT(entry);
15542 if (entry->is_sub_map ||
15543 object == VM_OBJECT_NULL ||
15544 !object->internal) {
15545 continue;
15546 }
15547
15548 vm_object_pageout(object);
15549 }
15550
15551 vm_map_unlock_read(map);
15552 return KERN_SUCCESS;
15553 }
15554 #endif /* MACH_ASSERT */
15555
15556
15557 /*
15558 * Routine: vm_map_entry_insert
15559 *
15560 * Description: This routine inserts a new vm_map_entry in a locked map.
15561 */
15562 vm_map_entry_t
15563 vm_map_entry_insert(
15564 vm_map_t map,
15565 vm_map_entry_t insp_entry,
15566 vm_map_offset_t start,
15567 vm_map_offset_t end,
15568 vm_object_t object,
15569 vm_object_offset_t offset,
15570 boolean_t needs_copy,
15571 boolean_t is_shared,
15572 boolean_t in_transition,
15573 vm_prot_t cur_protection,
15574 vm_prot_t max_protection,
15575 vm_behavior_t behavior,
15576 vm_inherit_t inheritance,
15577 unsigned wired_count,
15578 boolean_t no_cache,
15579 boolean_t permanent,
15580 boolean_t no_copy_on_read,
15581 unsigned int superpage_size,
15582 boolean_t clear_map_aligned,
15583 boolean_t is_submap,
15584 boolean_t used_for_jit,
15585 int alias)
15586 {
15587 vm_map_entry_t new_entry;
15588
15589 assert(insp_entry != (vm_map_entry_t)0);
15590 vm_map_lock_assert_exclusive(map);
15591
15592 #if DEVELOPMENT || DEBUG
15593 vm_object_offset_t end_offset = 0;
15594 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15595 #endif /* DEVELOPMENT || DEBUG */
15596
15597 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15598
15599 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15600 new_entry->map_aligned = TRUE;
15601 } else {
15602 new_entry->map_aligned = FALSE;
15603 }
15604 if (clear_map_aligned &&
15605 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15606 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15607 new_entry->map_aligned = FALSE;
15608 }
15609
15610 new_entry->vme_start = start;
15611 new_entry->vme_end = end;
15612 assert(page_aligned(new_entry->vme_start));
15613 assert(page_aligned(new_entry->vme_end));
15614 if (new_entry->map_aligned) {
15615 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15616 VM_MAP_PAGE_MASK(map)));
15617 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15618 VM_MAP_PAGE_MASK(map)));
15619 }
15620 assert(new_entry->vme_start < new_entry->vme_end);
15621
15622 VME_OBJECT_SET(new_entry, object);
15623 VME_OFFSET_SET(new_entry, offset);
15624 new_entry->is_shared = is_shared;
15625 new_entry->is_sub_map = is_submap;
15626 new_entry->needs_copy = needs_copy;
15627 new_entry->in_transition = in_transition;
15628 new_entry->needs_wakeup = FALSE;
15629 new_entry->inheritance = inheritance;
15630 new_entry->protection = cur_protection;
15631 new_entry->max_protection = max_protection;
15632 new_entry->behavior = behavior;
15633 new_entry->wired_count = wired_count;
15634 new_entry->user_wired_count = 0;
15635 if (is_submap) {
15636 /*
15637 * submap: "use_pmap" means "nested".
15638 * default: false.
15639 */
15640 new_entry->use_pmap = FALSE;
15641 } else {
15642 /*
15643 * object: "use_pmap" means "use pmap accounting" for footprint.
15644 * default: true.
15645 */
15646 new_entry->use_pmap = TRUE;
15647 }
15648 VME_ALIAS_SET(new_entry, alias);
15649 new_entry->zero_wired_pages = FALSE;
15650 new_entry->no_cache = no_cache;
15651 new_entry->permanent = permanent;
15652 if (superpage_size) {
15653 new_entry->superpage_size = TRUE;
15654 } else {
15655 new_entry->superpage_size = FALSE;
15656 }
15657 if (used_for_jit) {
15658 #if CONFIG_EMBEDDED
15659 if (!(map->jit_entry_exists))
15660 #endif /* CONFIG_EMBEDDED */
15661 {
15662 new_entry->used_for_jit = TRUE;
15663 map->jit_entry_exists = TRUE;
15664 }
15665 } else {
15666 new_entry->used_for_jit = FALSE;
15667 }
15668 new_entry->pmap_cs_associated = FALSE;
15669 new_entry->iokit_acct = FALSE;
15670 new_entry->vme_resilient_codesign = FALSE;
15671 new_entry->vme_resilient_media = FALSE;
15672 new_entry->vme_atomic = FALSE;
15673 new_entry->vme_no_copy_on_read = no_copy_on_read;
15674
15675 /*
15676 * Insert the new entry into the list.
15677 */
15678
15679 vm_map_store_entry_link(map, insp_entry, new_entry,
15680 VM_MAP_KERNEL_FLAGS_NONE);
15681 map->size += end - start;
15682
15683 /*
15684 * Update the free space hint and the lookup hint.
15685 */
15686
15687 SAVE_HINT_MAP_WRITE(map, new_entry);
15688 return new_entry;
15689 }
15690
15691 /*
15692 * Routine: vm_map_remap_extract
15693 *
15694 * Description: This routine returns a vm_map_entry list from a map.
15695 */
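/*
 * Hedged usage sketch (not part of this file): this extraction step is
 * used by vm_map_remap(), which backs the mach_vm_remap() interface.
 * The call below creates a second, shared mapping of an existing range
 * in the caller's own address space; the wrapper name is illustrative.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_range(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *dst)
{
	vm_prot_t cur_prot, max_prot;

	*dst = 0;
	return mach_vm_remap(mach_task_self(), dst, size, 0 /* mask */,
	           VM_FLAGS_ANYWHERE,
	           mach_task_self(), src, FALSE /* copy */,
	           &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
}
#endif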
15696 static kern_return_t
15697 vm_map_remap_extract(
15698 vm_map_t map,
15699 vm_map_offset_t addr,
15700 vm_map_size_t size,
15701 boolean_t copy,
15702 struct vm_map_header *map_header,
15703 vm_prot_t *cur_protection,
15704 vm_prot_t *max_protection,
15705 /* What, no behavior? */
15706 vm_inherit_t inheritance,
15707 boolean_t pageable,
15708 boolean_t same_map,
15709 vm_map_kernel_flags_t vmk_flags)
15710 {
15711 kern_return_t result;
15712 vm_map_size_t mapped_size;
15713 vm_map_size_t tmp_size;
15714 vm_map_entry_t src_entry; /* result of last map lookup */
15715 vm_map_entry_t new_entry;
15716 vm_object_offset_t offset;
15717 vm_map_offset_t map_address;
15718 vm_map_offset_t src_start; /* start of entry to map */
15719 vm_map_offset_t src_end; /* end of region to be mapped */
15720 vm_object_t object;
15721 vm_map_version_t version;
15722 boolean_t src_needs_copy;
15723 boolean_t new_entry_needs_copy;
15724 vm_map_entry_t saved_src_entry;
15725 boolean_t src_entry_was_wired;
15726 vm_prot_t max_prot_for_prot_copy;
15727
15728 assert(map != VM_MAP_NULL);
15729 assert(size != 0);
15730 assert(size == vm_map_round_page(size, PAGE_MASK));
15731 assert(inheritance == VM_INHERIT_NONE ||
15732 inheritance == VM_INHERIT_COPY ||
15733 inheritance == VM_INHERIT_SHARE);
15734
15735 /*
15736 * Compute start and end of region.
15737 */
15738 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15739 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15740
15741
15742 /*
15743 * Initialize map_header.
15744 */
15745 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15746 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15747 map_header->nentries = 0;
15748 map_header->entries_pageable = pageable;
15749 map_header->page_shift = PAGE_SHIFT;
15750
15751 vm_map_store_init( map_header );
15752
15753 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15754 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15755 } else {
15756 max_prot_for_prot_copy = VM_PROT_NONE;
15757 }
15758 *cur_protection = VM_PROT_ALL;
15759 *max_protection = VM_PROT_ALL;
15760
15761 map_address = 0;
15762 mapped_size = 0;
15763 result = KERN_SUCCESS;
15764
15765 /*
15766 * The specified source virtual space might correspond to
15767 * multiple map entries, need to loop on them.
15768 */
15769 vm_map_lock(map);
15770 while (mapped_size != size) {
15771 vm_map_size_t entry_size;
15772
15773 /*
15774 * Find the beginning of the region.
15775 */
15776 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
15777 result = KERN_INVALID_ADDRESS;
15778 break;
15779 }
15780
15781 if (src_start < src_entry->vme_start ||
15782 (mapped_size && src_start != src_entry->vme_start)) {
15783 result = KERN_INVALID_ADDRESS;
15784 break;
15785 }
15786
15787 tmp_size = size - mapped_size;
15788 if (src_end > src_entry->vme_end) {
15789 tmp_size -= (src_end - src_entry->vme_end);
15790 }
15791
15792 entry_size = (vm_map_size_t)(src_entry->vme_end -
15793 src_entry->vme_start);
15794
15795 if (src_entry->is_sub_map) {
15796 vm_map_reference(VME_SUBMAP(src_entry));
15797 object = VM_OBJECT_NULL;
15798 } else {
15799 object = VME_OBJECT(src_entry);
15800 if (src_entry->iokit_acct) {
15801 /*
15802 * This entry uses "IOKit accounting".
15803 */
15804 } else if (object != VM_OBJECT_NULL &&
15805 (object->purgable != VM_PURGABLE_DENY ||
15806 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
15807 /*
15808 * Purgeable objects have their own accounting:
15809 * no pmap accounting for them.
15810 */
15811 assertf(!src_entry->use_pmap,
15812 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15813 map,
15814 src_entry,
15815 (uint64_t)src_entry->vme_start,
15816 (uint64_t)src_entry->vme_end,
15817 src_entry->protection,
15818 src_entry->max_protection,
15819 VME_ALIAS(src_entry));
15820 } else {
15821 /*
15822 * Not IOKit or purgeable:
15823 * must be accounted by pmap stats.
15824 */
15825 assertf(src_entry->use_pmap,
15826 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15827 map,
15828 src_entry,
15829 (uint64_t)src_entry->vme_start,
15830 (uint64_t)src_entry->vme_end,
15831 src_entry->protection,
15832 src_entry->max_protection,
15833 VME_ALIAS(src_entry));
15834 }
15835
15836 if (object == VM_OBJECT_NULL) {
15837 object = vm_object_allocate(entry_size);
15838 VME_OFFSET_SET(src_entry, 0);
15839 VME_OBJECT_SET(src_entry, object);
15840 assert(src_entry->use_pmap);
15841 } else if (object->copy_strategy !=
15842 MEMORY_OBJECT_COPY_SYMMETRIC) {
15843 /*
15844 * We are already using an asymmetric
15845 * copy, and therefore we already have
15846 * the right object.
15847 */
15848 assert(!src_entry->needs_copy);
15849 } else if (src_entry->needs_copy || object->shadowed ||
15850 (object->internal && !object->true_share &&
15851 !src_entry->is_shared &&
15852 object->vo_size > entry_size)) {
15853 VME_OBJECT_SHADOW(src_entry, entry_size);
15854 assert(src_entry->use_pmap);
15855
15856 if (!src_entry->needs_copy &&
15857 (src_entry->protection & VM_PROT_WRITE)) {
15858 vm_prot_t prot;
15859
15860 assert(!pmap_has_prot_policy(src_entry->protection));
15861
15862 prot = src_entry->protection & ~VM_PROT_WRITE;
15863
15864 if (override_nx(map,
15865 VME_ALIAS(src_entry))
15866 && prot) {
15867 prot |= VM_PROT_EXECUTE;
15868 }
15869
15870 assert(!pmap_has_prot_policy(prot));
15871
15872 if (map->mapped_in_other_pmaps) {
15873 vm_object_pmap_protect(
15874 VME_OBJECT(src_entry),
15875 VME_OFFSET(src_entry),
15876 entry_size,
15877 PMAP_NULL,
15878 src_entry->vme_start,
15879 prot);
15880 } else {
15881 pmap_protect(vm_map_pmap(map),
15882 src_entry->vme_start,
15883 src_entry->vme_end,
15884 prot);
15885 }
15886 }
15887
15888 object = VME_OBJECT(src_entry);
15889 src_entry->needs_copy = FALSE;
15890 }
15891
15892
15893 vm_object_lock(object);
15894 vm_object_reference_locked(object); /* object ref. for new entry */
15895 if (object->copy_strategy ==
15896 MEMORY_OBJECT_COPY_SYMMETRIC) {
15897 object->copy_strategy =
15898 MEMORY_OBJECT_COPY_DELAY;
15899 }
15900 vm_object_unlock(object);
15901 }
15902
15903 offset = (VME_OFFSET(src_entry) +
15904 (src_start - src_entry->vme_start));
15905
15906 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15907 vm_map_entry_copy(new_entry, src_entry);
15908 if (new_entry->is_sub_map) {
15909 /* clr address space specifics */
15910 new_entry->use_pmap = FALSE;
15911 } else if (copy) {
15912 /*
15913 * We're dealing with a copy-on-write operation,
15914 * so the resulting mapping should not inherit the
15915 * original mapping's accounting settings.
15916 * "use_pmap" should be reset to its default (TRUE)
15917 * so that the new mapping gets accounted for in
15918 * the task's memory footprint.
15919 */
15920 new_entry->use_pmap = TRUE;
15921 }
15922 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15923 assert(!new_entry->iokit_acct);
15924
15925 new_entry->map_aligned = FALSE;
15926
15927 new_entry->vme_start = map_address;
15928 new_entry->vme_end = map_address + tmp_size;
15929 assert(new_entry->vme_start < new_entry->vme_end);
15930 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15931 /*
15932 * Remapping for vm_map_protect(VM_PROT_COPY)
15933 * to convert a read-only mapping into a
15934 * copy-on-write version of itself but
15935 * with write access:
15936 * keep the original inheritance and add
15937 * VM_PROT_WRITE to the max protection.
15938 */
15939 new_entry->inheritance = src_entry->inheritance;
15940 new_entry->protection &= max_prot_for_prot_copy;
15941 new_entry->max_protection |= VM_PROT_WRITE;
15942 } else {
15943 new_entry->inheritance = inheritance;
15944 }
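/*
 * Illustrative example, not part of the original source: if the source
 * entry is currently VM_PROT_READ / VM_PROT_READ and the caller's
 * original max protection (captured in max_prot_for_prot_copy above)
 * is VM_PROT_READ | VM_PROT_WRITE, the new entry keeps protection
 * VM_PROT_READ but its max protection becomes READ | WRITE, which is
 * what lets vm_map_protect(VM_PROT_COPY) later grant write access to
 * the copy-on-write copy.
 */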
15945 VME_OFFSET_SET(new_entry, offset);
15946
15947 /*
15948 * The new region has to be copied now if required.
15949 */
15950 RestartCopy:
15951 if (!copy) {
15952 if (src_entry->used_for_jit == TRUE) {
15953 if (same_map) {
15954 } else {
15955 #if CONFIG_EMBEDDED
15956 /*
15957 * Cannot allow an entry describing a JIT
15958 * region to be shared across address spaces.
15959 */
15960 result = KERN_INVALID_ARGUMENT;
15961 break;
15962 #endif /* CONFIG_EMBEDDED */
15963 }
15964 }
15965
15966 src_entry->is_shared = TRUE;
15967 new_entry->is_shared = TRUE;
15968 if (!(new_entry->is_sub_map)) {
15969 new_entry->needs_copy = FALSE;
15970 }
15971 } else if (src_entry->is_sub_map) {
15972 /* make this a COW sub_map if not already */
15973 assert(new_entry->wired_count == 0);
15974 new_entry->needs_copy = TRUE;
15975 object = VM_OBJECT_NULL;
15976 } else if (src_entry->wired_count == 0 &&
15977 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
15978 VME_OFFSET(new_entry),
15979 (new_entry->vme_end -
15980 new_entry->vme_start),
15981 &src_needs_copy,
15982 &new_entry_needs_copy)) {
15983 new_entry->needs_copy = new_entry_needs_copy;
15984 new_entry->is_shared = FALSE;
15985 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15986
15987 /*
15988 * Handle copy_on_write semantics.
15989 */
15990 if (src_needs_copy && !src_entry->needs_copy) {
15991 vm_prot_t prot;
15992
15993 assert(!pmap_has_prot_policy(src_entry->protection));
15994
15995 prot = src_entry->protection & ~VM_PROT_WRITE;
15996
15997 if (override_nx(map,
15998 VME_ALIAS(src_entry))
15999 && prot) {
16000 prot |= VM_PROT_EXECUTE;
16001 }
16002
16003 assert(!pmap_has_prot_policy(prot));
16004
16005 vm_object_pmap_protect(object,
16006 offset,
16007 entry_size,
16008 ((src_entry->is_shared
16009 || map->mapped_in_other_pmaps) ?
16010 PMAP_NULL : map->pmap),
16011 src_entry->vme_start,
16012 prot);
16013
16014 assert(src_entry->wired_count == 0);
16015 src_entry->needs_copy = TRUE;
16016 }
16017 /*
16018 * Throw away the old object reference of the new entry.
16019 */
16020 vm_object_deallocate(object);
16021 } else {
16022 new_entry->is_shared = FALSE;
16023 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16024
16025 src_entry_was_wired = (src_entry->wired_count > 0);
16026 saved_src_entry = src_entry;
16027 src_entry = VM_MAP_ENTRY_NULL;
16028
16029 /*
16030 * The map can be safely unlocked since we
16031 * already hold a reference on the object.
16032 *
16033 * Record the timestamp of the map for later
16034 * verification, and unlock the map.
16035 */
16036 version.main_timestamp = map->timestamp;
16037 vm_map_unlock(map); /* Increments timestamp once! */
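/*
 * Illustrative note, not part of the original source: if the timestamp
 * saved in "version" above was 41, this unlock bumps it to 42.  The
 * "version.main_timestamp + 1 != map->timestamp" check after re-locking
 * below therefore only triggers a retry if some other writer locked and
 * unlocked the map while the copy was in progress.
 */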
16038
16039 /*
16040 * Perform the copy.
16041 */
16042 if (src_entry_was_wired > 0) {
16043 vm_object_lock(object);
16044 result = vm_object_copy_slowly(
16045 object,
16046 offset,
16047 (new_entry->vme_end -
16048 new_entry->vme_start),
16049 THREAD_UNINT,
16050 VME_OBJECT_PTR(new_entry));
16051
16052 VME_OFFSET_SET(new_entry, 0);
16053 new_entry->needs_copy = FALSE;
16054 } else {
16055 vm_object_offset_t new_offset;
16056
16057 new_offset = VME_OFFSET(new_entry);
16058 result = vm_object_copy_strategically(
16059 object,
16060 offset,
16061 (new_entry->vme_end -
16062 new_entry->vme_start),
16063 VME_OBJECT_PTR(new_entry),
16064 &new_offset,
16065 &new_entry_needs_copy);
16066 if (new_offset != VME_OFFSET(new_entry)) {
16067 VME_OFFSET_SET(new_entry, new_offset);
16068 }
16069
16070 new_entry->needs_copy = new_entry_needs_copy;
16071 }
16072
16073 /*
16074 * Throw away the old object reference of the new entry.
16075 */
16076 vm_object_deallocate(object);
16077
16078 if (result != KERN_SUCCESS &&
16079 result != KERN_MEMORY_RESTART_COPY) {
16080 _vm_map_entry_dispose(map_header, new_entry);
16081 vm_map_lock(map);
16082 break;
16083 }
16084
16085 /*
16086 * Verify that the map has not substantially
16087 * changed while the copy was being made.
16088 */
16089
16090 vm_map_lock(map);
16091 if (version.main_timestamp + 1 != map->timestamp) {
16092 /*
16093 * Simple version comparison failed.
16094 *
16095 * Retry the lookup and verify that the
16096 * same object/offset are still present.
16097 */
16098 saved_src_entry = VM_MAP_ENTRY_NULL;
16099 vm_object_deallocate(VME_OBJECT(new_entry));
16100 _vm_map_entry_dispose(map_header, new_entry);
16101 if (result == KERN_MEMORY_RESTART_COPY) {
16102 result = KERN_SUCCESS;
16103 }
16104 continue;
16105 }
16106 /* map hasn't changed: src_entry is still valid */
16107 src_entry = saved_src_entry;
16108 saved_src_entry = VM_MAP_ENTRY_NULL;
16109
16110 if (result == KERN_MEMORY_RESTART_COPY) {
16111 vm_object_reference(object);
16112 goto RestartCopy;
16113 }
16114 }
16115
16116 _vm_map_store_entry_link(map_header,
16117 map_header->links.prev, new_entry);
16118
16119 /* Protections for submap mapping are irrelevant here */
16120 if (!src_entry->is_sub_map) {
16121 *cur_protection &= src_entry->protection;
16122 *max_protection &= src_entry->max_protection;
16123 }
16124 map_address += tmp_size;
16125 mapped_size += tmp_size;
16126 src_start += tmp_size;
16127 } /* end while */
16128
16129 vm_map_unlock(map);
16130 if (result != KERN_SUCCESS) {
16131 /*
16132 * Free all allocated elements.
16133 */
16134 for (src_entry = map_header->links.next;
16135 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16136 src_entry = new_entry) {
16137 new_entry = src_entry->vme_next;
16138 _vm_map_store_entry_unlink(map_header, src_entry);
16139 if (src_entry->is_sub_map) {
16140 vm_map_deallocate(VME_SUBMAP(src_entry));
16141 } else {
16142 vm_object_deallocate(VME_OBJECT(src_entry));
16143 }
16144 _vm_map_entry_dispose(map_header, src_entry);
16145 }
16146 }
16147 return result;
16148 }
16149
16150 /*
16151 * Routine: vm_remap
16152 *
16153 * Map a portion of a task's address space.
16154 * The mapped region must not overlap more than
16155 * one VM memory object. Protections and
16156 * inheritance attributes remain the same
16157 * as in the original task and are returned as out parameters.
16158 * Source and target tasks can be identical.
16159 * Other attributes are identical to those for vm_map().
16160 */
16161 kern_return_t
16162 vm_map_remap(
16163 vm_map_t target_map,
16164 vm_map_address_t *address,
16165 vm_map_size_t size,
16166 vm_map_offset_t mask,
16167 int flags,
16168 vm_map_kernel_flags_t vmk_flags,
16169 vm_tag_t tag,
16170 vm_map_t src_map,
16171 vm_map_offset_t memory_address,
16172 boolean_t copy,
16173 vm_prot_t *cur_protection,
16174 vm_prot_t *max_protection,
16175 vm_inherit_t inheritance)
16176 {
16177 kern_return_t result;
16178 vm_map_entry_t entry;
16179 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16180 vm_map_entry_t new_entry;
16181 struct vm_map_header map_header;
16182 vm_map_offset_t offset_in_mapping;
16183
16184 if (target_map == VM_MAP_NULL) {
16185 return KERN_INVALID_ARGUMENT;
16186 }
16187
16188 switch (inheritance) {
16189 case VM_INHERIT_NONE:
16190 case VM_INHERIT_COPY:
16191 case VM_INHERIT_SHARE:
16192 if (size != 0 && src_map != VM_MAP_NULL) {
16193 break;
16194 }
16195 /*FALL THRU*/
16196 default:
16197 return KERN_INVALID_ARGUMENT;
16198 }
16199
16200 /*
16201 * If the user is requesting that we return the address of the
16202 * first byte of the data (rather than the base of the page),
16203 * then we use different rounding semantics: specifically,
16204 * we assume that (memory_address, size) describes a region
16205 * all of whose pages we must cover, rather than a base to be truncated
16206 * down and a size to be added to that base. So we figure out
16207 * the highest page that the requested region includes and make
16208 * sure that the size will cover it.
16209 *
16210 * The key example we're worried about is of the form:
16211 *
16212 * memory_address = 0x1ff0, size = 0x20
16213 *
16214 * With the old semantics, we round down the memory_address to 0x1000
16215 * and round up the size to 0x1000, resulting in our covering *only*
16216 * page 0x1000. With the new semantics, we'd realize that the region covers
16217 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
16218 * 0x1000 and page 0x2000 in the region we remap.
16219 */
16220 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16221 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16222 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16223 } else {
16224 size = vm_map_round_page(size, PAGE_MASK);
16225 }
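/*
 * Worked illustration of the arithmetic above, not part of the original
 * source, using the 0x1ff0/0x20 example with 4K pages (PAGE_MASK 0xfff):
 *
 *	offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0
 *	size              = round(0x1ff0 + 0x20 - 0x1000, 0xfff)
 *	                  = round(0x1010, 0xfff) = 0x2000
 *
 * so the remapped region covers pages 0x1000 and 0x2000, and the caller
 * later gets *address + 0xff0 back as the data address.
 */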
16226 if (size == 0) {
16227 return KERN_INVALID_ARGUMENT;
16228 }
16229
16230 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16231 /* must be copy-on-write to be "media resilient" */
16232 if (!copy) {
16233 return KERN_INVALID_ARGUMENT;
16234 }
16235 }
16236
16237 result = vm_map_remap_extract(src_map, memory_address,
16238 size, copy, &map_header,
16239 cur_protection,
16240 max_protection,
16241 inheritance,
16242 target_map->hdr.entries_pageable,
16243 src_map == target_map,
16244 vmk_flags);
16245
16246 if (result != KERN_SUCCESS) {
16247 return result;
16248 }
16249
16250 /*
16251 * Allocate/check a range of free virtual address
16252 * space for the target
16253 */
16254 *address = vm_map_trunc_page(*address,
16255 VM_MAP_PAGE_MASK(target_map));
16256 vm_map_lock(target_map);
16257 result = vm_map_remap_range_allocate(target_map, address, size,
16258 mask, flags, vmk_flags, tag,
16259 &insp_entry);
16260
16261 for (entry = map_header.links.next;
16262 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16263 entry = new_entry) {
16264 new_entry = entry->vme_next;
16265 _vm_map_store_entry_unlink(&map_header, entry);
16266 if (result == KERN_SUCCESS) {
16267 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16268 /* no codesigning -> read-only access */
16269 entry->max_protection = VM_PROT_READ;
16270 entry->protection = VM_PROT_READ;
16271 entry->vme_resilient_codesign = TRUE;
16272 }
16273 entry->vme_start += *address;
16274 entry->vme_end += *address;
16275 assert(!entry->map_aligned);
16276 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16277 !entry->is_sub_map &&
16278 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16279 VME_OBJECT(entry)->internal)) {
16280 entry->vme_resilient_media = TRUE;
16281 }
16282 vm_map_store_entry_link(target_map, insp_entry, entry,
16283 vmk_flags);
16284 insp_entry = entry;
16285 } else {
16286 if (!entry->is_sub_map) {
16287 vm_object_deallocate(VME_OBJECT(entry));
16288 } else {
16289 vm_map_deallocate(VME_SUBMAP(entry));
16290 }
16291 _vm_map_entry_dispose(&map_header, entry);
16292 }
16293 }
16294
16295 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16296 *cur_protection = VM_PROT_READ;
16297 *max_protection = VM_PROT_READ;
16298 }
16299
16300 if (target_map->disable_vmentry_reuse == TRUE) {
16301 assert(!target_map->is_nested_map);
16302 if (target_map->highest_entry_end < insp_entry->vme_end) {
16303 target_map->highest_entry_end = insp_entry->vme_end;
16304 }
16305 }
16306
16307 if (result == KERN_SUCCESS) {
16308 target_map->size += size;
16309 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16310
16311 #if PMAP_CS
16312 if (*max_protection & VM_PROT_EXECUTE) {
16313 vm_map_address_t region_start = 0, region_size = 0;
16314 struct pmap_cs_code_directory *region_cd = NULL;
16315 vm_map_address_t base = 0;
16316 struct pmap_cs_lookup_results results = {};
16317 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16318 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16319
16320 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16321 region_size = results.region_size;
16322 region_start = results.region_start;
16323 region_cd = results.region_cd_entry;
16324 base = results.base;
16325
16326 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16327 *cur_protection = VM_PROT_READ;
16328 *max_protection = VM_PROT_READ;
16329 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16330 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16331 page_addr, page_addr + assoc_size, *address,
16332 region_start, region_size,
16333 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16334 );
16335 }
16336 }
16337 #endif
16338 }
16339 vm_map_unlock(target_map);
16340
16341 if (result == KERN_SUCCESS && target_map->wiring_required) {
16342 result = vm_map_wire_kernel(target_map, *address,
16343 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16344 TRUE);
16345 }
16346
16347 /*
16348 * If requested, return the address of the data pointed to by the
16349 * request, rather than the base of the resulting page.
16350 */
16351 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16352 *address += offset_in_mapping;
16353 }
16354
16355 return result;
16356 }
16357
16358 /*
16359 * Routine: vm_map_remap_range_allocate
16360 *
16361 * Description:
16362 * Allocate a range in the specified virtual address map.
16363 * Returns the address and the map entry just before the allocated
16364 * range.
16365 *
16366 * Map must be locked.
16367 */
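/*
 * Illustrative sketch, not part of the original source: the caller
 * (vm_map_remap() above) takes the map lock, calls this routine, and
 * then links the extracted entries after the returned insertion point:
 *
 *	vm_map_lock(target_map);
 *	kr = vm_map_remap_range_allocate(target_map, &address, size,
 *	    mask, flags, vmk_flags, tag, &insp_entry);
 */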
16368
16369 static kern_return_t
16370 vm_map_remap_range_allocate(
16371 vm_map_t map,
16372 vm_map_address_t *address, /* IN/OUT */
16373 vm_map_size_t size,
16374 vm_map_offset_t mask,
16375 int flags,
16376 vm_map_kernel_flags_t vmk_flags,
16377 __unused vm_tag_t tag,
16378 vm_map_entry_t *map_entry) /* OUT */
16379 {
16380 vm_map_entry_t entry;
16381 vm_map_offset_t start;
16382 vm_map_offset_t end;
16383 vm_map_offset_t desired_empty_end;
16384 kern_return_t kr;
16385 vm_map_entry_t hole_entry;
16386
16387 StartAgain:;
16388
16389 start = *address;
16390
16391 if (flags & VM_FLAGS_ANYWHERE) {
16392 if (flags & VM_FLAGS_RANDOM_ADDR) {
16393 /*
16394 * Get a random start address.
16395 */
16396 kr = vm_map_random_address_for_size(map, address, size);
16397 if (kr != KERN_SUCCESS) {
16398 return kr;
16399 }
16400 start = *address;
16401 }
16402
16403 /*
16404 * Calculate the first possible address.
16405 */
16406
16407 if (start < map->min_offset) {
16408 start = map->min_offset;
16409 }
16410 if (start > map->max_offset) {
16411 return KERN_NO_SPACE;
16412 }
16413
16414 /*
16415 * Look for the first possible address;
16416 * if there's already something at this
16417 * address, we have to start after it.
16418 */
16419
16420 if (map->disable_vmentry_reuse == TRUE) {
16421 VM_MAP_HIGHEST_ENTRY(map, entry, start);
16422 } else {
16423 if (map->holelistenabled) {
16424 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16425
16426 if (hole_entry == NULL) {
16427 /*
16428 * No more space in the map?
16429 */
16430 return KERN_NO_SPACE;
16431 } else {
16432 boolean_t found_hole = FALSE;
16433
16434 do {
16435 if (hole_entry->vme_start >= start) {
16436 start = hole_entry->vme_start;
16437 found_hole = TRUE;
16438 break;
16439 }
16440
16441 if (hole_entry->vme_end > start) {
16442 found_hole = TRUE;
16443 break;
16444 }
16445 hole_entry = hole_entry->vme_next;
16446 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16447
16448 if (found_hole == FALSE) {
16449 return KERN_NO_SPACE;
16450 }
16451
16452 entry = hole_entry;
16453 }
16454 } else {
16455 assert(first_free_is_valid(map));
16456 if (start == map->min_offset) {
16457 if ((entry = map->first_free) != vm_map_to_entry(map)) {
16458 start = entry->vme_end;
16459 }
16460 } else {
16461 vm_map_entry_t tmp_entry;
16462 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
16463 start = tmp_entry->vme_end;
16464 }
16465 entry = tmp_entry;
16466 }
16467 }
16468 start = vm_map_round_page(start,
16469 VM_MAP_PAGE_MASK(map));
16470 }
16471
16472 /*
16473 * In any case, the "entry" always precedes
16474 * the proposed new region throughout the
16475 * loop:
16476 */
16477
16478 while (TRUE) {
16479 vm_map_entry_t next;
16480
16481 /*
16482 * Find the end of the proposed new region.
16483 * Be sure we didn't go beyond the end, or
16484 * wrap around the address.
16485 */
16486
16487 end = ((start + mask) & ~mask);
16488 end = vm_map_round_page(end,
16489 VM_MAP_PAGE_MASK(map));
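/*
 * Illustrative arithmetic, not part of the original source: with an
 * alignment mask of 0x3fff (16K alignment) and start == 0x5000,
 * (0x5000 + 0x3fff) & ~0x3fff == 0x8000, so the candidate region is
 * pushed up to the next 16K boundary (and then page-rounded) before
 * "size" is added below.
 */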
16490 if (end < start) {
16491 return KERN_NO_SPACE;
16492 }
16493 start = end;
16494 end += size;
16495
16496 /* We want an entire page of empty space, but don't increase the allocation size. */
16497 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16498
16499 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16500 if (map->wait_for_space) {
16501 if (size <= (map->max_offset -
16502 map->min_offset)) {
16503 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16504 vm_map_unlock(map);
16505 thread_block(THREAD_CONTINUE_NULL);
16506 vm_map_lock(map);
16507 goto StartAgain;
16508 }
16509 }
16510
16511 return KERN_NO_SPACE;
16512 }
16513
16514 next = entry->vme_next;
16515
16516 if (map->holelistenabled) {
16517 if (entry->vme_end >= desired_empty_end) {
16518 break;
16519 }
16520 } else {
16521 /*
16522 * If there are no more entries, we must win.
16523 *
16524 * OR
16525 *
16526 * If there is another entry, it must be
16527 * after the end of the potential new region.
16528 */
16529
16530 if (next == vm_map_to_entry(map)) {
16531 break;
16532 }
16533
16534 if (next->vme_start >= desired_empty_end) {
16535 break;
16536 }
16537 }
16538
16539 /*
16540 * Didn't fit -- move to the next entry.
16541 */
16542
16543 entry = next;
16544
16545 if (map->holelistenabled) {
16546 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16547 /*
16548 * Wrapped around
16549 */
16550 return KERN_NO_SPACE;
16551 }
16552 start = entry->vme_start;
16553 } else {
16554 start = entry->vme_end;
16555 }
16556 }
16557
16558 if (map->holelistenabled) {
16559 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16560 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16561 }
16562 }
16563
16564 *address = start;
16565 } else {
16566 vm_map_entry_t temp_entry;
16567
16568 /*
16569 * Verify that:
16570 * the address doesn't itself violate
16571 * the mask requirement.
16572 */
16573
16574 if ((start & mask) != 0) {
16575 return KERN_NO_SPACE;
16576 }
16577
16578
16579 /*
16580 * ... the address is within bounds
16581 */
16582
16583 end = start + size;
16584
16585 if ((start < map->min_offset) ||
16586 (end > map->max_offset) ||
16587 (start >= end)) {
16588 return KERN_INVALID_ADDRESS;
16589 }
16590
16591 /*
16592 * If we're asked to overwrite whatever was mapped in that
16593 * range, first deallocate that range.
16594 */
16595 if (flags & VM_FLAGS_OVERWRITE) {
16596 vm_map_t zap_map;
16597 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16598
16599 /*
16600 * We use a "zap_map" to avoid having to unlock
16601 * the "map" in vm_map_delete(), which would compromise
16602 * the atomicity of the "deallocate" and then "remap"
16603 * combination.
16604 */
16605 zap_map = vm_map_create(PMAP_NULL,
16606 start,
16607 end,
16608 map->hdr.entries_pageable);
16609 if (zap_map == VM_MAP_NULL) {
16610 return KERN_RESOURCE_SHORTAGE;
16611 }
16612 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16613 vm_map_disable_hole_optimization(zap_map);
16614
16615 if (vmk_flags.vmkf_overwrite_immutable) {
16616 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16617 }
16618 kr = vm_map_delete(map, start, end,
16619 remove_flags,
16620 zap_map);
16621 if (kr == KERN_SUCCESS) {
16622 vm_map_destroy(zap_map,
16623 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16624 zap_map = VM_MAP_NULL;
16625 }
16626 }
16627
16628 /*
16629 * ... the starting address isn't allocated
16630 */
16631
16632 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16633 return KERN_NO_SPACE;
16634 }
16635
16636 entry = temp_entry;
16637
16638 /*
16639 * ... the next region doesn't overlap the
16640 * end point.
16641 */
16642
16643 if ((entry->vme_next != vm_map_to_entry(map)) &&
16644 (entry->vme_next->vme_start < end)) {
16645 return KERN_NO_SPACE;
16646 }
16647 }
16648 *map_entry = entry;
16649 return KERN_SUCCESS;
16650 }
16651
16652 /*
16653 * vm_map_switch:
16654 *
16655 * Set the address map for the current thread to the specified map
16656 */
16657
16658 vm_map_t
16659 vm_map_switch(
16660 vm_map_t map)
16661 {
16662 int mycpu;
16663 thread_t thread = current_thread();
16664 vm_map_t oldmap = thread->map;
16665
16666 mp_disable_preemption();
16667 mycpu = cpu_number();
16668
16669 /*
16670 * Deactivate the current map and activate the requested map
16671 */
16672 PMAP_SWITCH_USER(thread, map, mycpu);
16673
16674 mp_enable_preemption();
16675 return oldmap;
16676 }
16677
16678
16679 /*
16680 * Routine: vm_map_write_user
16681 *
16682 * Description:
16683 * Copy out data from a kernel space into space in the
16684 * destination map. The space must already exist in the
16685 * destination map.
16686 * NOTE: This routine should only be called by threads
16687 * which can block on a page fault, i.e. kernel-mode user
16688 * threads.
16689 *
16690 */
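/*
 * Illustrative sketch, not part of the original source: a kernel-mode
 * caller holding a reference on "task_map" could push a small structure
 * to a user address it obtained earlier.  The names "task_map",
 * "user_buf" and "stats" are hypothetical:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_write_user(task_map, &stats,
 *	    (vm_map_address_t)user_buf, sizeof(stats));
 *	if (kr != KERN_SUCCESS) {
 *		// user_buf was not a valid, writable user address
 *	}
 */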
16691 kern_return_t
16692 vm_map_write_user(
16693 vm_map_t map,
16694 void *src_p,
16695 vm_map_address_t dst_addr,
16696 vm_size_t size)
16697 {
16698 kern_return_t kr = KERN_SUCCESS;
16699
16700 if (current_map() == map) {
16701 if (copyout(src_p, dst_addr, size)) {
16702 kr = KERN_INVALID_ADDRESS;
16703 }
16704 } else {
16705 vm_map_t oldmap;
16706
16707 /* take on the identity of the target map while doing */
16708 /* the transfer */
16709
16710 vm_map_reference(map);
16711 oldmap = vm_map_switch(map);
16712 if (copyout(src_p, dst_addr, size)) {
16713 kr = KERN_INVALID_ADDRESS;
16714 }
16715 vm_map_switch(oldmap);
16716 vm_map_deallocate(map);
16717 }
16718 return kr;
16719 }
16720
16721 /*
16722 * Routine: vm_map_read_user
16723 *
16724 * Description:
16725 * Copy in data from a user space source map into the
16726 * kernel map. The space must already exist in the
16727 * kernel map.
16728 * NOTE: This routine should only be called by threads
16729 * which can block on a page fault, i.e. kernel-mode user
16730 * threads.
16731 *
16732 */
16733 kern_return_t
16734 vm_map_read_user(
16735 vm_map_t map,
16736 vm_map_address_t src_addr,
16737 void *dst_p,
16738 vm_size_t size)
16739 {
16740 kern_return_t kr = KERN_SUCCESS;
16741
16742 if (current_map() == map) {
16743 if (copyin(src_addr, dst_p, size)) {
16744 kr = KERN_INVALID_ADDRESS;
16745 }
16746 } else {
16747 vm_map_t oldmap;
16748
16749 /* take on the identity of the target map while doing */
16750 /* the transfer */
16751
16752 vm_map_reference(map);
16753 oldmap = vm_map_switch(map);
16754 if (copyin(src_addr, dst_p, size)) {
16755 kr = KERN_INVALID_ADDRESS;
16756 }
16757 vm_map_switch(oldmap);
16758 vm_map_deallocate(map);
16759 }
16760 return kr;
16761 }
16762
16763
16764 /*
16765 * vm_map_check_protection:
16766 *
16767 * Assert that the target map allows the specified
16768 * privilege on the entire address region given.
16769 * The entire region must be allocated.
16770 */
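/*
 * Illustrative sketch, not part of the original source: a caller that
 * wants to verify a fully mapped, writable range before operating on it
 * could do ("addr" and "len" are hypothetical):
 *
 *	if (!vm_map_check_protection(map, addr, addr + len,
 *	        VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */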
16771 boolean_t
16772 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16773 vm_map_offset_t end, vm_prot_t protection)
16774 {
16775 vm_map_entry_t entry;
16776 vm_map_entry_t tmp_entry;
16777
16778 vm_map_lock(map);
16779
16780 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
16781 vm_map_unlock(map);
16782 return FALSE;
16783 }
16784
16785 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16786 vm_map_unlock(map);
16787 return FALSE;
16788 }
16789
16790 entry = tmp_entry;
16791
16792 while (start < end) {
16793 if (entry == vm_map_to_entry(map)) {
16794 vm_map_unlock(map);
16795 return FALSE;
16796 }
16797
16798 /*
16799 * No holes allowed!
16800 */
16801
16802 if (start < entry->vme_start) {
16803 vm_map_unlock(map);
16804 return FALSE;
16805 }
16806
16807 /*
16808 * Check protection associated with entry.
16809 */
16810
16811 if ((entry->protection & protection) != protection) {
16812 vm_map_unlock(map);
16813 return FALSE;
16814 }
16815
16816 /* go to next entry */
16817
16818 start = entry->vme_end;
16819 entry = entry->vme_next;
16820 }
16821 vm_map_unlock(map);
16822 return TRUE;
16823 }
16824
16825 kern_return_t
16826 vm_map_purgable_control(
16827 vm_map_t map,
16828 vm_map_offset_t address,
16829 vm_purgable_t control,
16830 int *state)
16831 {
16832 vm_map_entry_t entry;
16833 vm_object_t object;
16834 kern_return_t kr;
16835 boolean_t was_nonvolatile;
16836
16837 /*
16838 * Vet all the input parameters and current type and state of the
16839 * underlying object. Return with an error if anything is amiss.
16840 */
16841 if (map == VM_MAP_NULL) {
16842 return KERN_INVALID_ARGUMENT;
16843 }
16844
16845 if (control != VM_PURGABLE_SET_STATE &&
16846 control != VM_PURGABLE_GET_STATE &&
16847 control != VM_PURGABLE_PURGE_ALL &&
16848 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16849 return KERN_INVALID_ARGUMENT;
16850 }
16851
16852 if (control == VM_PURGABLE_PURGE_ALL) {
16853 vm_purgeable_object_purge_all();
16854 return KERN_SUCCESS;
16855 }
16856
16857 if ((control == VM_PURGABLE_SET_STATE ||
16858 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16859 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16860 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16861 return KERN_INVALID_ARGUMENT;
16862 }
16863
16864 vm_map_lock_read(map);
16865
16866 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16867 /*
16868 * Must pass a valid non-submap address.
16869 */
16870 vm_map_unlock_read(map);
16871 return KERN_INVALID_ADDRESS;
16872 }
16873
16874 if ((entry->protection & VM_PROT_WRITE) == 0) {
16875 /*
16876 * Can't apply purgable controls to something you can't write.
16877 */
16878 vm_map_unlock_read(map);
16879 return KERN_PROTECTION_FAILURE;
16880 }
16881
16882 object = VME_OBJECT(entry);
16883 if (object == VM_OBJECT_NULL ||
16884 object->purgable == VM_PURGABLE_DENY) {
16885 /*
16886 * Object must already be present and be purgeable.
16887 */
16888 vm_map_unlock_read(map);
16889 return KERN_INVALID_ARGUMENT;
16890 }
16891
16892 vm_object_lock(object);
16893
16894 #if 00
16895 if (VME_OFFSET(entry) != 0 ||
16896 entry->vme_end - entry->vme_start != object->vo_size) {
16897 /*
16898 * Can only apply purgable controls to the whole (existing)
16899 * object at once.
16900 */
16901 vm_map_unlock_read(map);
16902 vm_object_unlock(object);
16903 return KERN_INVALID_ARGUMENT;
16904 }
16905 #endif
16906
16907 assert(!entry->is_sub_map);
16908 assert(!entry->use_pmap); /* purgeable has its own accounting */
16909
16910 vm_map_unlock_read(map);
16911
16912 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16913
16914 kr = vm_object_purgable_control(object, control, state);
16915
16916 if (was_nonvolatile &&
16917 object->purgable != VM_PURGABLE_NONVOLATILE &&
16918 map->pmap == kernel_pmap) {
16919 #if DEBUG
16920 object->vo_purgeable_volatilizer = kernel_task;
16921 #endif /* DEBUG */
16922 }
16923
16924 vm_object_unlock(object);
16925
16926 return kr;
16927 }
16928
16929 kern_return_t
16930 vm_map_page_query_internal(
16931 vm_map_t target_map,
16932 vm_map_offset_t offset,
16933 int *disposition,
16934 int *ref_count)
16935 {
16936 kern_return_t kr;
16937 vm_page_info_basic_data_t info;
16938 mach_msg_type_number_t count;
16939
16940 count = VM_PAGE_INFO_BASIC_COUNT;
16941 kr = vm_map_page_info(target_map,
16942 offset,
16943 VM_PAGE_INFO_BASIC,
16944 (vm_page_info_t) &info,
16945 &count);
16946 if (kr == KERN_SUCCESS) {
16947 *disposition = info.disposition;
16948 *ref_count = info.ref_count;
16949 } else {
16950 *disposition = 0;
16951 *ref_count = 0;
16952 }
16953
16954 return kr;
16955 }
16956
16957 kern_return_t
16958 vm_map_page_info(
16959 vm_map_t map,
16960 vm_map_offset_t offset,
16961 vm_page_info_flavor_t flavor,
16962 vm_page_info_t info,
16963 mach_msg_type_number_t *count)
16964 {
16965 return vm_map_page_range_info_internal(map,
16966 offset, /* start of range */
16967 (offset + 1), /* this will get rounded in the call to the page boundary */
16968 flavor,
16969 info,
16970 count);
16971 }
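/*
 * Illustrative note, not part of the original source: passing
 * (offset, offset + 1) makes the range routine below inspect exactly
 * the page containing "offset", since it truncates the start and rounds
 * the end to page boundaries; e.g. offset 0x1234 becomes the single
 * page [0x1000, 0x2000).
 */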
16972
16973 kern_return_t
16974 vm_map_page_range_info_internal(
16975 vm_map_t map,
16976 vm_map_offset_t start_offset,
16977 vm_map_offset_t end_offset,
16978 vm_page_info_flavor_t flavor,
16979 vm_page_info_t info,
16980 mach_msg_type_number_t *count)
16981 {
16982 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16983 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16984 vm_page_t m = VM_PAGE_NULL;
16985 kern_return_t retval = KERN_SUCCESS;
16986 int disposition = 0;
16987 int ref_count = 0;
16988 int depth = 0, info_idx = 0;
16989 vm_page_info_basic_t basic_info = 0;
16990 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16991 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16992 boolean_t do_region_footprint;
16993 ledger_amount_t ledger_resident, ledger_compressed;
16994
16995 switch (flavor) {
16996 case VM_PAGE_INFO_BASIC:
16997 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
16998 /*
16999 * The "vm_page_info_basic_data" structure was not
17000 * properly padded, so allow the size to be off by
17001 * one to maintain backwards binary compatibility...
17002 */
17003 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
17004 return KERN_INVALID_ARGUMENT;
17005 }
17006 }
17007 break;
17008 default:
17009 return KERN_INVALID_ARGUMENT;
17010 }
17011
17012 do_region_footprint = task_self_region_footprint();
17013 disposition = 0;
17014 ref_count = 0;
17015 depth = 0;
17016 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
17017 retval = KERN_SUCCESS;
17018
17019 offset_in_page = start_offset & PAGE_MASK;
17020 start = vm_map_trunc_page(start_offset, PAGE_MASK);
17021 end = vm_map_round_page(end_offset, PAGE_MASK);
17022
17023 if (end < start) {
17024 return KERN_INVALID_ARGUMENT;
17025 }
17026
17027 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
17028
17029 vm_map_lock_read(map);
17030
17031 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17032
17033 for (curr_s_offset = start; curr_s_offset < end;) {
17034 /*
17035 * New lookup needs reset of these variables.
17036 */
17037 curr_object = object = VM_OBJECT_NULL;
17038 offset_in_object = 0;
17039 ref_count = 0;
17040 depth = 0;
17041
17042 if (do_region_footprint &&
17043 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
17044 /*
17045 * Request for "footprint" info about a page beyond
17046 * the end of address space: this must be for
17047 * the fake region vm_map_region_recurse_64()
17048 * reported to account for non-volatile purgeable
17049 * memory owned by this task.
17050 */
17051 disposition = 0;
17052
17053 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
17054 (unsigned) ledger_compressed) {
17055 /*
17056 * We haven't reported all the "non-volatile
17057 * compressed" pages yet, so report this fake
17058 * page as "compressed".
17059 */
17060 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17061 } else {
17062 /*
17063 * We've reported all the non-volatile
17064 * compressed pages but not all the non-volatile
17065 * pages, so report this fake page as
17066 * "resident dirty".
17067 */
17068 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17069 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17070 disposition |= VM_PAGE_QUERY_PAGE_REF;
17071 }
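/*
 * Illustrative note, not part of the original source: if the task's
 * compressed footprint is, say, three pages, the fake pages within that
 * distance past the last entry's end are reported as "paged out" and
 * every fake page after that as resident and dirty, so a caller walking
 * the fake region sees the compressed portion first and the resident
 * portion after it.
 */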
17072 switch (flavor) {
17073 case VM_PAGE_INFO_BASIC:
17074 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17075 basic_info->disposition = disposition;
17076 basic_info->ref_count = 1;
17077 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17078 basic_info->offset = 0;
17079 basic_info->depth = 0;
17080
17081 info_idx++;
17082 break;
17083 }
17084 curr_s_offset += PAGE_SIZE;
17085 continue;
17086 }
17087
17088 /*
17089 * First, find the map entry covering "curr_s_offset", going down
17090 * submaps if necessary.
17091 */
17092 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17093 /* no entry -> no object -> no page */
17094
17095 if (curr_s_offset < vm_map_min(map)) {
17096 /*
17097 * Illegal address that falls below map min.
17098 */
17099 curr_e_offset = MIN(end, vm_map_min(map));
17100 } else if (curr_s_offset >= vm_map_max(map)) {
17101 /*
17102 * Illegal address that falls on/after map max.
17103 */
17104 curr_e_offset = end;
17105 } else if (map_entry == vm_map_to_entry(map)) {
17106 /*
17107 * Hit a hole.
17108 */
17109 if (map_entry->vme_next == vm_map_to_entry(map)) {
17110 /*
17111 * Empty map.
17112 */
17113 curr_e_offset = MIN(map->max_offset, end);
17114 } else {
17115 /*
17116 * Hole at start of the map.
17117 */
17118 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17119 }
17120 } else {
17121 if (map_entry->vme_next == vm_map_to_entry(map)) {
17122 /*
17123 * Hole at the end of the map.
17124 */
17125 curr_e_offset = MIN(map->max_offset, end);
17126 } else {
17127 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17128 }
17129 }
17130
17131 assert(curr_e_offset >= curr_s_offset);
17132
17133 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17134
17135 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17136
17137 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17138
17139 curr_s_offset = curr_e_offset;
17140
17141 info_idx += num_pages;
17142
17143 continue;
17144 }
17145
17146 /* compute offset from this map entry's start */
17147 offset_in_object = curr_s_offset - map_entry->vme_start;
17148
17149 /* compute offset into this map entry's object (or submap) */
17150 offset_in_object += VME_OFFSET(map_entry);
17151
17152 if (map_entry->is_sub_map) {
17153 vm_map_t sub_map = VM_MAP_NULL;
17154 vm_page_info_t submap_info = 0;
17155 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17156
17157 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17158
17159 submap_s_offset = offset_in_object;
17160 submap_e_offset = submap_s_offset + range_len;
17161
17162 sub_map = VME_SUBMAP(map_entry);
17163
17164 vm_map_reference(sub_map);
17165 vm_map_unlock_read(map);
17166
17167 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17168
17169 retval = vm_map_page_range_info_internal(sub_map,
17170 submap_s_offset,
17171 submap_e_offset,
17172 VM_PAGE_INFO_BASIC,
17173 (vm_page_info_t) submap_info,
17174 count);
17175
17176 assert(retval == KERN_SUCCESS);
17177
17178 vm_map_lock_read(map);
17179 vm_map_deallocate(sub_map);
17180
17181 /* Move the "info" index by the number of pages we inspected.*/
17182 info_idx += range_len >> PAGE_SHIFT;
17183
17184 /* Move our current offset by the size of the range we inspected.*/
17185 curr_s_offset += range_len;
17186
17187 continue;
17188 }
17189
17190 object = VME_OBJECT(map_entry);
17191 if (object == VM_OBJECT_NULL) {
17192 /*
17193 * We don't have an object here and, hence,
17194 * no pages to inspect. We'll fill up the
17195 * info structure appropriately.
17196 */
17197
17198 curr_e_offset = MIN(map_entry->vme_end, end);
17199
17200 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17201
17202 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17203
17204 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17205
17206 curr_s_offset = curr_e_offset;
17207
17208 info_idx += num_pages;
17209
17210 continue;
17211 }
17212
17213 if (do_region_footprint) {
17214 int pmap_disp;
17215
17216 disposition = 0;
17217 pmap_disp = 0;
17218 if (map->has_corpse_footprint) {
17219 /*
17220 * Query the page info data we saved
17221 * while forking the corpse.
17222 */
17223 vm_map_corpse_footprint_query_page_info(
17224 map,
17225 curr_s_offset,
17226 &pmap_disp);
17227 } else {
17228 /*
17229 * Query the pmap.
17230 */
17231 pmap_query_page_info(map->pmap,
17232 curr_s_offset,
17233 &pmap_disp);
17234 }
17235 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17236 /* && not tagged as no-footprint? */
17237 VM_OBJECT_OWNER(object) != NULL &&
17238 VM_OBJECT_OWNER(object)->map == map) {
17239 if ((((curr_s_offset
17240 - map_entry->vme_start
17241 + VME_OFFSET(map_entry))
17242 / PAGE_SIZE) <
17243 (object->resident_page_count +
17244 vm_compressor_pager_get_count(object->pager)))) {
17245 /*
17246 * Non-volatile purgeable object owned
17247 * by this task: report the first
17248 * "#resident + #compressed" pages as
17249 * "resident" (to show that they
17250 * contribute to the footprint) but not
17251 * "dirty" (to avoid double-counting
17252 * with the fake "non-volatile" region
17253 * we'll report at the end of the
17254 * address space to account for all
17255 * (mapped or not) non-volatile memory
17256 * owned by this task).
17257 */
17258 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17259 }
17260 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
17261 object->purgable == VM_PURGABLE_EMPTY) &&
17262 /* && not tagged as no-footprint? */
17263 VM_OBJECT_OWNER(object) != NULL &&
17264 VM_OBJECT_OWNER(object)->map == map) {
17265 if ((((curr_s_offset
17266 - map_entry->vme_start
17267 + VME_OFFSET(map_entry))
17268 / PAGE_SIZE) <
17269 object->wired_page_count)) {
17270 /*
17271 * Volatile|empty purgeable object owned
17272 * by this task: report the first
17273 * "#wired" pages as "resident" (to
17274 * show that they contribute to the
17275 * footprint) but not "dirty" (to avoid
17276 * double-counting with the fake
17277 * "non-volatile" region we'll report
17278 * at the end of the address space to
17279 * account for all (mapped or not)
17280 * non-volatile memory owned by this
17281 * task).
17282 */
17283 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17284 }
17285 } else if (map_entry->iokit_acct &&
17286 object->internal &&
17287 object->purgable == VM_PURGABLE_DENY) {
17288 /*
17289 * Non-purgeable IOKit memory: phys_footprint
17290 * includes the entire virtual mapping.
17291 */
17292 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17293 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17294 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17295 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17296 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17297 /* alternate accounting */
17298 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17299 if (map->pmap->footprint_was_suspended ||
17300 /*
17301 * XXX corpse does not know if original
17302 * pmap had its footprint suspended...
17303 */
17304 map->has_corpse_footprint) {
17305 /*
17306 * The assertion below can fail if dyld
17307 * suspended footprint accounting
17308 * while doing some adjustments to
17309 * this page; the mapping would say
17310 * "use pmap accounting" but the page
17311 * would be marked "alternate
17312 * accounting".
17313 */
17314 } else
17315 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17316 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17317 pmap_disp = 0;
17318 } else {
17319 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17320 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17321 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17322 disposition |= VM_PAGE_QUERY_PAGE_REF;
17323 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17324 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17325 } else {
17326 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17327 }
17328 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17329 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17330 }
17331 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17332 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17333 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17334 }
17335 }
17336 switch (flavor) {
17337 case VM_PAGE_INFO_BASIC:
17338 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17339 basic_info->disposition = disposition;
17340 basic_info->ref_count = 1;
17341 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17342 basic_info->offset = 0;
17343 basic_info->depth = 0;
17344
17345 info_idx++;
17346 break;
17347 }
17348 curr_s_offset += PAGE_SIZE;
17349 continue;
17350 }
17351
17352 vm_object_reference(object);
17353 /*
17354 * Shared mode -- so we can allow other readers
17355 * to grab the lock too.
17356 */
17357 vm_object_lock_shared(object);
17358
17359 curr_e_offset = MIN(map_entry->vme_end, end);
17360
17361 vm_map_unlock_read(map);
17362
17363 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17364
17365 curr_object = object;
17366
17367 for (; curr_s_offset < curr_e_offset;) {
17368 if (object == curr_object) {
17369 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17370 } else {
17371 ref_count = curr_object->ref_count;
17372 }
17373
17374 curr_offset_in_object = offset_in_object;
17375
17376 for (;;) {
17377 m = vm_page_lookup(curr_object, curr_offset_in_object);
17378
17379 if (m != VM_PAGE_NULL) {
17380 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17381 break;
17382 } else {
17383 if (curr_object->internal &&
17384 curr_object->alive &&
17385 !curr_object->terminating &&
17386 curr_object->pager_ready) {
17387 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17388 == VM_EXTERNAL_STATE_EXISTS) {
17389 /* the pager has that page */
17390 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17391 break;
17392 }
17393 }
17394
17395 /*
17396 * Go down the VM object shadow chain until we find the page
17397 * we're looking for.
17398 */
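/*
 * Illustrative note, not part of the original source: for a
 * copy-on-write mapping whose page has not been written yet, the page
 * still lives in a backing (shadowed) object, so this walk adds
 * vo_shadow_offset, trades this object's lock for the shadow's, and
 * bumps "depth" for each level it descends.
 */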
17399
17400 if (curr_object->shadow != VM_OBJECT_NULL) {
17401 vm_object_t shadow = VM_OBJECT_NULL;
17402
17403 curr_offset_in_object += curr_object->vo_shadow_offset;
17404 shadow = curr_object->shadow;
17405
17406 vm_object_lock_shared(shadow);
17407 vm_object_unlock(curr_object);
17408
17409 curr_object = shadow;
17410 depth++;
17411 continue;
17412 } else {
17413 break;
17414 }
17415 }
17416 }
17417
17418 /* The ref_count is not strictly accurate; it measures the number */
17419 /* of entities holding a ref on the object. They may not be mapping */
17420 /* the object, or may not be mapping the section holding the */
17421 /* target page, but it's still a ballpark number and, though an over- */
17422 /* count, it picks up the copy-on-write cases. */
17423
17424 /* We could also get a picture of page sharing from pmap_attributes */
17425 /* but this would undercount, as only faulted-in mappings would */
17426 /* show up. */
17427
17428 if ((curr_object == object) && curr_object->shadow) {
17429 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17430 }
17431
17432 if (!curr_object->internal) {
17433 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17434 }
17435
17436 if (m != VM_PAGE_NULL) {
17437 if (m->vmp_fictitious) {
17438 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17439 } else {
17440 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
17441 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17442 }
17443
17444 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
17445 disposition |= VM_PAGE_QUERY_PAGE_REF;
17446 }
17447
17448 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
17449 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17450 }
17451
17452 if (m->vmp_cs_validated) {
17453 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17454 }
17455 if (m->vmp_cs_tainted) {
17456 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17457 }
17458 if (m->vmp_cs_nx) {
17459 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17460 }
17461 if (m->vmp_reusable || curr_object->all_reusable) {
17462 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17463 }
17464 }
17465 }
17466
17467 switch (flavor) {
17468 case VM_PAGE_INFO_BASIC:
17469 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17470 basic_info->disposition = disposition;
17471 basic_info->ref_count = ref_count;
17472 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17473 VM_KERNEL_ADDRPERM(curr_object);
17474 basic_info->offset =
17475 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17476 basic_info->depth = depth;
17477
17478 info_idx++;
17479 break;
17480 }
17481
17482 disposition = 0;
17483 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17484
17485 /*
17486 * Move to next offset in the range and in our object.
17487 */
17488 curr_s_offset += PAGE_SIZE;
17489 offset_in_object += PAGE_SIZE;
17490 curr_offset_in_object = offset_in_object;
17491
17492 if (curr_object != object) {
17493 vm_object_unlock(curr_object);
17494
17495 curr_object = object;
17496
17497 vm_object_lock_shared(curr_object);
17498 } else {
17499 vm_object_lock_yield_shared(curr_object);
17500 }
17501 }
17502
17503 vm_object_unlock(curr_object);
17504 vm_object_deallocate(curr_object);
17505
17506 vm_map_lock_read(map);
17507 }
17508
17509 vm_map_unlock_read(map);
17510 return retval;
17511 }
17512
17513 /*
17514 * vm_map_msync
17515 *
17516 * Synchronises the memory range specified with its backing store
17517 * image by either flushing or cleaning the contents to the appropriate
17518 * memory manager, engaging in a memory object synchronize dialog with
17519 * the manager. The client doesn't return until the manager issues
17520 * m_o_s_completed message. MIG Magically converts user task parameter
17521 * to the task's address map.
17522 *
17523 * interpretation of sync_flags
17524 * VM_SYNC_INVALIDATE - discard pages, only return precious
17525 * pages to manager.
17526 *
17527 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17528 * - discard pages, write dirty or precious
17529 * pages back to memory manager.
17530 *
17531 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17532 * - write dirty or precious pages back to
17533 * the memory manager.
17534 *
17535 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17536 * is a hole in the region, and we would
17537 * have returned KERN_SUCCESS, return
17538 * KERN_INVALID_ADDRESS instead.
17539 *
17540 * NOTE
17541 * The memory object attributes have not yet been implemented; this
17542 * function will have to deal with the invalidate attribute.
17543 *
17544 * RETURNS
17545 * KERN_INVALID_TASK Bad task parameter
17546 * KERN_INVALID_ARGUMENT both sync and async were specified.
17547 * KERN_SUCCESS The usual.
17548 * KERN_INVALID_ADDRESS There was a hole in the region.
17549 */
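/*
 * Illustrative sketch, not part of the original source: flushing a
 * pager-backed range synchronously and discarding its pages, the way an
 * msync(MS_SYNC | MS_INVALIDATE)-style caller might ("addr" and "len"
 * are hypothetical):
 *
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		// part of [addr, addr + len) was unmapped
 *	}
 */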
17550
17551 kern_return_t
17552 vm_map_msync(
17553 vm_map_t map,
17554 vm_map_address_t address,
17555 vm_map_size_t size,
17556 vm_sync_t sync_flags)
17557 {
17558 vm_map_entry_t entry;
17559 vm_map_size_t amount_left;
17560 vm_object_offset_t offset;
17561 boolean_t do_sync_req;
17562 boolean_t had_hole = FALSE;
17563 vm_map_offset_t pmap_offset;
17564
17565 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17566 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17567 return KERN_INVALID_ARGUMENT;
17568 }
17569
17570 /*
17571 * align address and size on page boundaries
17572 */
17573 size = (vm_map_round_page(address + size,
17574 VM_MAP_PAGE_MASK(map)) -
17575 vm_map_trunc_page(address,
17576 VM_MAP_PAGE_MASK(map)));
17577 address = vm_map_trunc_page(address,
17578 VM_MAP_PAGE_MASK(map));
17579
17580 if (map == VM_MAP_NULL) {
17581 return KERN_INVALID_TASK;
17582 }
17583
17584 if (size == 0) {
17585 return KERN_SUCCESS;
17586 }
17587
17588 amount_left = size;
17589
17590 while (amount_left > 0) {
17591 vm_object_size_t flush_size;
17592 vm_object_t object;
17593
17594 vm_map_lock(map);
17595 if (!vm_map_lookup_entry(map,
17596 address,
17597 &entry)) {
17598 vm_map_size_t skip;
17599
17600 /*
17601 * hole in the address map.
17602 */
17603 had_hole = TRUE;
17604
17605 if (sync_flags & VM_SYNC_KILLPAGES) {
17606 /*
17607 * For VM_SYNC_KILLPAGES, there should be
17608 * no holes in the range, since we couldn't
17609 * prevent someone else from allocating in
17610 * that hole and we wouldn't want to "kill"
17611 * their pages.
17612 */
17613 vm_map_unlock(map);
17614 break;
17615 }
17616
17617 /*
17618 * Check for empty map.
17619 */
17620 if (entry == vm_map_to_entry(map) &&
17621 entry->vme_next == entry) {
17622 vm_map_unlock(map);
17623 break;
17624 }
17625 /*
17626 * Check that we don't wrap and that
17627 * we have at least one real map entry.
17628 */
17629 if ((map->hdr.nentries == 0) ||
17630 (entry->vme_next->vme_start < address)) {
17631 vm_map_unlock(map);
17632 break;
17633 }
17634 /*
17635 * Move up to the next entry if needed
17636 */
17637 skip = (entry->vme_next->vme_start - address);
17638 if (skip >= amount_left) {
17639 amount_left = 0;
17640 } else {
17641 amount_left -= skip;
17642 }
17643 address = entry->vme_next->vme_start;
17644 vm_map_unlock(map);
17645 continue;
17646 }
17647
17648 offset = address - entry->vme_start;
17649 pmap_offset = address;
17650
17651 /*
17652 * do we have more to flush than is contained in this
17653 * entry ?
17654 */
17655 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17656 flush_size = entry->vme_end -
17657 (entry->vme_start + offset);
17658 } else {
17659 flush_size = amount_left;
17660 }
17661 amount_left -= flush_size;
17662 address += flush_size;
17663
17664 if (entry->is_sub_map == TRUE) {
17665 vm_map_t local_map;
17666 vm_map_offset_t local_offset;
17667
17668 local_map = VME_SUBMAP(entry);
17669 local_offset = VME_OFFSET(entry);
17670 vm_map_reference(local_map);
17671 vm_map_unlock(map);
17672 if (vm_map_msync(
17673 local_map,
17674 local_offset,
17675 flush_size,
17676 sync_flags) == KERN_INVALID_ADDRESS) {
17677 had_hole = TRUE;
17678 }
17679 vm_map_deallocate(local_map);
17680 continue;
17681 }
17682 object = VME_OBJECT(entry);
17683
17684 /*
17685 * We can't sync this object if the object has not been
17686 * created yet
17687 */
17688 if (object == VM_OBJECT_NULL) {
17689 vm_map_unlock(map);
17690 continue;
17691 }
17692 offset += VME_OFFSET(entry);
17693
17694 vm_object_lock(object);
17695
17696 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17697 int kill_pages = 0;
17698 boolean_t reusable_pages = FALSE;
17699
17700 if (sync_flags & VM_SYNC_KILLPAGES) {
17701 if (((object->ref_count == 1) ||
17702 ((object->copy_strategy !=
17703 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17704 (object->copy == VM_OBJECT_NULL))) &&
17705 (object->shadow == VM_OBJECT_NULL)) {
17706 if (object->ref_count != 1) {
17707 vm_page_stats_reusable.free_shared++;
17708 }
17709 kill_pages = 1;
17710 } else {
17711 kill_pages = -1;
17712 }
17713 }
17714 if (kill_pages != -1) {
17715 vm_object_deactivate_pages(
17716 object,
17717 offset,
17718 (vm_object_size_t) flush_size,
17719 kill_pages,
17720 reusable_pages,
17721 map->pmap,
17722 pmap_offset);
17723 }
17724 vm_object_unlock(object);
17725 vm_map_unlock(map);
17726 continue;
17727 }
17728 /*
17729 * We can't sync this object if there isn't a pager.
17730 * Don't bother to sync internal objects, since there can't
17731 * be any "permanent" storage for these objects anyway.
17732 */
17733 if ((object->pager == MEMORY_OBJECT_NULL) ||
17734 (object->internal) || (object->private)) {
17735 vm_object_unlock(object);
17736 vm_map_unlock(map);
17737 continue;
17738 }
17739 /*
17740 * keep a reference on the object until syncing is done
17741 */
17742 vm_object_reference_locked(object);
17743 vm_object_unlock(object);
17744
17745 vm_map_unlock(map);
17746
17747 do_sync_req = vm_object_sync(object,
17748 offset,
17749 flush_size,
17750 sync_flags & VM_SYNC_INVALIDATE,
17751 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17752 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17753 sync_flags & VM_SYNC_SYNCHRONOUS);
17754
17755 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17756 /*
17757 * clear out the clustering and read-ahead hints
17758 */
17759 vm_object_lock(object);
17760
17761 object->pages_created = 0;
17762 object->pages_used = 0;
17763 object->sequential = 0;
17764 object->last_alloc = 0;
17765
17766 vm_object_unlock(object);
17767 }
17768 vm_object_deallocate(object);
17769 } /* while */
17770
17771 /* for proper msync() behaviour */
17772 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17773 return KERN_INVALID_ADDRESS;
17774 }
17775
17776 return KERN_SUCCESS;
17777 }/* vm_msync */
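/*
 * Illustrative usage sketch (not part of the original source): a hypothetical
 * in-kernel caller flushing and invalidating a range of the current map,
 * treating any hole in the range as an error via VM_SYNC_CONTIGUOUS.  The
 * address and size below are made-up values for illustration only.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_msync(current_map(),
 *	    (vm_map_address_t)0x100000000ULL,	// hypothetical start address
 *	    (vm_map_size_t)(16 * PAGE_SIZE),	// hypothetical length
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		// the range contained a hole
 *	}
 *
 * VM_SYNC_SYNCHRONOUS and VM_SYNC_ASYNCHRONOUS are mutually exclusive;
 * passing both yields KERN_INVALID_ARGUMENT.
 */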
17778
17779 /*
17780 * Routine: convert_port_entry_to_map
17781 * Purpose:
17782 * Convert from a port specifying an entry or a task
17783 * to a map. Doesn't consume the port ref; produces a map ref,
17784 * which may be null. Unlike convert_port_to_map, the
17785 * port may be backed by a task or by a named entry.
17786 * Conditions:
17787 * Nothing locked.
17788 */
17789
17790
17791 vm_map_t
17792 convert_port_entry_to_map(
17793 ipc_port_t port)
17794 {
17795 vm_map_t map;
17796 vm_named_entry_t named_entry;
17797 uint32_t try_failed_count = 0;
17798
17799 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17800 while (TRUE) {
17801 ip_lock(port);
17802 if (ip_active(port) && (ip_kotype(port)
17803 == IKOT_NAMED_ENTRY)) {
17804 named_entry =
17805 (vm_named_entry_t)port->ip_kobject;
17806 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17807 ip_unlock(port);
17808
17809 try_failed_count++;
17810 mutex_pause(try_failed_count);
17811 continue;
17812 }
17813 named_entry->ref_count++;
17814 lck_mtx_unlock(&(named_entry)->Lock);
17815 ip_unlock(port);
17816 if ((named_entry->is_sub_map) &&
17817 (named_entry->protection
17818 & VM_PROT_WRITE)) {
17819 map = named_entry->backing.map;
17820 } else {
17821 mach_destroy_memory_entry(port);
17822 return VM_MAP_NULL;
17823 }
17824 vm_map_reference_swap(map);
17825 mach_destroy_memory_entry(port);
17826 break;
17827 } else {
17828 return VM_MAP_NULL;
17829 }
17830 }
17831 } else {
17832 map = convert_port_to_map(port);
17833 }
17834
17835 return map;
17836 }
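/*
 * Illustrative sketch (not part of the original source) of the reference
 * counting contract documented above: the caller keeps its reference on
 * "port" and must release the map reference produced here.
 *
 *	vm_map_t map;
 *
 *	map = convert_port_entry_to_map(port);
 *	if (map != VM_MAP_NULL) {
 *		// ... use the map ...
 *		vm_map_deallocate(map);	// drop the reference we were given
 *	}
 */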
17837
17838 /*
17839 * Routine: convert_port_entry_to_object
17840 * Purpose:
17841 * Convert from a port specifying a named entry to an
17842 * object. Doesn't consume the port ref; produces an object ref,
17843 * which may be null.
17844 * Conditions:
17845 * Nothing locked.
17846 */
17847
17848
17849 vm_object_t
17850 convert_port_entry_to_object(
17851 ipc_port_t port)
17852 {
17853 vm_object_t object = VM_OBJECT_NULL;
17854 vm_named_entry_t named_entry;
17855 uint32_t try_failed_count = 0;
17856
17857 if (IP_VALID(port) &&
17858 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17859 try_again:
17860 ip_lock(port);
17861 if (ip_active(port) &&
17862 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17863 named_entry = (vm_named_entry_t)port->ip_kobject;
17864 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17865 ip_unlock(port);
17866 try_failed_count++;
17867 mutex_pause(try_failed_count);
17868 goto try_again;
17869 }
17870 named_entry->ref_count++;
17871 lck_mtx_unlock(&(named_entry)->Lock);
17872 ip_unlock(port);
17873 if (!(named_entry->is_sub_map) &&
17874 !(named_entry->is_copy) &&
17875 (named_entry->protection & VM_PROT_WRITE)) {
17876 object = named_entry->backing.object;
17877 vm_object_reference(object);
17878 }
17879 mach_destroy_memory_entry(port);
17880 }
17881 }
17882
17883 return object;
17884 }
17885
17886 /*
17887 * Export routines to other components for the things we access locally through
17888 * macros.
17889 */
17890 #undef current_map
17891 vm_map_t
17892 current_map(void)
17893 {
17894 return current_map_fast();
17895 }
17896
17897 /*
17898 * vm_map_reference:
17899 *
17900 * Most code internal to the osfmk will go through a
17901 * macro defining this. This is always here for the
17902 * use of other kernel components.
17903 */
17904 #undef vm_map_reference
17905 void
17906 vm_map_reference(
17907 vm_map_t map)
17908 {
17909 if (map == VM_MAP_NULL) {
17910 return;
17911 }
17912
17913 lck_mtx_lock(&map->s_lock);
17914 #if TASK_SWAPPER
17915 assert(map->res_count > 0);
17916 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
17917 map->res_count++;
17918 #endif
17919 os_ref_retain_locked(&map->map_refcnt);
17920 lck_mtx_unlock(&map->s_lock);
17921 }
17922
17923 /*
17924 * vm_map_deallocate:
17925 *
17926 * Removes a reference from the specified map,
17927 * destroying it if no references remain.
17928 * The map should not be locked.
17929 */
17930 void
17931 vm_map_deallocate(
17932 vm_map_t map)
17933 {
17934 unsigned int ref;
17935
17936 if (map == VM_MAP_NULL) {
17937 return;
17938 }
17939
17940 lck_mtx_lock(&map->s_lock);
17941 ref = os_ref_release_locked(&map->map_refcnt);
17942 if (ref > 0) {
17943 vm_map_res_deallocate(map);
17944 lck_mtx_unlock(&map->s_lock);
17945 return;
17946 }
17947 assert(os_ref_get_count(&map->map_refcnt) == 0);
17948 lck_mtx_unlock(&map->s_lock);
17949
17950 #if TASK_SWAPPER
17951 /*
17952 * The map residence count isn't decremented here because
17953 * the vm_map_delete below will traverse the entire map,
17954 * deleting entries, and the residence counts on objects
17955 * and sharing maps will go away then.
17956 */
17957 #endif
17958
17959 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17960 }
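/*
 * Illustrative sketch (not part of the original source): components outside
 * osfmk that stash a map pointer are expected to keep vm_map_reference() and
 * vm_map_deallocate() balanced.  "my_ctx" is a hypothetical consumer:
 *
 *	vm_map_reference(map);		// take a ref before publishing it
 *	my_ctx->map = map;
 *	...
 *	vm_map_deallocate(my_ctx->map);	// dropping the last ref destroys the map
 *	my_ctx->map = VM_MAP_NULL;
 */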
17961
17962
17963 void
17964 vm_map_disable_NX(vm_map_t map)
17965 {
17966 if (map == NULL) {
17967 return;
17968 }
17969 if (map->pmap == NULL) {
17970 return;
17971 }
17972
17973 pmap_disable_NX(map->pmap);
17974 }
17975
17976 void
17977 vm_map_disallow_data_exec(vm_map_t map)
17978 {
17979 if (map == NULL) {
17980 return;
17981 }
17982
17983 map->map_disallow_data_exec = TRUE;
17984 }
17985
17986 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17987 * more descriptive.
17988 */
17989 void
17990 vm_map_set_32bit(vm_map_t map)
17991 {
17992 #if defined(__arm__) || defined(__arm64__)
17993 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17994 #else
17995 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17996 #endif
17997 }
17998
17999
18000 void
18001 vm_map_set_64bit(vm_map_t map)
18002 {
18003 #if defined(__arm__) || defined(__arm64__)
18004 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18005 #else
18006 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
18007 #endif
18008 }
18009
18010 /*
18011 * Expand the maximum size of an existing map to the maximum supported.
18012 */
18013 void
18014 vm_map_set_jumbo(vm_map_t map)
18015 {
18016 #if defined (__arm64__)
18017 vm_map_set_max_addr(map, ~0);
18018 #else /* arm64 */
18019 (void) map;
18020 #endif
18021 }
18022
18023 /*
18024 * This map has a JIT entitlement
18025 */
18026 void
18027 vm_map_set_jit_entitled(vm_map_t map)
18028 {
18029 #if defined (__arm64__)
18030 pmap_set_jit_entitled(map->pmap);
18031 #else /* arm64 */
18032 (void) map;
18033 #endif
18034 }
18035
18036 /*
18037 * Expand the maximum size of an existing map.
18038 */
18039 void
18040 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18041 {
18042 #if defined(__arm64__)
18043 vm_map_offset_t max_supported_offset = 0;
18044 vm_map_offset_t old_max_offset = map->max_offset;
18045 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18046
18047 new_max_offset = trunc_page(new_max_offset);
18048
18049 /* The address space cannot be shrunk using this routine. */
18050 if (old_max_offset >= new_max_offset) {
18051 return;
18052 }
18053
18054 if (max_supported_offset < new_max_offset) {
18055 new_max_offset = max_supported_offset;
18056 }
18057
18058 map->max_offset = new_max_offset;
18059
18060 if (map->holes_list->prev->vme_end == old_max_offset) {
18061 /*
18062 * There is already a hole at the end of the map; simply make it bigger.
18063 */
18064 map->holes_list->prev->vme_end = map->max_offset;
18065 } else {
18066 /*
18067 * There is no hole at the end, so we need to create a new hole
18068 * for the new empty space we're creating.
18069 */
18070 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18071 new_hole->start = old_max_offset;
18072 new_hole->end = map->max_offset;
18073 new_hole->prev = map->holes_list->prev;
18074 new_hole->next = (struct vm_map_entry *)map->holes_list;
18075 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18076 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18077 }
18078 #else
18079 (void)map;
18080 (void)new_max_offset;
18081 #endif
18082 }
18083
18084 vm_map_offset_t
18085 vm_compute_max_offset(boolean_t is64)
18086 {
18087 #if defined(__arm__) || defined(__arm64__)
18088 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
18089 #else
18090 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
18091 #endif
18092 }
18093
18094 void
18095 vm_map_get_max_aslr_slide_section(
18096 vm_map_t map __unused,
18097 int64_t *max_sections,
18098 int64_t *section_size)
18099 {
18100 #if defined(__arm64__)
18101 *max_sections = 3;
18102 *section_size = ARM_TT_TWIG_SIZE;
18103 #else
18104 *max_sections = 1;
18105 *section_size = 0;
18106 #endif
18107 }
18108
18109 uint64_t
18110 vm_map_get_max_aslr_slide_pages(vm_map_t map)
18111 {
18112 #if defined(__arm64__)
18113 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18114 * limited embedded address space; this is also meant to minimize pmap
18115 * memory usage on 16KB page systems.
18116 */
18117 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
18118 #else
18119 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18120 #endif
18121 }
18122
18123 uint64_t
18124 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18125 {
18126 #if defined(__arm64__)
18127 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18128 * of independent entropy on 16KB page systems.
18129 */
18130 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
18131 #else
18132 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18133 #endif
18134 }
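/*
 * Worked example (illustrative, not part of the original source) of the two
 * arm64 slide limits above:
 *
 *	16KB pages (VM_MAP_PAGE_SHIFT(map) == 14):
 *		max slide    = 1 << (24 - 14) = 1024 pages = 16MB
 *		loader slide = 1 << (22 - 14) =  256 pages =  4MB (8 bits of entropy)
 *	4KB pages (VM_MAP_PAGE_SHIFT(map) == 12):
 *		max slide    = 1 << (24 - 12) = 4096 pages = 16MB
 *		loader slide = 1 << (22 - 12) = 1024 pages =  4MB
 */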
18135
18136 #ifndef __arm__
18137 boolean_t
18138 vm_map_is_64bit(
18139 vm_map_t map)
18140 {
18141 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18142 }
18143 #endif
18144
18145 boolean_t
18146 vm_map_has_hard_pagezero(
18147 vm_map_t map,
18148 vm_map_offset_t pagezero_size)
18149 {
18150 /*
18151 * XXX FBDP
18152 * We should lock the VM map (for read) here but we can get away
18153 * with it for now because there can't really be any race condition:
18154 * the VM map's min_offset is changed only when the VM map is created
18155 * and when the zero page is established (when the binary gets loaded),
18156 * and this routine gets called only when the task terminates and the
18157 * VM map is being torn down, and when a new map is created via
18158 * load_machfile()/execve().
18159 */
18160 return map->min_offset >= pagezero_size;
18161 }
18162
18163 /*
18164 * Raise a VM map's maximum offset.
18165 */
18166 kern_return_t
18167 vm_map_raise_max_offset(
18168 vm_map_t map,
18169 vm_map_offset_t new_max_offset)
18170 {
18171 kern_return_t ret;
18172
18173 vm_map_lock(map);
18174 ret = KERN_INVALID_ADDRESS;
18175
18176 if (new_max_offset >= map->max_offset) {
18177 if (!vm_map_is_64bit(map)) {
18178 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18179 map->max_offset = new_max_offset;
18180 ret = KERN_SUCCESS;
18181 }
18182 } else {
18183 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18184 map->max_offset = new_max_offset;
18185 ret = KERN_SUCCESS;
18186 }
18187 }
18188 }
18189
18190 vm_map_unlock(map);
18191 return ret;
18192 }
18193
18194
18195 /*
18196 * Raise a VM map's minimum offset.
18197 * To strictly enforce "page zero" reservation.
18198 */
18199 kern_return_t
18200 vm_map_raise_min_offset(
18201 vm_map_t map,
18202 vm_map_offset_t new_min_offset)
18203 {
18204 vm_map_entry_t first_entry;
18205
18206 new_min_offset = vm_map_round_page(new_min_offset,
18207 VM_MAP_PAGE_MASK(map));
18208
18209 vm_map_lock(map);
18210
18211 if (new_min_offset < map->min_offset) {
18212 /*
18213 * Can't move min_offset backwards, as that would expose
18214 * a part of the address space that was previously, and for
18215 * possibly good reasons, inaccessible.
18216 */
18217 vm_map_unlock(map);
18218 return KERN_INVALID_ADDRESS;
18219 }
18220 if (new_min_offset >= map->max_offset) {
18221 /* can't go beyond the end of the address space */
18222 vm_map_unlock(map);
18223 return KERN_INVALID_ADDRESS;
18224 }
18225
18226 first_entry = vm_map_first_entry(map);
18227 if (first_entry != vm_map_to_entry(map) &&
18228 first_entry->vme_start < new_min_offset) {
18229 /*
18230 * Some memory was already allocated below the new
18231 * minimum offset. It's too late to change it now...
18232 */
18233 vm_map_unlock(map);
18234 return KERN_NO_SPACE;
18235 }
18236
18237 map->min_offset = new_min_offset;
18238
18239 assert(map->holes_list);
18240 map->holes_list->start = new_min_offset;
18241 assert(new_min_offset < map->holes_list->end);
18242
18243 vm_map_unlock(map);
18244
18245 return KERN_SUCCESS;
18246 }
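/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * binary-load path might use vm_map_raise_min_offset() to enforce a 4GB
 * "page zero" on a 64-bit map.  Anything already mapped below the new
 * minimum makes the call fail.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
 *	if (kr != KERN_SUCCESS) {
 *		// KERN_NO_SPACE: something is already mapped below 4GB;
 *		// reject or fall back as appropriate
 *	}
 */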
18247
18248 /*
18249 * Set the limit on the maximum amount of user wired memory allowed for this map.
18250 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18251 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
18252 * to avoid reaching over to the BSD data structures.
18253 */
18254
18255 void
18256 vm_map_set_user_wire_limit(vm_map_t map,
18257 vm_size_t limit)
18258 {
18259 map->user_wire_limit = limit;
18260 }
18261
18262
18263 void
18264 vm_map_switch_protect(vm_map_t map,
18265 boolean_t val)
18266 {
18267 vm_map_lock(map);
18268 map->switch_protect = val;
18269 vm_map_unlock(map);
18270 }
18271
18272 /*
18273 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18274 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18275 * bump both counters.
18276 */
18277 void
18278 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18279 {
18280 pmap_t pmap = vm_map_pmap(map);
18281
18282 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18283 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18284 }
18285
18286 void
18287 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18288 {
18289 pmap_t pmap = vm_map_pmap(map);
18290
18291 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18292 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18293 }
18294
18295 /* Add (generate) code signature for memory range */
18296 #if CONFIG_DYNAMIC_CODE_SIGNING
18297 kern_return_t
18298 vm_map_sign(vm_map_t map,
18299 vm_map_offset_t start,
18300 vm_map_offset_t end)
18301 {
18302 vm_map_entry_t entry;
18303 vm_page_t m;
18304 vm_object_t object;
18305
18306 /*
18307 * Vet all the input parameters and current type and state of the
18308 * underlying object. Return with an error if anything is amiss.
18309 */
18310 if (map == VM_MAP_NULL) {
18311 return KERN_INVALID_ARGUMENT;
18312 }
18313
18314 vm_map_lock_read(map);
18315
18316 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18317 /*
18318 * Must pass a valid non-submap address.
18319 */
18320 vm_map_unlock_read(map);
18321 return KERN_INVALID_ADDRESS;
18322 }
18323
18324 if ((entry->vme_start > start) || (entry->vme_end < end)) {
18325 /*
18326 * Map entry doesn't cover the requested range. Not handling
18327 * this situation currently.
18328 */
18329 vm_map_unlock_read(map);
18330 return KERN_INVALID_ARGUMENT;
18331 }
18332
18333 object = VME_OBJECT(entry);
18334 if (object == VM_OBJECT_NULL) {
18335 /*
18336 * Object must already be present or we can't sign.
18337 */
18338 vm_map_unlock_read(map);
18339 return KERN_INVALID_ARGUMENT;
18340 }
18341
18342 vm_object_lock(object);
18343 vm_map_unlock_read(map);
18344
18345 while (start < end) {
18346 uint32_t refmod;
18347
18348 m = vm_page_lookup(object,
18349 start - entry->vme_start + VME_OFFSET(entry));
18350 if (m == VM_PAGE_NULL) {
18351 /* should we try to fault a page here? we can probably
18352 * demand it exists and is locked for this request */
18353 vm_object_unlock(object);
18354 return KERN_FAILURE;
18355 }
18356 /* deal with special page status */
18357 if (m->vmp_busy ||
18358 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18359 vm_object_unlock(object);
18360 return KERN_FAILURE;
18361 }
18362
18363 /* Page is OK... now "validate" it */
18364 /* This is the place where we'll call out to create a code
18365 * directory, later */
18366 m->vmp_cs_validated = TRUE;
18367
18368 /* The page is now "clean" for codesigning purposes. That means
18369 * we don't consider it as modified (wpmapped) anymore. But
18370 * we'll disconnect the page so we note any future modification
18371 * attempts. */
18372 m->vmp_wpmapped = FALSE;
18373 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18374
18375 /* Pull the dirty status from the pmap, since we cleared the
18376 * wpmapped bit */
18377 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18378 SET_PAGE_DIRTY(m, FALSE);
18379 }
18380
18381 /* On to the next page */
18382 start += PAGE_SIZE;
18383 }
18384 vm_object_unlock(object);
18385
18386 return KERN_SUCCESS;
18387 }
18388 #endif
18389
18390 kern_return_t
18391 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18392 {
18393 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
18394 vm_map_entry_t next_entry;
18395 kern_return_t kr = KERN_SUCCESS;
18396 vm_map_t zap_map;
18397
18398 vm_map_lock(map);
18399
18400 /*
18401 * We use a "zap_map" to avoid having to unlock
18402 * the "map" in vm_map_delete().
18403 */
18404 zap_map = vm_map_create(PMAP_NULL,
18405 map->min_offset,
18406 map->max_offset,
18407 map->hdr.entries_pageable);
18408
18409 if (zap_map == VM_MAP_NULL) {
18410 return KERN_RESOURCE_SHORTAGE;
18411 }
18412
18413 vm_map_set_page_shift(zap_map,
18414 VM_MAP_PAGE_SHIFT(map));
18415 vm_map_disable_hole_optimization(zap_map);
18416
18417 for (entry = vm_map_first_entry(map);
18418 entry != vm_map_to_entry(map);
18419 entry = next_entry) {
18420 next_entry = entry->vme_next;
18421
18422 if (VME_OBJECT(entry) &&
18423 !entry->is_sub_map &&
18424 (VME_OBJECT(entry)->internal == TRUE) &&
18425 (VME_OBJECT(entry)->ref_count == 1)) {
18426 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18427 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18428
18429 (void)vm_map_delete(map,
18430 entry->vme_start,
18431 entry->vme_end,
18432 VM_MAP_REMOVE_SAVE_ENTRIES,
18433 zap_map);
18434 }
18435 }
18436
18437 vm_map_unlock(map);
18438
18439 /*
18440 * Get rid of the "zap_maps" and all the map entries that
18441 * they may still contain.
18442 */
18443 if (zap_map != VM_MAP_NULL) {
18444 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18445 zap_map = VM_MAP_NULL;
18446 }
18447
18448 return kr;
18449 }
18450
18451
18452 #if DEVELOPMENT || DEBUG
18453
18454 int
18455 vm_map_disconnect_page_mappings(
18456 vm_map_t map,
18457 boolean_t do_unnest)
18458 {
18459 vm_map_entry_t entry;
18460 int page_count = 0;
18461
18462 if (do_unnest == TRUE) {
18463 #ifndef NO_NESTED_PMAP
18464 vm_map_lock(map);
18465
18466 for (entry = vm_map_first_entry(map);
18467 entry != vm_map_to_entry(map);
18468 entry = entry->vme_next) {
18469 if (entry->is_sub_map && entry->use_pmap) {
18470 /*
18471 * Make sure the range between the start of this entry and
18472 * the end of this entry is no longer nested, so that
18473 * we will only remove mappings from the pmap in use by
18474 * this task.
18475 */
18476 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18477 }
18478 }
18479 vm_map_unlock(map);
18480 #endif
18481 }
18482 vm_map_lock_read(map);
18483
18484 page_count = map->pmap->stats.resident_count;
18485
18486 for (entry = vm_map_first_entry(map);
18487 entry != vm_map_to_entry(map);
18488 entry = entry->vme_next) {
18489 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18490 (VME_OBJECT(entry)->phys_contiguous))) {
18491 continue;
18492 }
18493 if (entry->is_sub_map) {
18494 assert(!entry->use_pmap);
18495 }
18496
18497 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18498 }
18499 vm_map_unlock_read(map);
18500
18501 return page_count;
18502 }
18503
18504 #endif
18505
18506
18507 #if CONFIG_FREEZE
18508
18509
18510 int c_freezer_swapout_page_count;
18511 int c_freezer_compression_count = 0;
18512 AbsoluteTime c_freezer_last_yield_ts = 0;
18513
18514 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18515 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18516
18517 kern_return_t
18518 vm_map_freeze(
18519 task_t task,
18520 unsigned int *purgeable_count,
18521 unsigned int *wired_count,
18522 unsigned int *clean_count,
18523 unsigned int *dirty_count,
18524 unsigned int dirty_budget,
18525 unsigned int *shared_count,
18526 int *freezer_error_code,
18527 boolean_t eval_only)
18528 {
18529 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18530 kern_return_t kr = KERN_SUCCESS;
18531 boolean_t evaluation_phase = TRUE;
18532 vm_object_t cur_shared_object = NULL;
18533 int cur_shared_obj_ref_cnt = 0;
18534 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18535
18536 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18537
18538 /*
18539 * We need the exclusive lock here so that we can
18540 * block any page faults or lookups while we are
18541 * in the middle of freezing this vm map.
18542 */
18543 vm_map_t map = task->map;
18544
18545 vm_map_lock(map);
18546
18547 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18548
18549 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18550 if (vm_compressor_low_on_space()) {
18551 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18552 }
18553
18554 if (vm_swap_low_on_space()) {
18555 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18556 }
18557
18558 kr = KERN_NO_SPACE;
18559 goto done;
18560 }
18561
18562 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18563 /*
18564 * In-memory compressor backing the freezer. No disk.
18565 * So no need to do the evaluation phase.
18566 */
18567 evaluation_phase = FALSE;
18568
18569 if (eval_only == TRUE) {
18570 /*
18571 * We don't support 'eval_only' mode
18572 * in this non-swap config.
18573 */
18574 *freezer_error_code = FREEZER_ERROR_GENERIC;
18575 kr = KERN_INVALID_ARGUMENT;
18576 goto done;
18577 }
18578
18579 c_freezer_compression_count = 0;
18580 clock_get_uptime(&c_freezer_last_yield_ts);
18581 }
18582 again:
18583
18584 for (entry2 = vm_map_first_entry(map);
18585 entry2 != vm_map_to_entry(map);
18586 entry2 = entry2->vme_next) {
18587 vm_object_t src_object = VME_OBJECT(entry2);
18588
18589 if (src_object &&
18590 !entry2->is_sub_map &&
18591 !src_object->phys_contiguous) {
18592 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18593
18594 if (src_object->internal == TRUE) {
18595 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18596 /*
18597 * We skip purgeable objects during evaluation phase only.
18598 * If we decide to freeze this process, we'll explicitly
18599 * purge these objects before we go around again with
18600 * 'evaluation_phase' set to FALSE.
18601 */
18602
18603 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18604 /*
18605 * We want to purge objects that may not belong to this task but are mapped
18606 * in this task alone. Since we already purged this task's purgeable memory
18607 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18608 * on this task's purgeable objects. Hence the check for only volatile objects.
18609 */
18610 if (evaluation_phase == FALSE &&
18611 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18612 (src_object->ref_count == 1)) {
18613 vm_object_lock(src_object);
18614 vm_object_purge(src_object, 0);
18615 vm_object_unlock(src_object);
18616 }
18617 continue;
18618 }
18619
18620 /*
18621 * Pages belonging to this object could be swapped to disk.
18622 * Make sure it's not a shared object because we could end
18623 * up just bringing it back in again.
18624 *
18625 * We try to optimize somewhat by checking for objects that are mapped
18626 * more than once within our own map. But we don't do full searches,
18627 * we just look at the entries following our current entry.
18628 */
18629
18630 if (src_object->ref_count > 1) {
18631 if (src_object != cur_shared_object) {
18632 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18633 dirty_shared_count += obj_pages_snapshot;
18634
18635 cur_shared_object = src_object;
18636 cur_shared_obj_ref_cnt = 1;
18637 continue;
18638 } else {
18639 cur_shared_obj_ref_cnt++;
18640 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18641 /*
18642 * Fall through to below and treat this object as private.
18643 * So deduct its pages from our shared total and add it to the
18644 * private total.
18645 */
18646
18647 dirty_shared_count -= obj_pages_snapshot;
18648 dirty_private_count += obj_pages_snapshot;
18649 } else {
18650 continue;
18651 }
18652 }
18653 }
18654
18655
18656 if (src_object->ref_count == 1) {
18657 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18658 }
18659
18660 if (evaluation_phase == TRUE) {
18661 continue;
18662 }
18663 }
18664
18665 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
18666 *wired_count += src_object->wired_page_count;
18667
18668 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18669 if (vm_compressor_low_on_space()) {
18670 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18671 }
18672
18673 if (vm_swap_low_on_space()) {
18674 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18675 }
18676
18677 kr = KERN_NO_SPACE;
18678 break;
18679 }
18680 if (paged_out_count >= dirty_budget) {
18681 break;
18682 }
18683 dirty_budget -= paged_out_count;
18684 }
18685 }
18686 }
18687
18688 if (evaluation_phase) {
18689 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18690
18691 if (dirty_shared_count > shared_pages_threshold) {
18692 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18693 kr = KERN_FAILURE;
18694 goto done;
18695 }
18696
18697 if (dirty_shared_count &&
18698 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18699 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18700 kr = KERN_FAILURE;
18701 goto done;
18702 }
18703
18704 evaluation_phase = FALSE;
18705 dirty_shared_count = dirty_private_count = 0;
18706
18707 c_freezer_compression_count = 0;
18708 clock_get_uptime(&c_freezer_last_yield_ts);
18709
18710 if (eval_only) {
18711 kr = KERN_SUCCESS;
18712 goto done;
18713 }
18714
18715 vm_purgeable_purge_task_owned(task);
18716
18717 goto again;
18718 } else {
18719 kr = KERN_SUCCESS;
18720 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18721 }
18722
18723 done:
18724 vm_map_unlock(map);
18725
18726 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18727 vm_object_compressed_freezer_done();
18728
18729 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18730 /*
18731 * reset the counter tracking the # of swapped compressed pages
18732 * because we are now done with this freeze session and task.
18733 */
18734
18735 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18736 c_freezer_swapout_page_count = 0;
18737 }
18738 }
18739 return kr;
18740 }
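/*
 * Worked example (illustrative, not part of the original source) of the
 * evaluation-phase checks in vm_map_freeze() above, assuming 16KB pages and
 * hypothetical tunable values of 128 for
 * memorystatus_freeze_shared_mb_per_process_max and 2 for
 * memorystatus_freeze_private_shared_pages_ratio:
 *
 *	shared_pages_threshold = (128 * 1024 * 1024) / 16384 = 8192 pages
 *
 *	dirty_shared_count  = 1000 pages  -> under the 8192-page threshold, OK
 *	dirty_private_count = 1500 pages  -> 1500 / 1000 = 1, which is < 2,
 *	                                     so the freeze is rejected with
 *	                                     FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO
 */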
18741
18742 #endif
18743
18744 /*
18745 * vm_map_entry_should_cow_for_true_share:
18746 *
18747 * Determines if the map entry should be clipped and set up for copy-on-write
18748 * to avoid applying "true_share" to a large VM object when only a subset is
18749 * targeted.
18750 *
18751 * For now, we target only the map entries created for the Objective C
18752 * Garbage Collector, which initially have the following properties:
18753 * - alias == VM_MEMORY_MALLOC
18754 * - wired_count == 0
18755 * - !needs_copy
18756 * and a VM object with:
18757 * - internal
18758 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18759 * - !true_share
18760 * - vo_size == ANON_CHUNK_SIZE
18761 *
18762 * Only non-kernel map entries.
18763 */
18764 boolean_t
18765 vm_map_entry_should_cow_for_true_share(
18766 vm_map_entry_t entry)
18767 {
18768 vm_object_t object;
18769
18770 if (entry->is_sub_map) {
18771 /* entry does not point at a VM object */
18772 return FALSE;
18773 }
18774
18775 if (entry->needs_copy) {
18776 /* already set for copy_on_write: done! */
18777 return FALSE;
18778 }
18779
18780 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18781 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18782 /* not a malloc heap or Obj-C Garbage Collector heap */
18783 return FALSE;
18784 }
18785
18786 if (entry->wired_count) {
18787 /* wired: can't change the map entry... */
18788 vm_counters.should_cow_but_wired++;
18789 return FALSE;
18790 }
18791
18792 object = VME_OBJECT(entry);
18793
18794 if (object == VM_OBJECT_NULL) {
18795 /* no object yet... */
18796 return FALSE;
18797 }
18798
18799 if (!object->internal) {
18800 /* not an internal object */
18801 return FALSE;
18802 }
18803
18804 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18805 /* not the default copy strategy */
18806 return FALSE;
18807 }
18808
18809 if (object->true_share) {
18810 /* already true_share: too late to avoid it */
18811 return FALSE;
18812 }
18813
18814 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18815 object->vo_size != ANON_CHUNK_SIZE) {
18816 /* ... not an object created for the ObjC Garbage Collector */
18817 return FALSE;
18818 }
18819
18820 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18821 object->vo_size != 2048 * 4096) {
18822 /* ... not a "MALLOC_SMALL" heap */
18823 return FALSE;
18824 }
18825
18826 /*
18827 * All the criteria match: we have a large object being targeted for "true_share".
18828 * To limit the adverse side-effects linked with "true_share", tell the caller to
18829 * try and avoid setting up the entire object for "true_share" by clipping the
18830 * targeted range and setting it up for copy-on-write.
18831 */
18832 return TRUE;
18833 }
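/*
 * Illustrative sketch (not part of the original source): a caller holding the
 * map lock for write would typically react to a TRUE result by clipping the
 * entry to the targeted range and forcing copy-on-write there, roughly as
 * below ("start" and "end" are hypothetical bounds supplied by that caller):
 *
 *	if (vm_map_entry_should_cow_for_true_share(entry) &&
 *	    start >= entry->vme_start &&
 *	    end <= entry->vme_end) {
 *		vm_map_clip_start(map, entry,
 *		    vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
 *		vm_map_clip_end(map, entry,
 *		    vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
 *		entry->needs_copy = TRUE;	// COW the clipped range instead
 *	}					// of marking the whole object
 *						// "true_share"
 */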
18834
18835 vm_map_offset_t
18836 vm_map_round_page_mask(
18837 vm_map_offset_t offset,
18838 vm_map_offset_t mask)
18839 {
18840 return VM_MAP_ROUND_PAGE(offset, mask);
18841 }
18842
18843 vm_map_offset_t
18844 vm_map_trunc_page_mask(
18845 vm_map_offset_t offset,
18846 vm_map_offset_t mask)
18847 {
18848 return VM_MAP_TRUNC_PAGE(offset, mask);
18849 }
18850
18851 boolean_t
18852 vm_map_page_aligned(
18853 vm_map_offset_t offset,
18854 vm_map_offset_t mask)
18855 {
18856 return ((offset) & mask) == 0;
18857 }
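/*
 * Worked example (illustrative, not part of the original source), using a
 * 4KB page mask of 0xFFF:
 *
 *	vm_map_round_page_mask(0x1234, 0xFFF) == 0x2000
 *	vm_map_trunc_page_mask(0x1234, 0xFFF) == 0x1000
 *	vm_map_page_aligned(0x2000, 0xFFF)    == TRUE
 *	vm_map_page_aligned(0x1234, 0xFFF)    == FALSE
 */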
18858
18859 int
18860 vm_map_page_shift(
18861 vm_map_t map)
18862 {
18863 return VM_MAP_PAGE_SHIFT(map);
18864 }
18865
18866 int
18867 vm_map_page_size(
18868 vm_map_t map)
18869 {
18870 return VM_MAP_PAGE_SIZE(map);
18871 }
18872
18873 vm_map_offset_t
18874 vm_map_page_mask(
18875 vm_map_t map)
18876 {
18877 return VM_MAP_PAGE_MASK(map);
18878 }
18879
18880 kern_return_t
18881 vm_map_set_page_shift(
18882 vm_map_t map,
18883 int pageshift)
18884 {
18885 if (map->hdr.nentries != 0) {
18886 /* too late to change page size */
18887 return KERN_FAILURE;
18888 }
18889
18890 map->hdr.page_shift = pageshift;
18891
18892 return KERN_SUCCESS;
18893 }
18894
18895 kern_return_t
18896 vm_map_query_volatile(
18897 vm_map_t map,
18898 mach_vm_size_t *volatile_virtual_size_p,
18899 mach_vm_size_t *volatile_resident_size_p,
18900 mach_vm_size_t *volatile_compressed_size_p,
18901 mach_vm_size_t *volatile_pmap_size_p,
18902 mach_vm_size_t *volatile_compressed_pmap_size_p)
18903 {
18904 mach_vm_size_t volatile_virtual_size;
18905 mach_vm_size_t volatile_resident_count;
18906 mach_vm_size_t volatile_compressed_count;
18907 mach_vm_size_t volatile_pmap_count;
18908 mach_vm_size_t volatile_compressed_pmap_count;
18909 mach_vm_size_t resident_count;
18910 vm_map_entry_t entry;
18911 vm_object_t object;
18912
18913 /* map should be locked by caller */
18914
18915 volatile_virtual_size = 0;
18916 volatile_resident_count = 0;
18917 volatile_compressed_count = 0;
18918 volatile_pmap_count = 0;
18919 volatile_compressed_pmap_count = 0;
18920
18921 for (entry = vm_map_first_entry(map);
18922 entry != vm_map_to_entry(map);
18923 entry = entry->vme_next) {
18924 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
18925
18926 if (entry->is_sub_map) {
18927 continue;
18928 }
18929 if (!(entry->protection & VM_PROT_WRITE)) {
18930 continue;
18931 }
18932 object = VME_OBJECT(entry);
18933 if (object == VM_OBJECT_NULL) {
18934 continue;
18935 }
18936 if (object->purgable != VM_PURGABLE_VOLATILE &&
18937 object->purgable != VM_PURGABLE_EMPTY) {
18938 continue;
18939 }
18940 if (VME_OFFSET(entry)) {
18941 /*
18942 * If the map entry has been split and the object now
18943 * appears several times in the VM map, we don't want
18944 * to count the object's resident_page_count more than
18945 * once. We count it only for the first one, starting
18946 * at offset 0 and ignore the other VM map entries.
18947 */
18948 continue;
18949 }
18950 resident_count = object->resident_page_count;
18951 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18952 resident_count = 0;
18953 } else {
18954 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18955 }
18956
18957 volatile_virtual_size += entry->vme_end - entry->vme_start;
18958 volatile_resident_count += resident_count;
18959 if (object->pager) {
18960 volatile_compressed_count +=
18961 vm_compressor_pager_get_count(object->pager);
18962 }
18963 pmap_compressed_bytes = 0;
18964 pmap_resident_bytes =
18965 pmap_query_resident(map->pmap,
18966 entry->vme_start,
18967 entry->vme_end,
18968 &pmap_compressed_bytes);
18969 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18970 volatile_compressed_pmap_count += (pmap_compressed_bytes
18971 / PAGE_SIZE);
18972 }
18973
18974 /* map is still locked on return */
18975
18976 *volatile_virtual_size_p = volatile_virtual_size;
18977 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
18978 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
18979 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
18980 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
18981
18982 return KERN_SUCCESS;
18983 }
18984
18985 void
18986 vm_map_sizes(vm_map_t map,
18987 vm_map_size_t * psize,
18988 vm_map_size_t * pfree,
18989 vm_map_size_t * plargest_free)
18990 {
18991 vm_map_entry_t entry;
18992 vm_map_offset_t prev;
18993 vm_map_size_t free, total_free, largest_free;
18994 boolean_t end;
18995
18996 if (!map) {
18997 *psize = *pfree = *plargest_free = 0;
18998 return;
18999 }
19000 total_free = largest_free = 0;
19001
19002 vm_map_lock_read(map);
19003 if (psize) {
19004 *psize = map->max_offset - map->min_offset;
19005 }
19006
19007 prev = map->min_offset;
19008 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19009 end = (entry == vm_map_to_entry(map));
19010
19011 if (end) {
19012 free = entry->vme_end - prev;
19013 } else {
19014 free = entry->vme_start - prev;
19015 }
19016
19017 total_free += free;
19018 if (free > largest_free) {
19019 largest_free = free;
19020 }
19021
19022 if (end) {
19023 break;
19024 }
19025 prev = entry->vme_end;
19026 }
19027 vm_map_unlock_read(map);
19028 if (pfree) {
19029 *pfree = total_free;
19030 }
19031 if (plargest_free) {
19032 *plargest_free = largest_free;
19033 }
19034 }
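/*
 * Illustrative usage sketch (not part of the original source): sampling a
 * map's virtual size and free-space statistics.
 *
 *	vm_map_size_t	size, free_total, free_largest;
 *
 *	vm_map_sizes(map, &size, &free_total, &free_largest);
 *	// "size" is max_offset - min_offset; "free_total" sums all gaps
 *	// between entries; "free_largest" is the biggest single gap.
 */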
19035
19036 #if VM_SCAN_FOR_SHADOW_CHAIN
19037 int vm_map_shadow_max(vm_map_t map);
19038 int
19039 vm_map_shadow_max(
19040 vm_map_t map)
19041 {
19042 int shadows, shadows_max;
19043 vm_map_entry_t entry;
19044 vm_object_t object, next_object;
19045
19046 if (map == NULL) {
19047 return 0;
19048 }
19049
19050 shadows_max = 0;
19051
19052 vm_map_lock_read(map);
19053
19054 for (entry = vm_map_first_entry(map);
19055 entry != vm_map_to_entry(map);
19056 entry = entry->vme_next) {
19057 if (entry->is_sub_map) {
19058 continue;
19059 }
19060 object = VME_OBJECT(entry);
19061 if (object == NULL) {
19062 continue;
19063 }
19064 vm_object_lock_shared(object);
19065 for (shadows = 0;
19066 object->shadow != NULL;
19067 shadows++, object = next_object) {
19068 next_object = object->shadow;
19069 vm_object_lock_shared(next_object);
19070 vm_object_unlock(object);
19071 }
19072 vm_object_unlock(object);
19073 if (shadows > shadows_max) {
19074 shadows_max = shadows;
19075 }
19076 }
19077
19078 vm_map_unlock_read(map);
19079
19080 return shadows_max;
19081 }
19082 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19083
19084 void
19085 vm_commit_pagezero_status(vm_map_t lmap)
19086 {
19087 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19088 }
19089
19090 #if !CONFIG_EMBEDDED
19091 void
19092 vm_map_set_high_start(
19093 vm_map_t map,
19094 vm_map_offset_t high_start)
19095 {
19096 map->vmmap_high_start = high_start;
19097 }
19098 #endif
19099
19100 #if PMAP_CS
19101 kern_return_t
19102 vm_map_entry_cs_associate(
19103 vm_map_t map,
19104 vm_map_entry_t entry,
19105 vm_map_kernel_flags_t vmk_flags)
19106 {
19107 vm_object_t cs_object, cs_shadow;
19108 vm_object_offset_t cs_offset;
19109 void *cs_blobs;
19110 struct vnode *cs_vnode;
19111 kern_return_t cs_ret;
19112
19113 if (map->pmap == NULL ||
19114 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
19115 VME_OBJECT(entry) == VM_OBJECT_NULL ||
19116 !(entry->protection & VM_PROT_EXECUTE)) {
19117 return KERN_SUCCESS;
19118 }
19119
19120 vm_map_lock_assert_exclusive(map);
19121
19122 if (entry->used_for_jit) {
19123 cs_ret = pmap_cs_associate(map->pmap,
19124 PMAP_CS_ASSOCIATE_JIT,
19125 entry->vme_start,
19126 entry->vme_end - entry->vme_start);
19127 goto done;
19128 }
19129
19130 if (vmk_flags.vmkf_remap_prot_copy) {
19131 cs_ret = pmap_cs_associate(map->pmap,
19132 PMAP_CS_ASSOCIATE_COW,
19133 entry->vme_start,
19134 entry->vme_end - entry->vme_start);
19135 goto done;
19136 }
19137
19138 vm_object_lock_shared(VME_OBJECT(entry));
19139 cs_offset = VME_OFFSET(entry);
19140 for (cs_object = VME_OBJECT(entry);
19141 (cs_object != VM_OBJECT_NULL &&
19142 !cs_object->code_signed);
19143 cs_object = cs_shadow) {
19144 cs_shadow = cs_object->shadow;
19145 if (cs_shadow != VM_OBJECT_NULL) {
19146 cs_offset += cs_object->vo_shadow_offset;
19147 vm_object_lock_shared(cs_shadow);
19148 }
19149 vm_object_unlock(cs_object);
19150 }
19151 if (cs_object == VM_OBJECT_NULL) {
19152 return KERN_SUCCESS;
19153 }
19154
19155 cs_offset += cs_object->paging_offset;
19156 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
19157 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
19158 &cs_blobs);
19159 assert(cs_ret == KERN_SUCCESS);
19160 cs_ret = cs_associate_blob_with_mapping(map->pmap,
19161 entry->vme_start,
19162 (entry->vme_end -
19163 entry->vme_start),
19164 cs_offset,
19165 cs_blobs);
19166 vm_object_unlock(cs_object);
19167 cs_object = VM_OBJECT_NULL;
19168
19169 done:
19170 if (cs_ret == KERN_SUCCESS) {
19171 DTRACE_VM2(vm_map_entry_cs_associate_success,
19172 vm_map_offset_t, entry->vme_start,
19173 vm_map_offset_t, entry->vme_end);
19174 if (vm_map_executable_immutable) {
19175 /*
19176 * Prevent this executable
19177 * mapping from being unmapped
19178 * or modified.
19179 */
19180 entry->permanent = TRUE;
19181 }
19182 /*
19183 * pmap says it will validate the
19184 * code-signing validity of pages
19185 * faulted in via this mapping, so
19186 * this map entry should be marked so
19187 * that vm_fault() bypasses code-signing
19188 * validation for faults coming through
19189 * this mapping.
19190 */
19191 entry->pmap_cs_associated = TRUE;
19192 } else if (cs_ret == KERN_NOT_SUPPORTED) {
19193 /*
19194 * pmap won't check the code-signing
19195 * validity of pages faulted in via
19196 * this mapping, so VM should keep
19197 * doing it.
19198 */
19199 DTRACE_VM3(vm_map_entry_cs_associate_off,
19200 vm_map_offset_t, entry->vme_start,
19201 vm_map_offset_t, entry->vme_end,
19202 int, cs_ret);
19203 } else {
19204 /*
19205 * A real error: do not allow
19206 * execution in this mapping.
19207 */
19208 DTRACE_VM3(vm_map_entry_cs_associate_failure,
19209 vm_map_offset_t, entry->vme_start,
19210 vm_map_offset_t, entry->vme_end,
19211 int, cs_ret);
19212 entry->protection &= ~VM_PROT_EXECUTE;
19213 entry->max_protection &= ~VM_PROT_EXECUTE;
19214 }
19215
19216 return cs_ret;
19217 }
19218 #endif /* PMAP_CS */
19219
19220 /*
19221 * FORKED CORPSE FOOTPRINT
19222 *
19223 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19224 * empty since it never ran and never got to fault in any pages.
19225 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19226 * a forked corpse would therefore return very little information.
19227 *
19228 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19229 * to vm_map_fork() to collect footprint information from the original VM map
19230 * and its pmap, and store it in the forked corpse's VM map. That information
19231 * is stored in place of the VM map's "hole list" since we'll never need to
19232 * look for holes in the corpse's map.
19233 *
19234 * The corpse's footprint info looks like this:
19235 *
19236 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19237 * as follows:
19238 * +---------------------------------------+
19239 * header-> | cf_size |
19240 * +-------------------+-------------------+
19241 * | cf_last_region | cf_last_zeroes |
19242 * +-------------------+-------------------+
19243 * region1-> | cfr_vaddr |
19244 * +-------------------+-------------------+
19245 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19246 * +---------------------------------------+
19247 * | d4 | d5 | ... |
19248 * +---------------------------------------+
19249 * | ... |
19250 * +-------------------+-------------------+
19251 * | dy | dz | na | na | cfr_vaddr... | <-region2
19252 * +-------------------+-------------------+
19253 * | cfr_vaddr (ctd) | cfr_num_pages |
19254 * +---------------------------------------+
19255 * | d0 | d1 ... |
19256 * +---------------------------------------+
19257 * ...
19258 * +---------------------------------------+
19259 * last region-> | cfr_vaddr |
19260 * +---------------------------------------+
19261 * + cfr_num_pages | d0 | d1 | d2 | d3 |
19262 * +---------------------------------------+
19263 * ...
19264 * +---------------------------------------+
19265 * | dx | dy | dz | na | na | na | na | na |
19266 * +---------------------------------------+
19267 *
19268 * where:
19269 * cf_size: total size of the buffer (rounded to page size)
19270 * cf_last_region: offset in the buffer of the last "region" sub-header
19271 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19272 * of last region
19273 * cfr_vaddr: virtual address of the start of the covered "region"
19274 * cfr_num_pages: number of pages in the covered "region"
19275 * d*: disposition of the page at that virtual address
19276 * Regions in the buffer are word-aligned.
19277 *
19278 * We estimate the size of the buffer based on the number of memory regions
19279 * and the virtual size of the address space. While copying each memory region
19280 * during vm_map_fork(), we also collect the footprint info for that region
19281 * and store it in the buffer, packing it as much as possible (coalescing
19282 * contiguous memory regions to avoid having too many region headers and
19283 * avoiding long streaks of "zero" page dispositions by splitting footprint
19284 * "regions", so the number of regions in the footprint buffer might not match
19285 * the number of memory regions in the address space.
19286 *
19287 * We also have to copy the original task's "nonvolatile" ledgers since that's
19288 * part of the footprint and will need to be reported to any tool asking for
19289 * the footprint information of the forked corpse.
19290 */
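/*
 * Illustrative sketch (not part of the original source, and not the actual
 * declarations, which live in the VM headers): a minimal C rendering of the
 * buffer layout pictured above, using the field names referenced by the code
 * below.  The dispositions are reached by pointer arithmetic past each region
 * sub-header; the flexible array member shown here is only for clarity.
 *
 *	struct vm_map_corpse_footprint_header {
 *		vm_size_t	cf_size;	// total buffer size (page rounded)
 *		uint32_t	cf_last_region;	// offset of last region sub-header
 *		uint32_t	cf_last_zeroes;	// trailing "zero" dispositions in
 *						// that last region
 *	};
 *	struct vm_map_corpse_footprint_region {
 *		vm_map_offset_t	cfr_vaddr;	// start of the covered range
 *		uint32_t	cfr_num_pages;	// pages covered by this region
 *		unsigned char	cfr_dispositions[]; // one byte per page, padded
 *						    // to an int boundary
 *	};
 */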
19291
19292 uint64_t vm_map_corpse_footprint_count = 0;
19293 uint64_t vm_map_corpse_footprint_size_avg = 0;
19294 uint64_t vm_map_corpse_footprint_size_max = 0;
19295 uint64_t vm_map_corpse_footprint_full = 0;
19296 uint64_t vm_map_corpse_footprint_no_buf = 0;
19297
19298 /*
19299 * vm_map_corpse_footprint_new_region:
19300 * closes the current footprint "region" and creates a new one
19301 *
19302 * Returns NULL if there's not enough space in the buffer for a new region.
19303 */
19304 static struct vm_map_corpse_footprint_region *
19305 vm_map_corpse_footprint_new_region(
19306 struct vm_map_corpse_footprint_header *footprint_header)
19307 {
19308 uintptr_t footprint_edge;
19309 uint32_t new_region_offset;
19310 struct vm_map_corpse_footprint_region *footprint_region;
19311 struct vm_map_corpse_footprint_region *new_footprint_region;
19312
19313 footprint_edge = ((uintptr_t)footprint_header +
19314 footprint_header->cf_size);
19315 footprint_region = ((struct vm_map_corpse_footprint_region *)
19316 ((char *)footprint_header +
19317 footprint_header->cf_last_region));
19318 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19319 footprint_edge);
19320
19321 /* get rid of trailing zeroes in the last region */
19322 assert(footprint_region->cfr_num_pages >=
19323 footprint_header->cf_last_zeroes);
19324 footprint_region->cfr_num_pages -=
19325 footprint_header->cf_last_zeroes;
19326 footprint_header->cf_last_zeroes = 0;
19327
19328 /* reuse this region if it's now empty */
19329 if (footprint_region->cfr_num_pages == 0) {
19330 return footprint_region;
19331 }
19332
19333 /* compute offset of new region */
19334 new_region_offset = footprint_header->cf_last_region;
19335 new_region_offset += sizeof(*footprint_region);
19336 new_region_offset += footprint_region->cfr_num_pages;
19337 new_region_offset = roundup(new_region_offset, sizeof(int));
19338
19339 /* check if we're going over the edge */
19340 if (((uintptr_t)footprint_header +
19341 new_region_offset +
19342 sizeof(*footprint_region)) >=
19343 footprint_edge) {
19344 /* over the edge: no new region */
19345 return NULL;
19346 }
19347
19348 /* adjust offset of last region in header */
19349 footprint_header->cf_last_region = new_region_offset;
19350
19351 new_footprint_region = (struct vm_map_corpse_footprint_region *)
19352 ((char *)footprint_header +
19353 footprint_header->cf_last_region);
19354 new_footprint_region->cfr_vaddr = 0;
19355 new_footprint_region->cfr_num_pages = 0;
19356 /* caller needs to initialize new region */
19357
19358 return new_footprint_region;
19359 }
19360
19361 /*
19362 * vm_map_corpse_footprint_collect:
19363 * collects footprint information for "old_entry" in "old_map" and
19364 * stores it in "new_map"'s vmmap_footprint_info.
19365 */
19366 kern_return_t
19367 vm_map_corpse_footprint_collect(
19368 vm_map_t old_map,
19369 vm_map_entry_t old_entry,
19370 vm_map_t new_map)
19371 {
19372 vm_map_offset_t va;
19373 int disp;
19374 kern_return_t kr;
19375 struct vm_map_corpse_footprint_header *footprint_header;
19376 struct vm_map_corpse_footprint_region *footprint_region;
19377 struct vm_map_corpse_footprint_region *new_footprint_region;
19378 unsigned char *next_disp_p;
19379 uintptr_t footprint_edge;
19380 uint32_t num_pages_tmp;
19381
19382 va = old_entry->vme_start;
19383
19384 vm_map_lock_assert_exclusive(old_map);
19385 vm_map_lock_assert_exclusive(new_map);
19386
19387 assert(new_map->has_corpse_footprint);
19388 assert(!old_map->has_corpse_footprint);
19389 if (!new_map->has_corpse_footprint ||
19390 old_map->has_corpse_footprint) {
19391 /*
19392 * This can only transfer footprint info from a
19393 * map with a live pmap to a map with a corpse footprint.
19394 */
19395 return KERN_NOT_SUPPORTED;
19396 }
19397
19398 if (new_map->vmmap_corpse_footprint == NULL) {
19399 vm_offset_t buf;
19400 vm_size_t buf_size;
19401
19402 buf = 0;
19403 buf_size = (sizeof(*footprint_header) +
19404 (old_map->hdr.nentries
19405 *
19406 (sizeof(*footprint_region) +
19407 +3)) /* potential alignment for each region */
19408 +
19409 ((old_map->size / PAGE_SIZE)
19410 *
19411 sizeof(char))); /* disposition for each page */
19412 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19413 buf_size = round_page(buf_size);
19414
19415 /* limit buffer to 1 page to validate overflow detection */
19416 // buf_size = PAGE_SIZE;
19417
19418 /* limit size to a somewhat sane amount */
19419 #if CONFIG_EMBEDDED
19420 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19421 #else /* CONFIG_EMBEDDED */
19422 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19423 #endif /* CONFIG_EMBEDDED */
19424 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19425 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19426 }
19427
19428 /*
19429 * Allocate the pageable buffer (with a trailing guard page).
19430 * It will be zero-filled on demand.
19431 */
19432 kr = kernel_memory_allocate(kernel_map,
19433 &buf,
19434 (buf_size
19435 + PAGE_SIZE), /* trailing guard page */
19436 0, /* mask */
19437 KMA_PAGEABLE | KMA_GUARD_LAST,
19438 VM_KERN_MEMORY_DIAG);
19439 if (kr != KERN_SUCCESS) {
19440 vm_map_corpse_footprint_no_buf++;
19441 return kr;
19442 }
19443
19444 /* initialize header and 1st region */
19445 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19446 new_map->vmmap_corpse_footprint = footprint_header;
19447
19448 footprint_header->cf_size = buf_size;
19449 footprint_header->cf_last_region =
19450 sizeof(*footprint_header);
19451 footprint_header->cf_last_zeroes = 0;
19452
19453 footprint_region = (struct vm_map_corpse_footprint_region *)
19454 ((char *)footprint_header +
19455 footprint_header->cf_last_region);
19456 footprint_region->cfr_vaddr = 0;
19457 footprint_region->cfr_num_pages = 0;
19458 } else {
19459 /* retrieve header and last region */
19460 footprint_header = (struct vm_map_corpse_footprint_header *)
19461 new_map->vmmap_corpse_footprint;
19462 footprint_region = (struct vm_map_corpse_footprint_region *)
19463 ((char *)footprint_header +
19464 footprint_header->cf_last_region);
19465 }
19466 footprint_edge = ((uintptr_t)footprint_header +
19467 footprint_header->cf_size);
19468
19469 if ((footprint_region->cfr_vaddr +
19470 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19471 PAGE_SIZE))
19472 != old_entry->vme_start) {
19473 uint64_t num_pages_delta;
19474 uint32_t region_offset_delta;
19475
19476 /*
19477 * Not the next contiguous virtual address:
19478 * start a new region or store "zero" dispositions for
19479 * the missing pages?
19480 */
19481 /* size of gap in actual page dispositions */
19482 num_pages_delta = (((old_entry->vme_start -
19483 footprint_region->cfr_vaddr) / PAGE_SIZE)
19484 - footprint_region->cfr_num_pages);
19485 /* size of gap as a new footprint region header */
19486 region_offset_delta =
19487 (sizeof(*footprint_region) +
19488 roundup((footprint_region->cfr_num_pages -
19489 footprint_header->cf_last_zeroes),
19490 sizeof(int)) -
19491 (footprint_region->cfr_num_pages -
19492 footprint_header->cf_last_zeroes));
19493 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19494 if (region_offset_delta < num_pages_delta ||
19495 os_add3_overflow(footprint_region->cfr_num_pages,
19496 (uint32_t) num_pages_delta,
19497 1,
19498 &num_pages_tmp)) {
19499 /*
19500 * Storing data for this gap would take more space
19501 * than inserting a new footprint region header:
19502 * let's start a new region and save space. If it's a
19503 * tie, let's avoid using a new region, since that
19504 * would require more region hops to find the right
19505 * range during lookups.
19506 *
19507 * If the current region's cfr_num_pages would overflow
19508 * if we added "zero" page dispositions for the gap,
19509 * no choice but to start a new region.
19510 */
19511 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19512 new_footprint_region =
19513 vm_map_corpse_footprint_new_region(footprint_header);
19514 /* check that we're not going over the edge */
19515 if (new_footprint_region == NULL) {
19516 goto over_the_edge;
19517 }
19518 footprint_region = new_footprint_region;
19519 /* initialize new region as empty */
19520 footprint_region->cfr_vaddr = old_entry->vme_start;
19521 footprint_region->cfr_num_pages = 0;
19522 } else {
19523 /*
19524 * Store "zero" page dispositions for the missing
19525 * pages.
19526 */
19527 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19528 for (; num_pages_delta > 0; num_pages_delta--) {
19529 next_disp_p =
19530 ((unsigned char *) footprint_region +
19531 sizeof(*footprint_region) +
19532 footprint_region->cfr_num_pages);
19533 /* check that we're not going over the edge */
19534 if ((uintptr_t)next_disp_p >= footprint_edge) {
19535 goto over_the_edge;
19536 }
19537 /* store "zero" disposition for this gap page */
19538 footprint_region->cfr_num_pages++;
19539 *next_disp_p = (unsigned char) 0;
19540 footprint_header->cf_last_zeroes++;
19541 }
19542 }
19543 }
19544
19545 for (va = old_entry->vme_start;
19546 va < old_entry->vme_end;
19547 va += PAGE_SIZE) {
19548 vm_object_t object;
19549
19550 object = VME_OBJECT(old_entry);
19551 if (!old_entry->is_sub_map &&
19552 old_entry->iokit_acct &&
19553 object != VM_OBJECT_NULL &&
19554 object->internal &&
19555 object->purgable == VM_PURGABLE_DENY) {
19556 /*
19557 * Non-purgeable IOKit memory: phys_footprint
19558 * includes the entire virtual mapping.
19559 * Since the forked corpse's VM map entry will not
19560 * have "iokit_acct", pretend that this page's
19561 * disposition is "present & internal", so that it
19562 * shows up in the forked corpse's footprint.
19563 */
19564 disp = (PMAP_QUERY_PAGE_PRESENT |
19565 PMAP_QUERY_PAGE_INTERNAL);
19566 } else {
19567 disp = 0;
19568 pmap_query_page_info(old_map->pmap,
19569 va,
19570 &disp);
19571 }
19572
19573 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19574
19575 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19576 /*
19577 * Ignore "zero" dispositions at start of
19578 * region: just move start of region.
19579 */
19580 footprint_region->cfr_vaddr += PAGE_SIZE;
19581 continue;
19582 }
19583
19584 /* would region's cfr_num_pages overflow? */
19585 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19586 &num_pages_tmp)) {
19587 /* overflow: create a new region */
19588 new_footprint_region =
19589 vm_map_corpse_footprint_new_region(
19590 footprint_header);
19591 if (new_footprint_region == NULL) {
19592 goto over_the_edge;
19593 }
19594 footprint_region = new_footprint_region;
19595 footprint_region->cfr_vaddr = va;
19596 footprint_region->cfr_num_pages = 0;
19597 }
19598
19599 next_disp_p = ((unsigned char *)footprint_region +
19600 sizeof(*footprint_region) +
19601 footprint_region->cfr_num_pages);
19602 /* check that we're not going over the edge */
19603 if ((uintptr_t)next_disp_p >= footprint_edge) {
19604 goto over_the_edge;
19605 }
19606 /* store this disposition */
19607 *next_disp_p = (unsigned char) disp;
19608 footprint_region->cfr_num_pages++;
19609
19610 if (disp != 0) {
19611 /* non-zero disp: break the current zero streak */
19612 footprint_header->cf_last_zeroes = 0;
19613 /* done */
19614 continue;
19615 }
19616
19617 /* zero disp: add to the current streak of zeroes */
19618 footprint_header->cf_last_zeroes++;
19619 if ((footprint_header->cf_last_zeroes +
19620 roundup((footprint_region->cfr_num_pages -
19621 footprint_header->cf_last_zeroes) &
19622 (sizeof(int) - 1),
19623 sizeof(int))) <
19624 (sizeof(*footprint_header))) {
19625 /*
19626 * There are not enough trailing "zero" dispositions
19627 * (+ the extra padding we would need for the previous
19628 * region); creating a new region would not save space
19629 * at this point, so let's keep this "zero" disposition
19630 * in this region and reconsider later.
19631 */
19632 continue;
19633 }
19634 /*
19635 * Create a new region to avoid having too many consecutive
19636 * "zero" dispositions.
19637 */
19638 new_footprint_region =
19639 vm_map_corpse_footprint_new_region(footprint_header);
19640 if (new_footprint_region == NULL) {
19641 goto over_the_edge;
19642 }
19643 footprint_region = new_footprint_region;
19644 /* initialize the new region as empty ... */
19645 footprint_region->cfr_num_pages = 0;
19646 /* ... and skip this "zero" disp */
19647 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19648 }
19649
19650 return KERN_SUCCESS;
19651
19652 over_the_edge:
19653 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19654 vm_map_corpse_footprint_full++;
19655 return KERN_RESOURCE_SHORTAGE;
19656 }
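/*
 * Illustrative worked example (not part of the original source): the
 * collector above only starts a new footprint region when that is
 * strictly cheaper than padding the current region with "zero"
 * disposition bytes.  For instance, assuming a new region header plus
 * its alignment padding costs 16 bytes (the real cost follows from
 * sizeof(struct vm_map_corpse_footprint_region) and the word
 * alignment applied above), a 10-page gap is cheaper to keep as 10
 * zero bytes in the current region, while a 40-page gap is cheaper to
 * encode by starting a new region at the next mapped address.  The
 * same space comparison limits runs of trailing zeroes within a
 * region, tracked via cf_last_zeroes.
 */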
19657
19658 /*
19659 * vm_map_corpse_footprint_collect_done:
19660 * completes the footprint collection by getting rid of any remaining
19661 * trailing "zero" dispositions and trimming the unused part of the
19662 * kernel buffer
19663 */
19664 void
19665 vm_map_corpse_footprint_collect_done(
19666 vm_map_t new_map)
19667 {
19668 struct vm_map_corpse_footprint_header *footprint_header;
19669 struct vm_map_corpse_footprint_region *footprint_region;
19670 vm_size_t buf_size, actual_size;
19671 kern_return_t kr;
19672
19673 assert(new_map->has_corpse_footprint);
19674 if (!new_map->has_corpse_footprint ||
19675 new_map->vmmap_corpse_footprint == NULL) {
19676 return;
19677 }
19678
19679 footprint_header = (struct vm_map_corpse_footprint_header *)
19680 new_map->vmmap_corpse_footprint;
19681 buf_size = footprint_header->cf_size;
19682
19683 footprint_region = (struct vm_map_corpse_footprint_region *)
19684 ((char *)footprint_header +
19685 footprint_header->cf_last_region);
19686
19687 /* get rid of trailing zeroes in last region */
19688 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19689 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19690 footprint_header->cf_last_zeroes = 0;
19691
19692 actual_size = (vm_size_t)(footprint_header->cf_last_region +
19693 sizeof(*footprint_region) +
19694 footprint_region->cfr_num_pages);
19695
19696 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19697 vm_map_corpse_footprint_size_avg =
19698 (((vm_map_corpse_footprint_size_avg *
19699 vm_map_corpse_footprint_count) +
19700 actual_size) /
19701 (vm_map_corpse_footprint_count + 1));
19702 vm_map_corpse_footprint_count++;
19703 if (actual_size > vm_map_corpse_footprint_size_max) {
19704 vm_map_corpse_footprint_size_max = actual_size;
19705 }
19706
19707 actual_size = round_page(actual_size);
19708 if (buf_size > actual_size) {
19709 kr = vm_deallocate(kernel_map,
19710 ((vm_address_t)footprint_header +
19711 actual_size +
19712 PAGE_SIZE), /* trailing guard page */
19713 (buf_size - actual_size));
19714 assertf(kr == KERN_SUCCESS,
19715 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19716 footprint_header,
19717 (uint64_t) buf_size,
19718 (uint64_t) actual_size,
19719 kr);
19720 kr = vm_protect(kernel_map,
19721 ((vm_address_t)footprint_header +
19722 actual_size),
19723 PAGE_SIZE,
19724 FALSE, /* set_maximum */
19725 VM_PROT_NONE);
19726 assertf(kr == KERN_SUCCESS,
19727 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19728 footprint_header,
19729 (uint64_t) buf_size,
19730 (uint64_t) actual_size,
19731 kr);
19732 }
19733
19734 footprint_header->cf_size = actual_size;
19735 }
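/*
 * Illustrative sketch (not part of the original source): the size
 * bookkeeping above reduced to a stand-alone helper.
 * "demo_footprint_used_bytes" is a hypothetical name; like the code
 * above, it assumes cf_last_region holds the byte offset of the last
 * region from the start of the footprint buffer.
 */
#if 0 /* example only */
static vm_size_t
demo_footprint_used_bytes(
	struct vm_map_corpse_footprint_header *footprint_header,
	struct vm_map_corpse_footprint_region *last_region)
{
	vm_size_t used;

	/* header and all regions up to the start of the last one ... */
	used = footprint_header->cf_last_region;
	/* ... plus the last region's own header ... */
	used += sizeof(*last_region);
	/* ... plus one disposition byte per page it covers */
	used += last_region->cfr_num_pages;
	/* the buffer is then trimmed back to whole pages */
	return round_page(used);
}
#endif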
19736
19737 /*
19738 * vm_map_corpse_footprint_query_page_info:
19739 * retrieves the disposition of the page at virtual address "va"
19740 * in the forked corpse's VM map
19741 *
19742 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19743 */
19744 kern_return_t
19745 vm_map_corpse_footprint_query_page_info(
19746 vm_map_t map,
19747 vm_map_offset_t va,
19748 int *disp)
19749 {
19750 struct vm_map_corpse_footprint_header *footprint_header;
19751 struct vm_map_corpse_footprint_region *footprint_region;
19752 uint32_t footprint_region_offset;
19753 vm_map_offset_t region_start, region_end;
19754 int disp_idx;
19755 kern_return_t kr;
19756
19757 if (!map->has_corpse_footprint) {
19758 *disp = 0;
19759 kr = KERN_INVALID_ARGUMENT;
19760 goto done;
19761 }
19762
19763 footprint_header = map->vmmap_corpse_footprint;
19764 if (footprint_header == NULL) {
19765 *disp = 0;
19766 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19767 kr = KERN_INVALID_ARGUMENT;
19768 goto done;
19769 }
19770
19771 /* start looking at the hint ("cf_hint_region") */
19772 footprint_region_offset = footprint_header->cf_hint_region;
19773
19774 lookup_again:
19775 if (footprint_region_offset < sizeof(*footprint_header)) {
19776 /* hint too low: start from 1st region */
19777 footprint_region_offset = sizeof(*footprint_header);
19778 }
19779 if (footprint_region_offset >= footprint_header->cf_last_region) {
19780 /* hint too high: re-start from 1st region */
19781 footprint_region_offset = sizeof(*footprint_header);
19782 }
19783 footprint_region = (struct vm_map_corpse_footprint_region *)
19784 ((char *)footprint_header + footprint_region_offset);
19785 region_start = footprint_region->cfr_vaddr;
19786 region_end = (region_start +
19787 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19788 PAGE_SIZE));
19789 if (va < region_start &&
19790 footprint_region_offset != sizeof(*footprint_header)) {
19791 /* "va" is before the hint region */
19792
19793 /* reset the hint (in a racy way...) */
19794 footprint_header->cf_hint_region = sizeof(*footprint_header);
19795 /* lookup "va" again from 1st region */
19796 footprint_region_offset = sizeof(*footprint_header);
19797 goto lookup_again;
19798 }
19799
19800 while (va >= region_end) {
19801 if (footprint_region_offset >= footprint_header->cf_last_region) {
19802 break;
19803 }
19804 /* skip the region's header */
19805 footprint_region_offset += sizeof(*footprint_region);
19806 /* skip the region's page dispositions */
19807 footprint_region_offset += footprint_region->cfr_num_pages;
19808 /* align to next word boundary */
19809 footprint_region_offset =
19810 roundup(footprint_region_offset,
19811 sizeof(int));
19812 footprint_region = (struct vm_map_corpse_footprint_region *)
19813 ((char *)footprint_header + footprint_region_offset);
19814 region_start = footprint_region->cfr_vaddr;
19815 region_end = (region_start +
19816 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19817 PAGE_SIZE));
19818 }
19819 if (va < region_start || va >= region_end) {
19820 /* page not found */
19821 *disp = 0;
19822 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19823 kr = KERN_SUCCESS;
19824 goto done;
19825 }
19826
19827 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19828 footprint_header->cf_hint_region = footprint_region_offset;
19829
19830 /* get page disposition for "va" in this region */
19831 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19832 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19833
19834 kr = KERN_SUCCESS;
19835 done:
19836 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19837 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19838 DTRACE_VM4(footprint_query_page_info,
19839 vm_map_t, map,
19840 vm_map_offset_t, va,
19841 int, *disp,
19842 kern_return_t, kr);
19843
19844 return kr;
19845 }
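/*
 * Illustrative sketch (not part of the original source): how a caller
 * might choose between the live pmap and the corpse footprint when
 * asking for a page's disposition.  "demo_page_disposition" is a
 * hypothetical helper, not an existing interface.
 */
#if 0 /* example only */
static int
demo_page_disposition(
	vm_map_t map,
	vm_map_offset_t va)
{
	int disp;

	disp = 0;
	if (map->has_corpse_footprint) {
		/* forked corpse: use the footprint recorded at fork time */
		vm_map_corpse_footprint_query_page_info(map, va, &disp);
	} else {
		/* live map: ask the pmap directly */
		pmap_query_page_info(map->pmap, va, &disp);
	}
	return disp;
}
#endif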
19846
19847
19848 static void
19849 vm_map_corpse_footprint_destroy(
19850 vm_map_t map)
19851 {
19852 if (map->has_corpse_footprint &&
19853 map->vmmap_corpse_footprint != 0) {
19854 struct vm_map_corpse_footprint_header *footprint_header;
19855 vm_size_t buf_size;
19856 kern_return_t kr;
19857
19858 footprint_header = map->vmmap_corpse_footprint;
19859 buf_size = footprint_header->cf_size;
19860 kr = vm_deallocate(kernel_map,
19861 (vm_offset_t) map->vmmap_corpse_footprint,
19862 ((vm_size_t) buf_size
19863 + PAGE_SIZE)); /* trailing guard page */
19864 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19865 map->vmmap_corpse_footprint = 0;
19866 map->has_corpse_footprint = FALSE;
19867 }
19868 }
19869
19870 /*
19871 * vm_map_copy_footprint_ledgers:
19872 * copies any ledger that's relevant to the memory footprint of "old_task"
19873 * into the forked corpse's task ("new_task")
19874 */
19875 void
19876 vm_map_copy_footprint_ledgers(
19877 task_t old_task,
19878 task_t new_task)
19879 {
19880 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19881 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19882 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19883 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19884 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19885 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19886 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19887 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19888 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19889 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19890 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
19891 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19892 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19893 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19894 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19895 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19896 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19897 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19898 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
19899 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19900 }
19901
19902 /*
19903 * vm_map_copy_ledger:
19904 * copy a single ledger from "old_task" to "new_task"
19905 */
19906 void
19907 vm_map_copy_ledger(
19908 task_t old_task,
19909 task_t new_task,
19910 int ledger_entry)
19911 {
19912 ledger_amount_t old_balance, new_balance, delta;
19913
19914 assert(new_task->map->has_corpse_footprint);
19915 if (!new_task->map->has_corpse_footprint) {
19916 return;
19917 }
19918
19919 /* turn off sanity checks for the ledger we're about to mess with */
19920 ledger_disable_panic_on_negative(new_task->ledger,
19921 ledger_entry);
19922
19923 /* adjust "new_task" to match "old_task" */
19924 ledger_get_balance(old_task->ledger,
19925 ledger_entry,
19926 &old_balance);
19927 ledger_get_balance(new_task->ledger,
19928 ledger_entry,
19929 &new_balance);
19930 if (new_balance == old_balance) {
19931 /* new == old: done */
19932 } else if (new_balance > old_balance) {
19933 /* new > old ==> new -= new - old */
19934 delta = new_balance - old_balance;
19935 ledger_debit(new_task->ledger,
19936 ledger_entry,
19937 delta);
19938 } else {
19939 /* new < old ==> new += old - new */
19940 delta = old_balance - new_balance;
19941 ledger_credit(new_task->ledger,
19942 ledger_entry,
19943 delta);
19944 }
19945 }
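/*
 * Illustrative worked example (not part of the original source): if
 * the original task's ledger entry reads 5 * PAGE_SIZE and the
 * corpse's reads 2 * PAGE_SIZE, delta is 3 * PAGE_SIZE and the
 * corpse's ledger is credited by that amount; with the balances
 * reversed, the corpse's ledger would be debited by the same delta.
 * Either way the corpse ends up reporting the same balance as the
 * task it was forked from.
 */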
19946
19947 #if MACH_ASSERT
19948
19949 extern int pmap_ledgers_panic;
19950 extern int pmap_ledgers_panic_leeway;
19951
19952 #define LEDGER_DRIFT(__LEDGER) \
19953 int __LEDGER##_over; \
19954 ledger_amount_t __LEDGER##_over_total; \
19955 ledger_amount_t __LEDGER##_over_max; \
19956 int __LEDGER##_under; \
19957 ledger_amount_t __LEDGER##_under_total; \
19958 ledger_amount_t __LEDGER##_under_max
19959
19960 struct {
19961 uint64_t num_pmaps_checked;
19962
19963 LEDGER_DRIFT(phys_footprint);
19964 LEDGER_DRIFT(internal);
19965 LEDGER_DRIFT(internal_compressed);
19966 LEDGER_DRIFT(iokit_mapped);
19967 LEDGER_DRIFT(alternate_accounting);
19968 LEDGER_DRIFT(alternate_accounting_compressed);
19969 LEDGER_DRIFT(page_table);
19970 LEDGER_DRIFT(purgeable_volatile);
19971 LEDGER_DRIFT(purgeable_nonvolatile);
19972 LEDGER_DRIFT(purgeable_volatile_compressed);
19973 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
19974 LEDGER_DRIFT(tagged_nofootprint);
19975 LEDGER_DRIFT(tagged_footprint);
19976 LEDGER_DRIFT(tagged_nofootprint_compressed);
19977 LEDGER_DRIFT(tagged_footprint_compressed);
19978 LEDGER_DRIFT(network_volatile);
19979 LEDGER_DRIFT(network_nonvolatile);
19980 LEDGER_DRIFT(network_volatile_compressed);
19981 LEDGER_DRIFT(network_nonvolatile_compressed);
19982 LEDGER_DRIFT(media_nofootprint);
19983 LEDGER_DRIFT(media_footprint);
19984 LEDGER_DRIFT(media_nofootprint_compressed);
19985 LEDGER_DRIFT(media_footprint_compressed);
19986 LEDGER_DRIFT(graphics_nofootprint);
19987 LEDGER_DRIFT(graphics_footprint);
19988 LEDGER_DRIFT(graphics_nofootprint_compressed);
19989 LEDGER_DRIFT(graphics_footprint_compressed);
19990 LEDGER_DRIFT(neural_nofootprint);
19991 LEDGER_DRIFT(neural_footprint);
19992 LEDGER_DRIFT(neural_nofootprint_compressed);
19993 LEDGER_DRIFT(neural_footprint_compressed);
19994 } pmap_ledgers_drift;
19995
19996 void
19997 vm_map_pmap_check_ledgers(
19998 pmap_t pmap,
19999 ledger_t ledger,
20000 int pid,
20001 char *procname)
20002 {
20003 ledger_amount_t bal;
20004 boolean_t do_panic;
20005
20006 do_panic = FALSE;
20007
20008 pmap_ledgers_drift.num_pmaps_checked++;
20009
20010 #define LEDGER_CHECK_BALANCE(__LEDGER) \
20011 MACRO_BEGIN \
20012 int panic_on_negative = TRUE; \
20013 ledger_get_balance(ledger, \
20014 task_ledgers.__LEDGER, \
20015 &bal); \
20016 ledger_get_panic_on_negative(ledger, \
20017 task_ledgers.__LEDGER, \
20018 &panic_on_negative); \
20019 if (bal != 0) { \
20020 if (panic_on_negative || \
20021 (pmap_ledgers_panic && \
20022 pmap_ledgers_panic_leeway > 0 && \
20023 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
20024 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20025 do_panic = TRUE; \
20026 } \
20027 printf("LEDGER BALANCE proc %d (%s) " \
20028 "\"%s\" = %lld\n", \
20029 pid, procname, #__LEDGER, bal); \
20030 if (bal > 0) { \
20031 pmap_ledgers_drift.__LEDGER##_over++; \
20032 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20033 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20034 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20035 } \
20036 } else if (bal < 0) { \
20037 pmap_ledgers_drift.__LEDGER##_under++; \
20038 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20039 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20040 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20041 } \
20042 } \
20043 } \
20044 MACRO_END
20045
20046 LEDGER_CHECK_BALANCE(phys_footprint);
20047 LEDGER_CHECK_BALANCE(internal);
20048 LEDGER_CHECK_BALANCE(internal_compressed);
20049 LEDGER_CHECK_BALANCE(iokit_mapped);
20050 LEDGER_CHECK_BALANCE(alternate_accounting);
20051 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
20052 LEDGER_CHECK_BALANCE(page_table);
20053 LEDGER_CHECK_BALANCE(purgeable_volatile);
20054 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
20055 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
20056 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
20057 LEDGER_CHECK_BALANCE(tagged_nofootprint);
20058 LEDGER_CHECK_BALANCE(tagged_footprint);
20059 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
20060 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
20061 LEDGER_CHECK_BALANCE(network_volatile);
20062 LEDGER_CHECK_BALANCE(network_nonvolatile);
20063 LEDGER_CHECK_BALANCE(network_volatile_compressed);
20064 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
20065 LEDGER_CHECK_BALANCE(media_nofootprint);
20066 LEDGER_CHECK_BALANCE(media_footprint);
20067 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
20068 LEDGER_CHECK_BALANCE(media_footprint_compressed);
20069 LEDGER_CHECK_BALANCE(graphics_nofootprint);
20070 LEDGER_CHECK_BALANCE(graphics_footprint);
20071 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
20072 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
20073 LEDGER_CHECK_BALANCE(neural_nofootprint);
20074 LEDGER_CHECK_BALANCE(neural_footprint);
20075 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
20076 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
20077
20078 if (do_panic) {
20079 if (pmap_ledgers_panic) {
20080 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20081 pmap, pid, procname);
20082 } else {
20083 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20084 pmap, pid, procname);
20085 }
20086 }
20087 }
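/*
 * Illustrative worked example (not part of the original source): with
 * pmap_ledgers_panic set and pmap_ledgers_panic_leeway at 2, a
 * residual balance of 3 * PAGE_SIZE in any checked ledger sets
 * do_panic and triggers the panic above, while a residual of
 * 1 * PAGE_SIZE is only logged, unless that particular ledger has
 * panic-on-negative enabled, in which case any non-zero balance
 * triggers the panic.
 */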
20088 #endif /* MACH_ASSERT */