osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68
  69 #include <vm/vm_options.h>
  70
  71 #include <libkern/OSAtomic.h>
  72
  73 #include <mach/kern_return.h>
  74 #include <mach/port.h>
  75 #include <mach/vm_attributes.h>
  76 #include <mach/vm_param.h>
  77 #include <mach/vm_behavior.h>
  78 #include <mach/vm_statistics.h>
  79 #include <mach/memory_object.h>
  80 #include <mach/mach_vm.h>
  81 #include <machine/cpu_capabilities.h>
  82 #include <mach/sdt.h>
  83
  84 #include <kern/assert.h>
  85 #include <kern/backtrace.h>
  86 #include <kern/counters.h>
  87 #include <kern/exc_guard.h>
  88 #include <kern/kalloc.h>
  89 #include <kern/zalloc.h>
  90
  91 #include <vm/cpm.h>
  92 #include <vm/vm_compressor.h>
  93 #include <vm/vm_compressor_pager.h>
  94 #include <vm/vm_init.h>
  95 #include <vm/vm_fault.h>
  96 #include <vm/vm_map.h>
  97 #include <vm/vm_object.h>
  98 #include <vm/vm_page.h>
  99 #include <vm/vm_pageout.h>
 100 #include <vm/pmap.h>
 101 #include <vm/vm_kern.h>
 102 #include <ipc/ipc_port.h>
 103 #include <kern/sched_prim.h>
 104 #include <kern/misc_protos.h>
 105 #include <kern/xpr.h>
 106
 107 #include <mach/vm_map_server.h>
 108 #include <mach/mach_host_server.h>
 109 #include <vm/vm_protos.h>
 110 #include <vm/vm_purgeable_internal.h>
 111
 112 #include <vm/vm_protos.h>
 113 #include <vm/vm_shared_region.h>
 114 #include <vm/vm_map_store.h>
 115
 116 #include <san/kasan.h>
 117
 118 #include <sys/codesign.h>
 119 #include <libkern/section_keywords.h>
 120 #if DEVELOPMENT || DEBUG
 121 extern int proc_selfcsflags(void);
 122 #if CONFIG_EMBEDDED
 123 extern int panic_on_unsigned_execute;
 124 #endif /* CONFIG_EMBEDDED */
 125 #endif /* DEVELOPMENT || DEBUG */
 126
 127 #if __arm64__
 128 extern const int fourk_binary_compatibility_unsafe;
 129 extern const int fourk_binary_compatibility_allow_wx;
 130 #endif /* __arm64__ */
 131 extern int proc_selfpid(void);
 132 extern char *proc_name_address(void *p);
 133
 134 #if VM_MAP_DEBUG_APPLE_PROTECT
 135 int vm_map_debug_apple_protect = 0;
 136 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 137 #if VM_MAP_DEBUG_FOURK
 138 int vm_map_debug_fourk = 0;
 139 #endif /* VM_MAP_DEBUG_FOURK */
 140
 141 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
 142 int vm_map_executable_immutable_verbose = 0;
 143
 144 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
 145 /* Internal prototypes
 146  */
 147
 148 static void vm_map_simplify_range(
 149         vm_map_t        map,
 150         vm_map_offset_t start,
 151         vm_map_offset_t end);   /* forward */
 152
 153 static boolean_t        vm_map_range_check(
 154         vm_map_t        map,
 155         vm_map_offset_t start,
 156         vm_map_offset_t end,
 157         vm_map_entry_t  *entry);
 158
 159 static vm_map_entry_t   _vm_map_entry_create(
 160         struct vm_map_header    *map_header, boolean_t map_locked);
 161
 162 static void             _vm_map_entry_dispose(
 163         struct vm_map_header    *map_header,
 164         vm_map_entry_t          entry);
 165
 166 static void             vm_map_pmap_enter(
 167         vm_map_t                map,
 168         vm_map_offset_t         addr,
 169         vm_map_offset_t         end_addr,
 170         vm_object_t             object,
 171         vm_object_offset_t      offset,
 172         vm_prot_t               protection);
 173
 174 static void             _vm_map_clip_end(
 175         struct vm_map_header    *map_header,
 176         vm_map_entry_t          entry,
 177         vm_map_offset_t         end);
 178
 179 static void             _vm_map_clip_start(
 180         struct vm_map_header    *map_header,
 181         vm_map_entry_t          entry,
 182         vm_map_offset_t         start);
 183
 184 static void             vm_map_entry_delete(
 185         vm_map_t        map,
 186         vm_map_entry_t  entry);
 187
 188 static kern_return_t    vm_map_delete(
 189         vm_map_t        map,
 190         vm_map_offset_t start,
 191         vm_map_offset_t end,
 192         int             flags,
 193         vm_map_t        zap_map);
 194
 195 static void             vm_map_copy_insert(
 196         vm_map_t        map,
 197         vm_map_entry_t  after_where,
 198         vm_map_copy_t   copy);
 199
 200 static kern_return_t    vm_map_copy_overwrite_unaligned(
 201         vm_map_t        dst_map,
 202         vm_map_entry_t  entry,
 203         vm_map_copy_t   copy,
 204         vm_map_address_t start,
 205         boolean_t       discard_on_success);
 206
 207 static kern_return_t    vm_map_copy_overwrite_aligned(
 208         vm_map_t        dst_map,
 209         vm_map_entry_t  tmp_entry,
 210         vm_map_copy_t   copy,
 211         vm_map_offset_t start,
 212         pmap_t          pmap);
 213
 214 static kern_return_t    vm_map_copyin_kernel_buffer(
 215         vm_map_t        src_map,
 216         vm_map_address_t src_addr,
 217         vm_map_size_t   len,
 218         boolean_t       src_destroy,
 219         vm_map_copy_t   *copy_result);  /* OUT */
 220
 221 static kern_return_t    vm_map_copyout_kernel_buffer(
 222         vm_map_t        map,
 223         vm_map_address_t *addr, /* IN/OUT */
 224         vm_map_copy_t   copy,
 225         vm_map_size_t   copy_size,
 226         boolean_t       overwrite,
 227         boolean_t       consume_on_success);
 228
 229 static void             vm_map_fork_share(
 230         vm_map_t        old_map,
 231         vm_map_entry_t  old_entry,
 232         vm_map_t        new_map);
 233
 234 static boolean_t        vm_map_fork_copy(
 235         vm_map_t        old_map,
 236         vm_map_entry_t  *old_entry_p,
 237         vm_map_t        new_map,
 238         int             vm_map_copyin_flags);
 239
 240 static kern_return_t    vm_map_wire_nested(
 241         vm_map_t                   map,
 242         vm_map_offset_t            start,
 243         vm_map_offset_t            end,
 244         vm_prot_t                  caller_prot,
 245         vm_tag_t                   tag,
 246         boolean_t                  user_wire,
 247         pmap_t                     map_pmap,
 248         vm_map_offset_t            pmap_addr,
 249         ppnum_t                    *physpage_p);
 250
 251 static kern_return_t    vm_map_unwire_nested(
 252         vm_map_t                   map,
 253         vm_map_offset_t            start,
 254         vm_map_offset_t            end,
 255         boolean_t                  user_wire,
 256         pmap_t                     map_pmap,
 257         vm_map_offset_t            pmap_addr);
 258
 259 static kern_return_t    vm_map_overwrite_submap_recurse(
 260         vm_map_t                   dst_map,
 261         vm_map_offset_t            dst_addr,
 262         vm_map_size_t              dst_size);
 263
 264 static kern_return_t    vm_map_copy_overwrite_nested(
 265         vm_map_t                   dst_map,
 266         vm_map_offset_t            dst_addr,
 267         vm_map_copy_t              copy,
 268         boolean_t                  interruptible,
 269         pmap_t                     pmap,
 270         boolean_t                  discard_on_success);
 271
 272 static kern_return_t    vm_map_remap_extract(
 273         vm_map_t                map,
 274         vm_map_offset_t         addr,
 275         vm_map_size_t           size,
 276         boolean_t               copy,
 277         struct vm_map_header    *map_header,
 278         vm_prot_t               *cur_protection,
 279         vm_prot_t               *max_protection,
 280         vm_inherit_t            inheritance,
 281         boolean_t               pageable,
 282         boolean_t               same_map,
 283         vm_map_kernel_flags_t   vmk_flags);
 284
 285 static kern_return_t    vm_map_remap_range_allocate(
 286         vm_map_t                map,
 287         vm_map_address_t        *address,
 288         vm_map_size_t           size,
 289         vm_map_offset_t         mask,
 290         int                     flags,
 291         vm_map_kernel_flags_t   vmk_flags,
 292         vm_tag_t                tag,
 293         vm_map_entry_t          *map_entry);
 294
 295 static void             vm_map_region_look_for_page(
 296         vm_map_t                   map,
 297         vm_map_offset_t            va,
 298         vm_object_t                object,
 299         vm_object_offset_t         offset,
 300         int                        max_refcnt,
 301         int                        depth,
 302         vm_region_extended_info_t  extended,
 303         mach_msg_type_number_t count);
 304
 305 static int              vm_map_region_count_obj_refs(
 306         vm_map_entry_t             entry,
 307         vm_object_t                object);
 308
 309
 310 static kern_return_t    vm_map_willneed(
 311         vm_map_t        map,
 312         vm_map_offset_t start,
 313         vm_map_offset_t end);
 314
 315 static kern_return_t    vm_map_reuse_pages(
 316         vm_map_t        map,
 317         vm_map_offset_t start,
 318         vm_map_offset_t end);
 319
 320 static kern_return_t    vm_map_reusable_pages(
 321         vm_map_t        map,
 322         vm_map_offset_t start,
 323         vm_map_offset_t end);
 324
 325 static kern_return_t    vm_map_can_reuse(
 326         vm_map_t        map,
 327         vm_map_offset_t start,
 328         vm_map_offset_t end);
 329
 330 #if MACH_ASSERT
 331 static kern_return_t    vm_map_pageout(
 332         vm_map_t        map,
 333         vm_map_offset_t start,
 334         vm_map_offset_t end);
 335 #endif /* MACH_ASSERT */
 336
 337 static void             vm_map_corpse_footprint_destroy(
 338         vm_map_t        map);
 339
 340 pid_t find_largest_process_vm_map_entries(void);
 341
 342 /*
 343  * Macros to copy a vm_map_entry. We must be careful to correctly
 344  * manage the wired page count. vm_map_entry_copy() creates a new
 345  * map entry to the same memory - the wired count in the new entry
 346  * must be set to zero. vm_map_entry_copy_full() creates a new
 347  * entry that is identical to the old entry.  This preserves the
 348  * wire count; it's used for map splitting and zone changing in
 349  * vm_map_copyout.
 350  */
 351
 352 #if CONFIG_EMBEDDED
 353
 354 /*
 355  * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 356  * But for security reasons on embedded platforms, we don't want the
 357  * new mapping to be "used for jit", so we always reset the flag here.
 358  * Same for "pmap_cs_associated".
 359  */
 360 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW,OLD)         \
 361 MACRO_BEGIN                                             \
 362         (NEW)->used_for_jit = FALSE;                    \
 363         (NEW)->pmap_cs_associated = FALSE;                              \
 364 MACRO_END
 365
 366 #else /* CONFIG_EMBEDDED */
 367
 368 /*
 369  * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 370  * On macOS, the new mapping can be "used for jit".
 371  */
 372 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW,OLD)                         \
 373 MACRO_BEGIN                                                             \
 374         assert((NEW)->used_for_jit == (OLD)->used_for_jit);             \
 375         assert((NEW)->pmap_cs_associated == FALSE);                             \
 376 MACRO_END
 377
 378 #endif /* CONFIG_EMBEDDED */
 379
 380 #define vm_map_entry_copy(NEW,OLD)      \
 381 MACRO_BEGIN                             \
 382 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
 383         *(NEW) = *(OLD);                \
 384         (NEW)->is_shared = FALSE;       \
 385         (NEW)->needs_wakeup = FALSE;    \
 386         (NEW)->in_transition = FALSE;   \
 387         (NEW)->wired_count = 0;         \
 388         (NEW)->user_wired_count = 0;    \
 389         (NEW)->permanent = FALSE;       \
 390         VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD));    \
 391         (NEW)->from_reserved_zone = _vmec_reserved;     \
 392         if ((NEW)->iokit_acct) {                        \
 393              assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
 394              (NEW)->iokit_acct = FALSE;                 \
 395              (NEW)->use_pmap = TRUE;                    \
 396         }                                               \
 397         (NEW)->vme_resilient_codesign = FALSE; \
 398         (NEW)->vme_resilient_media = FALSE;     \
 399         (NEW)->vme_atomic = FALSE;      \
 400 MACRO_END
 401
 402 #define vm_map_entry_copy_full(NEW,OLD)                 \
 403 MACRO_BEGIN                                             \
 404 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
 405 (*(NEW) = *(OLD));                                      \
 406 (NEW)->from_reserved_zone = _vmecf_reserved;                    \
 407 MACRO_END
 408
 409 /*
 410  *      Decide if we want to allow processes to execute from their data or stack areas.
 411  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 412  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 413  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 414  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 415  *      specific pmap files since the default behavior varies according to architecture.  The
 416  *      main reason it varies is because of the need to provide binary compatibility with old
 417  *      applications that were written before these restrictions came into being.  In the old
 418  *      days, an app could execute anything it could read, but this has slowly been tightened
 419  *      up over time.  The default behavior is:
 420  *
 421  *      32-bit PPC apps         may execute from both stack and data areas
 422  *      32-bit Intel apps       may exeucte from data areas but not stack
 423  *      64-bit PPC/Intel apps   may not execute from either data or stack
 424  *
 425  *      An application on any architecture may override these defaults by explicitly
 426  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 427  *      system call.  This code here just determines what happens when an app tries to
 428  *      execute from a page that lacks execute permission.
 429  *
 430  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 431  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 432  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 433  *      execution from data areas for a particular binary even if the arch normally permits it. As
 434  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 435  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 436  *      are not all NX-safe.
 437  */
 438
 439 extern int allow_data_exec, allow_stack_exec;
 440
 441 int
 442 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 443 {
 444         int current_abi;
 445
 446         if (map->pmap == kernel_pmap) return FALSE;
 447
 448         /*
 449          * Determine if the app is running in 32 or 64 bit mode.
 450          */
 451
 452         if (vm_map_is_64bit(map))
 453                 current_abi = VM_ABI_64;
 454         else
 455                 current_abi = VM_ABI_32;
 456
 457         /*
 458          * Determine if we should allow the execution based on whether it's a
 459          * stack or data area and the current architecture.
 460          */
 461
 462         if (user_tag == VM_MEMORY_STACK)
 463                 return allow_stack_exec & current_abi;
 464
 465         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 466 }
 467
 468
 469 /*
 470  *      Virtual memory maps provide for the mapping, protection,
 471  *      and sharing of virtual memory objects.  In addition,
 472  *      this module provides for an efficient virtual copy of
 473  *      memory from one map to another.
 474  *
 475  *      Synchronization is required prior to most operations.
 476  *
 477  *      Maps consist of an ordered doubly-linked list of simple
 478  *      entries; a single hint is used to speed up lookups.
 479  *
 480  *      Sharing maps have been deleted from this version of Mach.
 481  *      All shared objects are now mapped directly into the respective
 482  *      maps.  This requires a change in the copy on write strategy;
 483  *      the asymmetric (delayed) strategy is used for shared temporary
 484  *      objects instead of the symmetric (shadow) strategy.  All maps
 485  *      are now "top level" maps (either task map, kernel map or submap
 486  *      of the kernel map).
 487  *
 488  *      Since portions of maps are specified by start/end addreses,
 489  *      which may not align with existing map entries, all
 490  *      routines merely "clip" entries to these start/end values.
 491  *      [That is, an entry is split into two, bordering at a
 492  *      start or end value.]  Note that these clippings may not
 493  *      always be necessary (as the two resulting entries are then
 494  *      not changed); however, the clipping is done for convenience.
 495  *      No attempt is currently made to "glue back together" two
 496  *      abutting entries.
 497  *
 498  *      The symmetric (shadow) copy strategy implements virtual copy
 499  *      by copying VM object references from one map to
 500  *      another, and then marking both regions as copy-on-write.
 501  *      It is important to note that only one writeable reference
 502  *      to a VM object region exists in any map when this strategy
 503  *      is used -- this means that shadow object creation can be
 504  *      delayed until a write operation occurs.  The symmetric (delayed)
 505  *      strategy allows multiple maps to have writeable references to
 506  *      the same region of a vm object, and hence cannot delay creating
 507  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 508  *      Copying of permanent objects is completely different; see
 509  *      vm_object_copy_strategically() in vm_object.c.
 510  */
 511
 512 static zone_t   vm_map_zone;                            /* zone for vm_map structures */
 513 zone_t                  vm_map_entry_zone;                      /* zone for vm_map_entry structures */
 514 static zone_t   vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking allocations */
 515 static zone_t   vm_map_copy_zone;                       /* zone for vm_map_copy structures */
 516 zone_t                  vm_map_holes_zone;                      /* zone for vm map holes (vm_map_links) structures */
 517
 518
 519 /*
 520  *      Placeholder object for submap operations.  This object is dropped
 521  *      into the range by a call to vm_map_find, and removed when
 522  *      vm_map_submap creates the submap.
 523  */
 524
 525 vm_object_t     vm_submap_object;
 526
 527 static void             *map_data;
 528 static vm_size_t        map_data_size;
 529 static void             *kentry_data;
 530 static vm_size_t        kentry_data_size;
 531 static void             *map_holes_data;
 532 static vm_size_t        map_holes_data_size;
 533
 534 #if CONFIG_EMBEDDED
 535 #define         NO_COALESCE_LIMIT  0
 536 #else
 537 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 538 #endif
 539
 540 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 541 unsigned int not_in_kdp = 1;
 542
 543 unsigned int vm_map_set_cache_attr_count = 0;
 544
 545 kern_return_t
 546 vm_map_set_cache_attr(
 547         vm_map_t        map,
 548         vm_map_offset_t va)
 549 {
 550         vm_map_entry_t  map_entry;
 551         vm_object_t     object;
 552         kern_return_t   kr = KERN_SUCCESS;
 553
 554         vm_map_lock_read(map);
 555
 556         if (!vm_map_lookup_entry(map, va, &map_entry) ||
 557             map_entry->is_sub_map) {
 558                 /*
 559                  * that memory is not properly mapped
 560                  */
 561                 kr = KERN_INVALID_ARGUMENT;
 562                 goto done;
 563         }
 564         object = VME_OBJECT(map_entry);
 565
 566         if (object == VM_OBJECT_NULL) {
 567                 /*
 568                  * there should be a VM object here at this point
 569                  */
 570                 kr = KERN_INVALID_ARGUMENT;
 571                 goto done;
 572         }
 573         vm_object_lock(object);
 574         object->set_cache_attr = TRUE;
 575         vm_object_unlock(object);
 576
 577         vm_map_set_cache_attr_count++;
 578 done:
 579         vm_map_unlock_read(map);
 580
 581         return kr;
 582 }
 583
 584
 585 #if CONFIG_CODE_DECRYPTION
 586 /*
 587  * vm_map_apple_protected:
 588  * This remaps the requested part of the object with an object backed by
 589  * the decrypting pager.
 590  * crypt_info contains entry points and session data for the crypt module.
 591  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 592  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 593  */
 594 kern_return_t
 595 vm_map_apple_protected(
 596         vm_map_t                map,
 597         vm_map_offset_t         start,
 598         vm_map_offset_t         end,
 599         vm_object_offset_t      crypto_backing_offset,
 600         struct pager_crypt_info *crypt_info)
 601 {
 602         boolean_t       map_locked;
 603         kern_return_t   kr;
 604         vm_map_entry_t  map_entry;
 605         struct vm_map_entry tmp_entry;
 606         memory_object_t unprotected_mem_obj;
 607         vm_object_t     protected_object;
 608         vm_map_offset_t map_addr;
 609         vm_map_offset_t start_aligned, end_aligned;
 610         vm_object_offset_t      crypto_start, crypto_end;
 611         int             vm_flags;
 612         vm_map_kernel_flags_t vmk_flags;
 613
 614         vm_flags = 0;
 615         vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 616
 617         map_locked = FALSE;
 618         unprotected_mem_obj = MEMORY_OBJECT_NULL;
 619
 620         start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
 621         end_aligned = vm_map_round_page(end, PAGE_MASK_64);
 622         start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
 623         end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
 624
 625 #if __arm64__
 626         /*
 627          * "start" and "end" might be 4K-aligned but not 16K-aligned,
 628          * so we might have to loop and establish up to 3 mappings:
 629          *
 630          * + the first 16K-page, which might overlap with the previous
 631          *   4K-aligned mapping,
 632          * + the center,
 633          * + the last 16K-page, which might overlap with the next
 634          *   4K-aligned mapping.
 635          * Each of these mapping might be backed by a vnode pager (if
 636          * properly page-aligned) or a "fourk_pager", itself backed by a
 637          * vnode pager (if 4K-aligned but not page-aligned).
 638          */
 639 #else /* __arm64__ */
 640         assert(start_aligned == start);
 641         assert(end_aligned == end);
 642 #endif /* __arm64__ */
 643
 644         map_addr = start_aligned;
 645         for (map_addr = start_aligned;
 646              map_addr < end;
 647              map_addr = tmp_entry.vme_end) {
 648                 vm_map_lock(map);
 649                 map_locked = TRUE;
 650
 651                 /* lookup the protected VM object */
 652                 if (!vm_map_lookup_entry(map,
 653                                          map_addr,
 654                                          &map_entry) ||
 655                     map_entry->is_sub_map ||
 656                     VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
 657                     !(map_entry->protection & VM_PROT_EXECUTE)) {
 658                         /* that memory is not properly mapped */
 659                         kr = KERN_INVALID_ARGUMENT;
 660                         goto done;
 661                 }
 662
 663                 /* get the protected object to be decrypted */
 664                 protected_object = VME_OBJECT(map_entry);
 665                 if (protected_object == VM_OBJECT_NULL) {
 666                         /* there should be a VM object here at this point */
 667                         kr = KERN_INVALID_ARGUMENT;
 668                         goto done;
 669                 }
 670                 /* ensure protected object stays alive while map is unlocked */
 671                 vm_object_reference(protected_object);
 672
 673                 /* limit the map entry to the area we want to cover */
 674                 vm_map_clip_start(map, map_entry, start_aligned);
 675                 vm_map_clip_end(map, map_entry, end_aligned);
 676
 677                 tmp_entry = *map_entry;
 678                 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
 679                 vm_map_unlock(map);
 680                 map_locked = FALSE;
 681
 682                 /*
 683                  * This map entry might be only partially encrypted
 684                  * (if not fully "page-aligned").
 685                  */
 686                 crypto_start = 0;
 687                 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
 688                 if (tmp_entry.vme_start < start) {
 689                         if (tmp_entry.vme_start != start_aligned) {
 690                                 kr = KERN_INVALID_ADDRESS;
 691                         }
 692                         crypto_start += (start - tmp_entry.vme_start);
 693                 }
 694                 if (tmp_entry.vme_end > end) {
 695                         if (tmp_entry.vme_end != end_aligned) {
 696                                 kr = KERN_INVALID_ADDRESS;
 697                         }
 698                         crypto_end -= (tmp_entry.vme_end - end);
 699                 }
 700
 701                 /*
 702                  * This "extra backing offset" is needed to get the decryption
 703                  * routine to use the right key.  It adjusts for the possibly
 704                  * relative offset of an interposed "4K" pager...
 705                  */
 706                 if (crypto_backing_offset == (vm_object_offset_t) -1) {
 707                         crypto_backing_offset = VME_OFFSET(&tmp_entry);
 708                 }
 709
 710                 /*
 711                  * Lookup (and create if necessary) the protected memory object
 712                  * matching that VM object.
 713                  * If successful, this also grabs a reference on the memory object,
 714                  * to guarantee that it doesn't go away before we get a chance to map
 715                  * it.
 716                  */
 717                 unprotected_mem_obj = apple_protect_pager_setup(
 718                         protected_object,
 719                         VME_OFFSET(&tmp_entry),
 720                         crypto_backing_offset,
 721                         crypt_info,
 722                         crypto_start,
 723                         crypto_end);
 724
 725                 /* release extra ref on protected object */
 726                 vm_object_deallocate(protected_object);
 727
 728                 if (unprotected_mem_obj == NULL) {
 729                         kr = KERN_FAILURE;
 730                         goto done;
 731                 }
 732
 733                 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
 734                 /* can overwrite an immutable mapping */
 735                 vmk_flags.vmkf_overwrite_immutable = TRUE;
 736 #if __arm64__
 737                 if (tmp_entry.used_for_jit &&
 738                     (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
 739                      PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
 740                     fourk_binary_compatibility_unsafe &&
 741                     fourk_binary_compatibility_allow_wx) {
 742                         printf("** FOURK_COMPAT [%d]: "
 743                                "allowing write+execute at 0x%llx\n",
 744                                proc_selfpid(), tmp_entry.vme_start);
 745                         vmk_flags.vmkf_map_jit = TRUE;
 746                 }
 747 #endif /* __arm64__ */
 748
 749                 /* map this memory object in place of the current one */
 750                 map_addr = tmp_entry.vme_start;
 751                 kr = vm_map_enter_mem_object(map,
 752                                              &map_addr,
 753                                              (tmp_entry.vme_end -
 754                                               tmp_entry.vme_start),
 755                                              (mach_vm_offset_t) 0,
 756                                              vm_flags,
 757                                              vmk_flags,
 758                                              VM_KERN_MEMORY_NONE,
 759                                              (ipc_port_t)(uintptr_t) unprotected_mem_obj,
 760                                              0,
 761                                              TRUE,
 762                                              tmp_entry.protection,
 763                                              tmp_entry.max_protection,
 764                                              tmp_entry.inheritance);
 765                 assertf(kr == KERN_SUCCESS,
 766                         "kr = 0x%x\n", kr);
 767                 assertf(map_addr == tmp_entry.vme_start,
 768                         "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
 769                         (uint64_t)map_addr,
 770                         (uint64_t) tmp_entry.vme_start,
 771                         &tmp_entry);
 772
 773 #if VM_MAP_DEBUG_APPLE_PROTECT
 774                 if (vm_map_debug_apple_protect) {
 775                         printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
 776                                " backing:[object:%p,offset:0x%llx,"
 777                                "crypto_backing_offset:0x%llx,"
 778                                "crypto_start:0x%llx,crypto_end:0x%llx]\n",
 779                                map,
 780                                (uint64_t) map_addr,
 781                                (uint64_t) (map_addr + (tmp_entry.vme_end -
 782                                                        tmp_entry.vme_start)),
 783                                unprotected_mem_obj,
 784                                protected_object,
 785                                VME_OFFSET(&tmp_entry),
 786                                crypto_backing_offset,
 787                                crypto_start,
 788                                crypto_end);
 789                 }
 790 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 791
 792                 /*
 793                  * Release the reference obtained by
 794                  * apple_protect_pager_setup().
 795                  * The mapping (if it succeeded) is now holding a reference on
 796                  * the memory object.
 797                  */
 798                 memory_object_deallocate(unprotected_mem_obj);
 799                 unprotected_mem_obj = MEMORY_OBJECT_NULL;
 800
 801                 /* continue with next map entry */
 802                 crypto_backing_offset += (tmp_entry.vme_end -
 803                                           tmp_entry.vme_start);
 804                 crypto_backing_offset -= crypto_start;
 805         }
 806         kr = KERN_SUCCESS;
 807
 808 done:
 809         if (map_locked) {
 810                 vm_map_unlock(map);
 811         }
 812         return kr;
 813 }
 814 #endif  /* CONFIG_CODE_DECRYPTION */
 815
 816
 817 lck_grp_t               vm_map_lck_grp;
 818 lck_grp_attr_t  vm_map_lck_grp_attr;
 819 lck_attr_t              vm_map_lck_attr;
 820 lck_attr_t              vm_map_lck_rw_attr;
 821
 822 #if CONFIG_EMBEDDED
 823 int malloc_no_cow = 1;
 824 #define VM_PROTECT_WX_FAIL 0
 825 #else /* CONFIG_EMBEDDED */
 826 int malloc_no_cow = 0;
 827 #define VM_PROTECT_WX_FAIL 1
 828 #endif /* CONFIG_EMBEDDED */
 829 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
 830
 831 /*
 832  *      vm_map_init:
 833  *
 834  *      Initialize the vm_map module.  Must be called before
 835  *      any other vm_map routines.
 836  *
 837  *      Map and entry structures are allocated from zones -- we must
 838  *      initialize those zones.
 839  *
 840  *      There are three zones of interest:
 841  *
 842  *      vm_map_zone:            used to allocate maps.
 843  *      vm_map_entry_zone:      used to allocate map entries.
 844  *      vm_map_entry_reserved_zone:     fallback zone for kernel map entries
 845  *
 846  *      The kernel allocates map entries from a special zone that is initially
 847  *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 848  *      the kernel to allocate more memory to a entry zone when it became
 849  *      empty since the very act of allocating memory implies the creation
 850  *      of a new entry.
 851  */
 852 void
 853 vm_map_init(
 854         void)
 855 {
 856         vm_size_t entry_zone_alloc_size;
 857         const char *mez_name = "VM map entries";
 858
 859         vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
 860                             PAGE_SIZE, "maps");
 861         zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 862 #if     defined(__LP64__)
 863         entry_zone_alloc_size = PAGE_SIZE * 5;
 864 #else
 865         entry_zone_alloc_size = PAGE_SIZE * 6;
 866 #endif
 867         vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 868                                   1024*1024, entry_zone_alloc_size,
 869                                   mez_name);
 870         zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
 871         zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
 872         zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
 873
 874         vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 875                                    kentry_data_size * 64, kentry_data_size,
 876                                    "Reserved VM map entries");
 877         zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
 878         /* Don't quarantine because we always need elements available */
 879         zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
 880
 881         vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
 882                                  16*1024, PAGE_SIZE, "VM map copies");
 883         zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 884
 885         vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
 886                                  16*1024, PAGE_SIZE, "VM map holes");
 887         zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
 888
 889         /*
 890          *      Cram the map and kentry zones with initial data.
 891          *      Set reserved_zone non-collectible to aid zone_gc().
 892          */
 893         zone_change(vm_map_zone, Z_COLLECT, FALSE);
 894         zone_change(vm_map_zone, Z_FOREIGN, TRUE);
 895         zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
 896
 897         zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
 898         zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
 899         zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
 900         zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
 901         zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 902         zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 903         zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 904
 905         zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
 906         zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
 907         zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
 908         zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
 909         zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
 910         zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
 911
 912         /*
 913          * Add the stolen memory to zones, adjust zone size and stolen counts.
 914          * zcram only up to the maximum number of pages for each zone chunk.
 915          */
 916         zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
 917
 918         const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
 919         for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
 920                 zcram(vm_map_entry_reserved_zone,
 921                                 (vm_offset_t)kentry_data + off,
 922                                 MIN(kentry_data_size - off, stride));
 923         }
 924         for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
 925                 zcram(vm_map_holes_zone,
 926                                 (vm_offset_t)map_holes_data + off,
 927                                 MIN(map_holes_data_size - off, stride));
 928         }
 929
 930         VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
 931
 932         lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
 933         lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
 934         lck_attr_setdefault(&vm_map_lck_attr);
 935
 936         lck_attr_setdefault(&vm_map_lck_rw_attr);
 937         lck_attr_cleardebug(&vm_map_lck_rw_attr);
 938
 939 #if VM_MAP_DEBUG_APPLE_PROTECT
 940         PE_parse_boot_argn("vm_map_debug_apple_protect",
 941                            &vm_map_debug_apple_protect,
 942                            sizeof(vm_map_debug_apple_protect));
 943 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 944 #if VM_MAP_DEBUG_APPLE_FOURK
 945         PE_parse_boot_argn("vm_map_debug_fourk",
 946                            &vm_map_debug_fourk,
 947                            sizeof(vm_map_debug_fourk));
 948 #endif /* VM_MAP_DEBUG_FOURK */
 949         PE_parse_boot_argn("vm_map_executable_immutable",
 950                            &vm_map_executable_immutable,
 951                            sizeof(vm_map_executable_immutable));
 952         PE_parse_boot_argn("vm_map_executable_immutable_verbose",
 953                            &vm_map_executable_immutable_verbose,
 954                            sizeof(vm_map_executable_immutable_verbose));
 955
 956         PE_parse_boot_argn("malloc_no_cow",
 957                            &malloc_no_cow,
 958                            sizeof(malloc_no_cow));
 959         if (malloc_no_cow) {
 960                 vm_memory_malloc_no_cow_mask = 0ULL;
 961                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
 962                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
 963                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
 964 //              vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
 965 //              vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
 966                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
 967                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
 968                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
 969                 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
 970 //              vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
 971                 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
 972                                    &vm_memory_malloc_no_cow_mask,
 973                                    sizeof(vm_memory_malloc_no_cow_mask));
 974         }
 975 }
 976
 977 void
 978 vm_map_steal_memory(
 979         void)
 980 {
 981         uint32_t kentry_initial_pages;
 982
 983         map_data_size = round_page(10 * sizeof(struct _vm_map));
 984         map_data = pmap_steal_memory(map_data_size);
 985
 986         /*
 987          * kentry_initial_pages corresponds to the number of kernel map entries
 988          * required during bootstrap until the asynchronous replenishment
 989          * scheme is activated and/or entries are available from the general
 990          * map entry pool.
 991          */
 992 #if     defined(__LP64__)
 993         kentry_initial_pages = 10;
 994 #else
 995         kentry_initial_pages = 6;
 996 #endif
 997
 998 #if CONFIG_GZALLOC
 999         /* If using the guard allocator, reserve more memory for the kernel
1000          * reserved map entry pool.
1001         */
1002         if (gzalloc_enabled())
1003                 kentry_initial_pages *= 1024;
1004 #endif
1005
1006         kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1007         kentry_data = pmap_steal_memory(kentry_data_size);
1008
1009         map_holes_data_size = kentry_data_size;
1010         map_holes_data = pmap_steal_memory(map_holes_data_size);
1011 }
1012
1013 boolean_t vm_map_supports_hole_optimization = FALSE;
1014
1015 void
1016 vm_kernel_reserved_entry_init(void) {
1017         zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
1018
1019         /*
1020          * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1021          */
1022         zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
1023         vm_map_supports_hole_optimization = TRUE;
1024 }
1025
1026 void
1027 vm_map_disable_hole_optimization(vm_map_t map)
1028 {
1029         vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
1030
1031         if (map->holelistenabled) {
1032
1033                 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1034
1035                 while (hole_entry != NULL) {
1036
1037                         next_hole_entry = hole_entry->vme_next;
1038
1039                         hole_entry->vme_next = NULL;
1040                         hole_entry->vme_prev = NULL;
1041                         zfree(vm_map_holes_zone, hole_entry);
1042
1043                         if (next_hole_entry == head_entry) {
1044                                 hole_entry = NULL;
1045                         } else {
1046                                 hole_entry = next_hole_entry;
1047                         }
1048                 }
1049
1050                 map->holes_list = NULL;
1051                 map->holelistenabled = FALSE;
1052
1053                 map->first_free = vm_map_first_entry(map);
1054                 SAVE_HINT_HOLE_WRITE(map, NULL);
1055         }
1056 }
1057
1058 boolean_t
1059 vm_kernel_map_is_kernel(vm_map_t map) {
1060         return (map->pmap == kernel_pmap);
1061 }
1062
1063 /*
1064  *      vm_map_create:
1065  *
1066  *      Creates and returns a new empty VM map with
1067  *      the given physical map structure, and having
1068  *      the given lower and upper address bounds.
1069  */
1070
1071 vm_map_t
1072 vm_map_create(
1073         pmap_t          pmap,
1074         vm_map_offset_t min,
1075         vm_map_offset_t max,
1076         boolean_t       pageable)
1077 {
1078         int options;
1079
1080         options = 0;
1081         if (pageable) {
1082                 options |= VM_MAP_CREATE_PAGEABLE;
1083         }
1084         return vm_map_create_options(pmap, min, max, options);
1085 }
1086
1087 vm_map_t
1088 vm_map_create_options(
1089         pmap_t          pmap,
1090         vm_map_offset_t min,
1091         vm_map_offset_t max,
1092         int             options)
1093 {
1094         vm_map_t        result;
1095         struct vm_map_links     *hole_entry = NULL;
1096
1097         if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1098                 /* unknown option */
1099                 return VM_MAP_NULL;
1100         }
1101
1102         result = (vm_map_t) zalloc(vm_map_zone);
1103         if (result == VM_MAP_NULL)
1104                 panic("vm_map_create");
1105
1106         vm_map_first_entry(result) = vm_map_to_entry(result);
1107         vm_map_last_entry(result)  = vm_map_to_entry(result);
1108         result->hdr.nentries = 0;
1109         if (options & VM_MAP_CREATE_PAGEABLE) {
1110                 result->hdr.entries_pageable = TRUE;
1111         } else {
1112                 result->hdr.entries_pageable = FALSE;
1113         }
1114
1115         vm_map_store_init( &(result->hdr) );
1116
1117         result->hdr.page_shift = PAGE_SHIFT;
1118
1119         result->size = 0;
1120         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
1121         result->user_wire_size  = 0;
1122 #if __x86_64__
1123         result->vmmap_high_start = 0;
1124 #endif /* __x86_64__ */
1125         result->map_refcnt = 1;
1126 #if     TASK_SWAPPER
1127         result->res_count = 1;
1128         result->sw_state = MAP_SW_IN;
1129 #endif  /* TASK_SWAPPER */
1130         result->pmap = pmap;
1131         result->min_offset = min;
1132         result->max_offset = max;
1133         result->wiring_required = FALSE;
1134         result->no_zero_fill = FALSE;
1135         result->mapped_in_other_pmaps = FALSE;
1136         result->wait_for_space = FALSE;
1137         result->switch_protect = FALSE;
1138         result->disable_vmentry_reuse = FALSE;
1139         result->map_disallow_data_exec = FALSE;
1140         result->is_nested_map = FALSE;
1141         result->map_disallow_new_exec = FALSE;
1142         result->highest_entry_end = 0;
1143         result->first_free = vm_map_to_entry(result);
1144         result->hint = vm_map_to_entry(result);
1145         result->jit_entry_exists = FALSE;
1146
1147         /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1148         if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1149                 result->has_corpse_footprint = TRUE;
1150                 result->holelistenabled = FALSE;
1151                 result->vmmap_corpse_footprint = NULL;
1152         } else {
1153                 result->has_corpse_footprint = FALSE;
1154                 if (vm_map_supports_hole_optimization) {
1155                         hole_entry = zalloc(vm_map_holes_zone);
1156
1157                         hole_entry->start = min;
1158 #if defined(__arm__) || defined(__arm64__)
1159                         hole_entry->end = result->max_offset;
1160 #else
1161                         hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1162 #endif
1163                         result->holes_list = result->hole_hint = hole_entry;
1164                         hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1165                         result->holelistenabled = TRUE;
1166                 } else {
1167                         result->holelistenabled = FALSE;
1168                 }
1169         }
1170
1171         vm_map_lock_init(result);
1172         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1173
1174         return(result);
1175 }
1176
1177 /*
1178  *      vm_map_entry_create:    [ internal use only ]
1179  *
1180  *      Allocates a VM map entry for insertion in the
1181  *      given map (or map copy).  No fields are filled.
1182  */
1183 #define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)
1184
1185 #define vm_map_copy_entry_create(copy, map_locked)                                      \
1186         _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1187 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1188
1189 static vm_map_entry_t
1190 _vm_map_entry_create(
1191         struct vm_map_header    *map_header, boolean_t __unused map_locked)
1192 {
1193         zone_t  zone;
1194         vm_map_entry_t  entry;
1195
1196         zone = vm_map_entry_zone;
1197
1198         assert(map_header->entries_pageable ? !map_locked : TRUE);
1199
1200         if (map_header->entries_pageable) {
1201                 entry = (vm_map_entry_t) zalloc(zone);
1202         }
1203         else {
1204                 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1205
1206                 if (entry == VM_MAP_ENTRY_NULL) {
1207                         zone = vm_map_entry_reserved_zone;
1208                         entry = (vm_map_entry_t) zalloc(zone);
1209                         OSAddAtomic(1, &reserved_zalloc_count);
1210                 } else
1211                         OSAddAtomic(1, &nonreserved_zalloc_count);
1212         }
1213
1214         if (entry == VM_MAP_ENTRY_NULL)
1215                 panic("vm_map_entry_create");
1216         entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1217
1218         vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1219 #if     MAP_ENTRY_CREATION_DEBUG
1220         entry->vme_creation_maphdr = map_header;
1221         backtrace(&entry->vme_creation_bt[0],
1222                   (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1223 #endif
1224         return(entry);
1225 }
1226
1227 /*
1228  *      vm_map_entry_dispose:   [ internal use only ]
1229  *
1230  *      Inverse of vm_map_entry_create.
1231  *
1232  *      write map lock held so no need to
1233  *      do anything special to insure correctness
1234  *      of the stores
1235  */
1236 #define vm_map_entry_dispose(map, entry)                        \
1237         _vm_map_entry_dispose(&(map)->hdr, (entry))
1238
1239 #define vm_map_copy_entry_dispose(map, entry) \
1240         _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1241
1242 static void
1243 _vm_map_entry_dispose(
1244         struct vm_map_header    *map_header,
1245         vm_map_entry_t          entry)
1246 {
1247         zone_t          zone;
1248
1249         if (map_header->entries_pageable || !(entry->from_reserved_zone))
1250                 zone = vm_map_entry_zone;
1251         else
1252                 zone = vm_map_entry_reserved_zone;
1253
1254         if (!map_header->entries_pageable) {
1255                 if (zone == vm_map_entry_zone)
1256                         OSAddAtomic(-1, &nonreserved_zalloc_count);
1257                 else
1258                         OSAddAtomic(-1, &reserved_zalloc_count);
1259         }
1260
1261         zfree(zone, entry);
1262 }
1263
1264 #if MACH_ASSERT
1265 static boolean_t first_free_check = FALSE;
1266 boolean_t
1267 first_free_is_valid(
1268         vm_map_t        map)
1269 {
1270         if (!first_free_check)
1271                 return TRUE;
1272
1273         return( first_free_is_valid_store( map ));
1274 }
1275 #endif /* MACH_ASSERT */
1276
1277
1278 #define vm_map_copy_entry_link(copy, after_where, entry)                \
1279         _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1280
1281 #define vm_map_copy_entry_unlink(copy, entry)                           \
1282         _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1283
1284 #if     MACH_ASSERT && TASK_SWAPPER
1285 /*
1286  *      vm_map_res_reference:
1287  *
1288  *      Adds another valid residence count to the given map.
1289  *
1290  *      Map is locked so this function can be called from
1291  *      vm_map_swapin.
1292  *
1293  */
1294 void vm_map_res_reference(vm_map_t map)
1295 {
1296         /* assert map is locked */
1297         assert(map->res_count >= 0);
1298         assert(map->map_refcnt >= map->res_count);
1299         if (map->res_count == 0) {
1300                 lck_mtx_unlock(&map->s_lock);
1301                 vm_map_lock(map);
1302                 vm_map_swapin(map);
1303                 lck_mtx_lock(&map->s_lock);
1304                 ++map->res_count;
1305                 vm_map_unlock(map);
1306         } else
1307                 ++map->res_count;
1308 }
1309
1310 /*
1311  *      vm_map_reference_swap:
1312  *
1313  *      Adds valid reference and residence counts to the given map.
1314  *
1315  *      The map may not be in memory (i.e. zero residence count).
1316  *
1317  */
1318 void vm_map_reference_swap(vm_map_t map)
1319 {
1320         assert(map != VM_MAP_NULL);
1321         lck_mtx_lock(&map->s_lock);
1322         assert(map->res_count >= 0);
1323         assert(map->map_refcnt >= map->res_count);
1324         map->map_refcnt++;
1325         vm_map_res_reference(map);
1326         lck_mtx_unlock(&map->s_lock);
1327 }
1328
1329 /*
1330  *      vm_map_res_deallocate:
1331  *
1332  *      Decrement residence count on a map; possibly causing swapout.
1333  *
1334  *      The map must be in memory (i.e. non-zero residence count).
1335  *
1336  *      The map is locked, so this function is callable from vm_map_deallocate.
1337  *
1338  */
1339 void vm_map_res_deallocate(vm_map_t map)
1340 {
1341         assert(map->res_count > 0);
1342         if (--map->res_count == 0) {
1343                 lck_mtx_unlock(&map->s_lock);
1344                 vm_map_lock(map);
1345                 vm_map_swapout(map);
1346                 vm_map_unlock(map);
1347                 lck_mtx_lock(&map->s_lock);
1348         }
1349         assert(map->map_refcnt >= map->res_count);
1350 }
1351 #endif  /* MACH_ASSERT && TASK_SWAPPER */
1352
1353 /*
1354  *      vm_map_destroy:
1355  *
1356  *      Actually destroy a map.
1357  */
1358 void
1359 vm_map_destroy(
1360         vm_map_t        map,
1361         int             flags)
1362 {
1363         vm_map_lock(map);
1364
1365         /* final cleanup: no need to unnest shared region */
1366         flags |= VM_MAP_REMOVE_NO_UNNESTING;
1367         /* final cleanup: ok to remove immutable mappings */
1368         flags |= VM_MAP_REMOVE_IMMUTABLE;
1369         /* final cleanup: allow gaps in range */
1370         flags |= VM_MAP_REMOVE_GAPS_OK;
1371
1372         /* clean up regular map entries */
1373         (void) vm_map_delete(map, map->min_offset, map->max_offset,
1374                              flags, VM_MAP_NULL);
1375         /* clean up leftover special mappings (commpage, etc...) */
1376 #if     !defined(__arm__) && !defined(__arm64__)
1377         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1378                              flags, VM_MAP_NULL);
1379 #endif /* !__arm__ && !__arm64__ */
1380
1381         vm_map_disable_hole_optimization(map);
1382         vm_map_corpse_footprint_destroy(map);
1383
1384         vm_map_unlock(map);
1385
1386         assert(map->hdr.nentries == 0);
1387
1388         if(map->pmap)
1389                 pmap_destroy(map->pmap);
1390
1391         if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1392                 /*
1393                  * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1394                  * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1395                  * structure or kalloc'ed via lck_mtx_init.
1396                  * An example is s_lock_ext within struct _vm_map.
1397                  *
1398                  * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1399                  * can add another tag to detect embedded vs alloc'ed indirect external
1400                  * mutexes but that'll be additional checks in the lock path and require
1401                  * updating dependencies for the old vs new tag.
1402                  *
1403                  * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1404                  * just when lock debugging is ON, we choose to forego explicitly destroying
1405                  * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1406                  * count on vm_map_lck_grp, which has no serious side-effect.
1407                  */
1408         } else {
1409                 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1410                 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1411         }
1412
1413         zfree(vm_map_zone, map);
1414 }
1415
1416 /*
1417  * Returns pid of the task with the largest number of VM map entries.
1418  * Used in the zone-map-exhaustion jetsam path.
1419  */
1420 pid_t
1421 find_largest_process_vm_map_entries(void)
1422 {
1423         pid_t victim_pid = -1;
1424         int max_vm_map_entries = 0;
1425         task_t task = TASK_NULL;
1426         queue_head_t *task_list = &tasks;
1427
1428         lck_mtx_lock(&tasks_threads_lock);
1429         queue_iterate(task_list, task, task_t, tasks) {
1430                 if (task == kernel_task || !task->active)
1431                         continue;
1432
1433                 vm_map_t task_map = task->map;
1434                 if (task_map != VM_MAP_NULL) {
1435                         int task_vm_map_entries = task_map->hdr.nentries;
1436                         if (task_vm_map_entries > max_vm_map_entries) {
1437                                 max_vm_map_entries = task_vm_map_entries;
1438                                 victim_pid = pid_from_task(task);
1439                         }
1440                 }
1441         }
1442         lck_mtx_unlock(&tasks_threads_lock);
1443
1444         printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1445         return victim_pid;
1446 }
1447
1448 #if     TASK_SWAPPER
1449 /*
1450  * vm_map_swapin/vm_map_swapout
1451  *
1452  * Swap a map in and out, either referencing or releasing its resources.
1453  * These functions are internal use only; however, they must be exported
1454  * because they may be called from macros, which are exported.
1455  *
1456  * In the case of swapout, there could be races on the residence count,
1457  * so if the residence count is up, we return, assuming that a
1458  * vm_map_deallocate() call in the near future will bring us back.
1459  *
1460  * Locking:
1461  *      -- We use the map write lock for synchronization among races.
1462  *      -- The map write lock, and not the simple s_lock, protects the
1463  *         swap state of the map.
1464  *      -- If a map entry is a share map, then we hold both locks, in
1465  *         hierarchical order.
1466  *
1467  * Synchronization Notes:
1468  *      1) If a vm_map_swapin() call happens while swapout in progress, it
1469  *      will block on the map lock and proceed when swapout is through.
1470  *      2) A vm_map_reference() call at this time is illegal, and will
1471  *      cause a panic.  vm_map_reference() is only allowed on resident
1472  *      maps, since it refuses to block.
1473  *      3) A vm_map_swapin() call during a swapin will block, and
1474  *      proceeed when the first swapin is done, turning into a nop.
1475  *      This is the reason the res_count is not incremented until
1476  *      after the swapin is complete.
1477  *      4) There is a timing hole after the checks of the res_count, before
1478  *      the map lock is taken, during which a swapin may get the lock
1479  *      before a swapout about to happen.  If this happens, the swapin
1480  *      will detect the state and increment the reference count, causing
1481  *      the swapout to be a nop, thereby delaying it until a later
1482  *      vm_map_deallocate.  If the swapout gets the lock first, then
1483  *      the swapin will simply block until the swapout is done, and
1484  *      then proceed.
1485  *
1486  * Because vm_map_swapin() is potentially an expensive operation, it
1487  * should be used with caution.
1488  *
1489  * Invariants:
1490  *      1) A map with a residence count of zero is either swapped, or
1491  *         being swapped.
1492  *      2) A map with a non-zero residence count is either resident,
1493  *         or being swapped in.
1494  */
1495
1496 int vm_map_swap_enable = 1;
1497
1498 void vm_map_swapin (vm_map_t map)
1499 {
1500         vm_map_entry_t entry;
1501
1502         if (!vm_map_swap_enable)        /* debug */
1503                 return;
1504
1505         /*
1506          * Map is locked
1507          * First deal with various races.
1508          */
1509         if (map->sw_state == MAP_SW_IN)
1510                 /*
1511                  * we raced with swapout and won.  Returning will incr.
1512                  * the res_count, turning the swapout into a nop.
1513                  */
1514                 return;
1515
1516         /*
1517          * The residence count must be zero.  If we raced with another
1518          * swapin, the state would have been IN; if we raced with a
1519          * swapout (after another competing swapin), we must have lost
1520          * the race to get here (see above comment), in which case
1521          * res_count is still 0.
1522          */
1523         assert(map->res_count == 0);
1524
1525         /*
1526          * There are no intermediate states of a map going out or
1527          * coming in, since the map is locked during the transition.
1528          */
1529         assert(map->sw_state == MAP_SW_OUT);
1530
1531         /*
1532          * We now operate upon each map entry.  If the entry is a sub-
1533          * or share-map, we call vm_map_res_reference upon it.
1534          * If the entry is an object, we call vm_object_res_reference
1535          * (this may iterate through the shadow chain).
1536          * Note that we hold the map locked the entire time,
1537          * even if we get back here via a recursive call in
1538          * vm_map_res_reference.
1539          */
1540         entry = vm_map_first_entry(map);
1541
1542         while (entry != vm_map_to_entry(map)) {
1543                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1544                         if (entry->is_sub_map) {
1545                                 vm_map_t lmap = VME_SUBMAP(entry);
1546                                 lck_mtx_lock(&lmap->s_lock);
1547                                 vm_map_res_reference(lmap);
1548                                 lck_mtx_unlock(&lmap->s_lock);
1549                         } else {
1550                                 vm_object_t object = VME_OBEJCT(entry);
1551                                 vm_object_lock(object);
1552                                 /*
1553                                  * This call may iterate through the
1554                                  * shadow chain.
1555                                  */
1556                                 vm_object_res_reference(object);
1557                                 vm_object_unlock(object);
1558                         }
1559                 }
1560                 entry = entry->vme_next;
1561         }
1562         assert(map->sw_state == MAP_SW_OUT);
1563         map->sw_state = MAP_SW_IN;
1564 }
1565
1566 void vm_map_swapout(vm_map_t map)
1567 {
1568         vm_map_entry_t entry;
1569
1570         /*
1571          * Map is locked
1572          * First deal with various races.
1573          * If we raced with a swapin and lost, the residence count
1574          * will have been incremented to 1, and we simply return.
1575          */
1576         lck_mtx_lock(&map->s_lock);
1577         if (map->res_count != 0) {
1578                 lck_mtx_unlock(&map->s_lock);
1579                 return;
1580         }
1581         lck_mtx_unlock(&map->s_lock);
1582
1583         /*
1584          * There are no intermediate states of a map going out or
1585          * coming in, since the map is locked during the transition.
1586          */
1587         assert(map->sw_state == MAP_SW_IN);
1588
1589         if (!vm_map_swap_enable)
1590                 return;
1591
1592         /*
1593          * We now operate upon each map entry.  If the entry is a sub-
1594          * or share-map, we call vm_map_res_deallocate upon it.
1595          * If the entry is an object, we call vm_object_res_deallocate
1596          * (this may iterate through the shadow chain).
1597          * Note that we hold the map locked the entire time,
1598          * even if we get back here via a recursive call in
1599          * vm_map_res_deallocate.
1600          */
1601         entry = vm_map_first_entry(map);
1602
1603         while (entry != vm_map_to_entry(map)) {
1604                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1605                         if (entry->is_sub_map) {
1606                                 vm_map_t lmap = VME_SUBMAP(entry);
1607                                 lck_mtx_lock(&lmap->s_lock);
1608                                 vm_map_res_deallocate(lmap);
1609                                 lck_mtx_unlock(&lmap->s_lock);
1610                         } else {
1611                                 vm_object_t object = VME_OBJECT(entry);
1612                                 vm_object_lock(object);
1613                                 /*
1614                                  * This call may take a long time,
1615                                  * since it could actively push
1616                                  * out pages (if we implement it
1617                                  * that way).
1618                                  */
1619                                 vm_object_res_deallocate(object);
1620                                 vm_object_unlock(object);
1621                         }
1622                 }
1623                 entry = entry->vme_next;
1624         }
1625         assert(map->sw_state == MAP_SW_IN);
1626         map->sw_state = MAP_SW_OUT;
1627 }
1628
1629 #endif  /* TASK_SWAPPER */
1630
1631 /*
1632  *      vm_map_lookup_entry:    [ internal use only ]
1633  *
1634  *      Calls into the vm map store layer to find the map
1635  *      entry containing (or immediately preceding) the
1636  *      specified address in the given map; the entry is returned
1637  *      in the "entry" parameter.  The boolean
1638  *      result indicates whether the address is
1639  *      actually contained in the map.
1640  */
1641 boolean_t
1642 vm_map_lookup_entry(
1643         vm_map_t                map,
1644         vm_map_offset_t address,
1645         vm_map_entry_t          *entry)         /* OUT */
1646 {
1647         return ( vm_map_store_lookup_entry( map, address, entry ));
1648 }
1649
1650 /*
1651  *      Routine:        vm_map_find_space
1652  *      Purpose:
1653  *              Allocate a range in the specified virtual address map,
1654  *              returning the entry allocated for that range.
1655  *              Used by kmem_alloc, etc.
1656  *
1657  *              The map must be NOT be locked. It will be returned locked
1658  *              on KERN_SUCCESS, unlocked on failure.
1659  *
1660  *              If an entry is allocated, the object/offset fields
1661  *              are initialized to zero.
1662  */
1663 kern_return_t
1664 vm_map_find_space(
1665         vm_map_t        map,
1666         vm_map_offset_t         *address,       /* OUT */
1667         vm_map_size_t           size,
1668         vm_map_offset_t         mask,
1669         int                     flags __unused,
1670         vm_map_kernel_flags_t   vmk_flags,
1671         vm_tag_t                tag,
1672         vm_map_entry_t          *o_entry)       /* OUT */
1673 {
1674         vm_map_entry_t                  entry, new_entry;
1675         vm_map_offset_t start;
1676         vm_map_offset_t end;
1677         vm_map_entry_t                  hole_entry;
1678
1679         if (size == 0) {
1680                 *address = 0;
1681                 return KERN_INVALID_ARGUMENT;
1682         }
1683
1684         if (vmk_flags.vmkf_guard_after) {
1685                 /* account for the back guard page in the size */
1686                 size += VM_MAP_PAGE_SIZE(map);
1687         }
1688
1689         new_entry = vm_map_entry_create(map, FALSE);
1690
1691         /*
1692          *      Look for the first possible address; if there's already
1693          *      something at this address, we have to start after it.
1694          */
1695
1696         vm_map_lock(map);
1697
1698         if( map->disable_vmentry_reuse == TRUE) {
1699                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1700         } else {
1701                 if (map->holelistenabled) {
1702                         hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1703
1704                         if (hole_entry == NULL) {
1705                                 /*
1706                                  * No more space in the map?
1707                                  */
1708                                 vm_map_entry_dispose(map, new_entry);
1709                                 vm_map_unlock(map);
1710                                 return(KERN_NO_SPACE);
1711                         }
1712
1713                         entry = hole_entry;
1714                         start = entry->vme_start;
1715                 } else {
1716                         assert(first_free_is_valid(map));
1717                         if ((entry = map->first_free) == vm_map_to_entry(map))
1718                                 start = map->min_offset;
1719                         else
1720                                 start = entry->vme_end;
1721                 }
1722         }
1723
1724         /*
1725          *      In any case, the "entry" always precedes
1726          *      the proposed new region throughout the loop:
1727          */
1728
1729         while (TRUE) {
1730                 vm_map_entry_t  next;
1731
1732                 /*
1733                  *      Find the end of the proposed new region.
1734                  *      Be sure we didn't go beyond the end, or
1735                  *      wrap around the address.
1736                  */
1737
1738                 if (vmk_flags.vmkf_guard_before) {
1739                         /* reserve space for the front guard page */
1740                         start += VM_MAP_PAGE_SIZE(map);
1741                 }
1742                 end = ((start + mask) & ~mask);
1743
1744                 if (end < start) {
1745                         vm_map_entry_dispose(map, new_entry);
1746                         vm_map_unlock(map);
1747                         return(KERN_NO_SPACE);
1748                 }
1749                 start = end;
1750                 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1751                 end += size;
1752                 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1753
1754                 if ((end > map->max_offset) || (end < start)) {
1755                         vm_map_entry_dispose(map, new_entry);
1756                         vm_map_unlock(map);
1757                         return(KERN_NO_SPACE);
1758                 }
1759
1760                 next = entry->vme_next;
1761
1762                 if (map->holelistenabled) {
1763                         if (entry->vme_end >= end)
1764                                 break;
1765                 } else {
1766                         /*
1767                          *      If there are no more entries, we must win.
1768                          *
1769                          *      OR
1770                          *
1771                          *      If there is another entry, it must be
1772                          *      after the end of the potential new region.
1773                          */
1774
1775                         if (next == vm_map_to_entry(map))
1776                                 break;
1777
1778                         if (next->vme_start >= end)
1779                                 break;
1780                 }
1781
1782                 /*
1783                  *      Didn't fit -- move to the next entry.
1784                  */
1785
1786                 entry = next;
1787
1788                 if (map->holelistenabled) {
1789                         if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1790                                 /*
1791                                  * Wrapped around
1792                                  */
1793                                 vm_map_entry_dispose(map, new_entry);
1794                                 vm_map_unlock(map);
1795                                 return(KERN_NO_SPACE);
1796                         }
1797                         start = entry->vme_start;
1798                 } else {
1799                         start = entry->vme_end;
1800                 }
1801         }
1802
1803         if (map->holelistenabled) {
1804                 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1805                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1806                 }
1807         }
1808
1809         /*
1810          *      At this point,
1811          *              "start" and "end" should define the endpoints of the
1812          *                      available new range, and
1813          *              "entry" should refer to the region before the new
1814          *                      range, and
1815          *
1816          *              the map should be locked.
1817          */
1818
1819         if (vmk_flags.vmkf_guard_before) {
1820                 /* go back for the front guard page */
1821                 start -= VM_MAP_PAGE_SIZE(map);
1822         }
1823         *address = start;
1824
1825         assert(start < end);
1826         new_entry->vme_start = start;
1827         new_entry->vme_end = end;
1828         assert(page_aligned(new_entry->vme_start));
1829         assert(page_aligned(new_entry->vme_end));
1830         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1831                                    VM_MAP_PAGE_MASK(map)));
1832         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1833                                    VM_MAP_PAGE_MASK(map)));
1834
1835         new_entry->is_shared = FALSE;
1836         new_entry->is_sub_map = FALSE;
1837         new_entry->use_pmap = TRUE;
1838         VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1839         VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1840
1841         new_entry->needs_copy = FALSE;
1842
1843         new_entry->inheritance = VM_INHERIT_DEFAULT;
1844         new_entry->protection = VM_PROT_DEFAULT;
1845         new_entry->max_protection = VM_PROT_ALL;
1846         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1847         new_entry->wired_count = 0;
1848         new_entry->user_wired_count = 0;
1849
1850         new_entry->in_transition = FALSE;
1851         new_entry->needs_wakeup = FALSE;
1852         new_entry->no_cache = FALSE;
1853         new_entry->permanent = FALSE;
1854         new_entry->superpage_size = FALSE;
1855         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1856                 new_entry->map_aligned = TRUE;
1857         } else {
1858                 new_entry->map_aligned = FALSE;
1859         }
1860
1861         new_entry->used_for_jit = FALSE;
1862         new_entry->pmap_cs_associated = FALSE;
1863         new_entry->zero_wired_pages = FALSE;
1864         new_entry->iokit_acct = FALSE;
1865         new_entry->vme_resilient_codesign = FALSE;
1866         new_entry->vme_resilient_media = FALSE;
1867         if (vmk_flags.vmkf_atomic_entry)
1868                 new_entry->vme_atomic = TRUE;
1869         else
1870                 new_entry->vme_atomic = FALSE;
1871
1872         VME_ALIAS_SET(new_entry, tag);
1873
1874         /*
1875          *      Insert the new entry into the list
1876          */
1877
1878         vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1879
1880         map->size += size;
1881
1882         /*
1883          *      Update the lookup hint
1884          */
1885         SAVE_HINT_MAP_WRITE(map, new_entry);
1886
1887         *o_entry = new_entry;
1888         return(KERN_SUCCESS);
1889 }
1890
1891 int vm_map_pmap_enter_print = FALSE;
1892 int vm_map_pmap_enter_enable = FALSE;
1893
1894 /*
1895  *      Routine:        vm_map_pmap_enter [internal only]
1896  *
1897  *      Description:
1898  *              Force pages from the specified object to be entered into
1899  *              the pmap at the specified address if they are present.
1900  *              As soon as a page not found in the object the scan ends.
1901  *
1902  *      Returns:
1903  *              Nothing.
1904  *
1905  *      In/out conditions:
1906  *              The source map should not be locked on entry.
1907  */
1908 __unused static void
1909 vm_map_pmap_enter(
1910         vm_map_t                map,
1911         vm_map_offset_t         addr,
1912         vm_map_offset_t         end_addr,
1913         vm_object_t             object,
1914         vm_object_offset_t      offset,
1915         vm_prot_t               protection)
1916 {
1917         int                     type_of_fault;
1918         kern_return_t           kr;
1919         struct vm_object_fault_info fault_info = {};
1920
1921         if(map->pmap == 0)
1922                 return;
1923
1924         while (addr < end_addr) {
1925                 vm_page_t       m;
1926
1927
1928                 /*
1929                  * TODO:
1930                  * From vm_map_enter(), we come into this function without the map
1931                  * lock held or the object lock held.
1932                  * We haven't taken a reference on the object either.
1933                  * We should do a proper lookup on the map to make sure
1934                  * that things are sane before we go locking objects that
1935                  * could have been deallocated from under us.
1936                  */
1937
1938                 vm_object_lock(object);
1939
1940                 m = vm_page_lookup(object, offset);
1941
1942                 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
1943                     (m->vmp_unusual && ( m->vmp_error || m->vmp_restart || m->vmp_absent))) {
1944                         vm_object_unlock(object);
1945                         return;
1946                 }
1947
1948                 if (vm_map_pmap_enter_print) {
1949                         printf("vm_map_pmap_enter:");
1950                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1951                                map, (unsigned long long)addr, object, (unsigned long long)offset);
1952                 }
1953                 type_of_fault = DBG_CACHE_HIT_FAULT;
1954                 kr = vm_fault_enter(m, map->pmap,
1955                                     addr, protection, protection,
1956                                     VM_PAGE_WIRED(m),
1957                                     FALSE, /* change_wiring */
1958                                     VM_KERN_MEMORY_NONE, /* tag - not wiring */
1959                                     &fault_info,
1960                                     NULL,  /* need_retry */
1961                                     &type_of_fault);
1962
1963                 vm_object_unlock(object);
1964
1965                 offset += PAGE_SIZE_64;
1966                 addr += PAGE_SIZE;
1967         }
1968 }
1969
1970 boolean_t vm_map_pmap_is_empty(
1971         vm_map_t        map,
1972         vm_map_offset_t start,
1973         vm_map_offset_t end);
1974 boolean_t vm_map_pmap_is_empty(
1975         vm_map_t        map,
1976         vm_map_offset_t start,
1977         vm_map_offset_t end)
1978 {
1979 #ifdef MACHINE_PMAP_IS_EMPTY
1980         return pmap_is_empty(map->pmap, start, end);
1981 #else   /* MACHINE_PMAP_IS_EMPTY */
1982         vm_map_offset_t offset;
1983         ppnum_t         phys_page;
1984
1985         if (map->pmap == NULL) {
1986                 return TRUE;
1987         }
1988
1989         for (offset = start;
1990              offset < end;
1991              offset += PAGE_SIZE) {
1992                 phys_page = pmap_find_phys(map->pmap, offset);
1993                 if (phys_page) {
1994                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1995                                 "page %d at 0x%llx\n",
1996                                 map, (long long)start, (long long)end,
1997                                 phys_page, (long long)offset);
1998                         return FALSE;
1999                 }
2000         }
2001         return TRUE;
2002 #endif  /* MACHINE_PMAP_IS_EMPTY */
2003 }
2004
2005 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2006 kern_return_t
2007 vm_map_random_address_for_size(
2008         vm_map_t        map,
2009         vm_map_offset_t *address,
2010         vm_map_size_t   size)
2011 {
2012         kern_return_t   kr = KERN_SUCCESS;
2013         int             tries = 0;
2014         vm_map_offset_t random_addr = 0;
2015         vm_map_offset_t hole_end;
2016
2017         vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
2018         vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
2019         vm_map_size_t   vm_hole_size = 0;
2020         vm_map_size_t   addr_space_size;
2021
2022         addr_space_size = vm_map_max(map) - vm_map_min(map);
2023
2024         assert(page_aligned(size));
2025
2026         while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2027                 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2028                 random_addr = vm_map_trunc_page(
2029                         vm_map_min(map) +(random_addr % addr_space_size),
2030                         VM_MAP_PAGE_MASK(map));
2031
2032                 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2033                         if (prev_entry == vm_map_to_entry(map)) {
2034                                 next_entry = vm_map_first_entry(map);
2035                         } else {
2036                                 next_entry = prev_entry->vme_next;
2037                         }
2038                         if (next_entry == vm_map_to_entry(map)) {
2039                                 hole_end = vm_map_max(map);
2040                         } else {
2041                                 hole_end = next_entry->vme_start;
2042                         }
2043                         vm_hole_size = hole_end - random_addr;
2044                         if (vm_hole_size >= size) {
2045                                 *address = random_addr;
2046                                 break;
2047                         }
2048                 }
2049                 tries++;
2050         }
2051
2052         if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2053                 kr = KERN_NO_SPACE;
2054         }
2055         return kr;
2056 }
2057
2058 static boolean_t
2059 vm_memory_malloc_no_cow(
2060         int alias)
2061 {
2062         uint64_t alias_mask;
2063
2064         alias_mask = 1ULL << alias;
2065         if (alias_mask & vm_memory_malloc_no_cow_mask) {
2066                 return TRUE;
2067         }
2068         return FALSE;
2069 }
2070
2071 /*
2072  *      Routine:        vm_map_enter
2073  *
2074  *      Description:
2075  *              Allocate a range in the specified virtual address map.
2076  *              The resulting range will refer to memory defined by
2077  *              the given memory object and offset into that object.
2078  *
2079  *              Arguments are as defined in the vm_map call.
2080  */
2081 int _map_enter_debug = 0;
2082 static unsigned int vm_map_enter_restore_successes = 0;
2083 static unsigned int vm_map_enter_restore_failures = 0;
2084 kern_return_t
2085 vm_map_enter(
2086         vm_map_t                map,
2087         vm_map_offset_t         *address,       /* IN/OUT */
2088         vm_map_size_t           size,
2089         vm_map_offset_t         mask,
2090         int                     flags,
2091         vm_map_kernel_flags_t   vmk_flags,
2092         vm_tag_t                alias,
2093         vm_object_t             object,
2094         vm_object_offset_t      offset,
2095         boolean_t               needs_copy,
2096         vm_prot_t               cur_protection,
2097         vm_prot_t               max_protection,
2098         vm_inherit_t            inheritance)
2099 {
2100         vm_map_entry_t          entry, new_entry;
2101         vm_map_offset_t         start, tmp_start, tmp_offset;
2102         vm_map_offset_t         end, tmp_end;
2103         vm_map_offset_t         tmp2_start, tmp2_end;
2104         vm_map_offset_t         desired_empty_end;
2105         vm_map_offset_t         step;
2106         kern_return_t           result = KERN_SUCCESS;
2107         vm_map_t                zap_old_map = VM_MAP_NULL;
2108         vm_map_t                zap_new_map = VM_MAP_NULL;
2109         boolean_t               map_locked = FALSE;
2110         boolean_t               pmap_empty = TRUE;
2111         boolean_t               new_mapping_established = FALSE;
2112         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2113         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2114         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2115         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2116         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2117         boolean_t               is_submap = vmk_flags.vmkf_submap;
2118         boolean_t               permanent = vmk_flags.vmkf_permanent;
2119         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
2120         boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
2121         boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2122         boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2123         boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2124         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2125         vm_tag_t                user_alias;
2126         vm_map_offset_t         effective_min_offset, effective_max_offset;
2127         kern_return_t           kr;
2128         boolean_t               clear_map_aligned = FALSE;
2129         vm_map_entry_t          hole_entry;
2130         vm_map_size_t           chunk_size = 0;
2131
2132         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2133
2134         if (flags & VM_FLAGS_4GB_CHUNK) {
2135 #if defined(__LP64__)
2136                 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2137 #else /* __LP64__ */
2138                 chunk_size = ANON_CHUNK_SIZE;
2139 #endif /* __LP64__ */
2140         } else {
2141                 chunk_size = ANON_CHUNK_SIZE;
2142         }
2143
2144         if (superpage_size) {
2145                 switch (superpage_size) {
2146                         /*
2147                          * Note that the current implementation only supports
2148                          * a single size for superpages, SUPERPAGE_SIZE, per
2149                          * architecture. As soon as more sizes are supposed
2150                          * to be supported, SUPERPAGE_SIZE has to be replaced
2151                          * with a lookup of the size depending on superpage_size.
2152                          */
2153 #ifdef __x86_64__
2154                         case SUPERPAGE_SIZE_ANY:
2155                                 /* handle it like 2 MB and round up to page size */
2156                                 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
2157                         case SUPERPAGE_SIZE_2MB:
2158                                 break;
2159 #endif
2160                         default:
2161                                 return KERN_INVALID_ARGUMENT;
2162                 }
2163                 mask = SUPERPAGE_SIZE-1;
2164                 if (size & (SUPERPAGE_SIZE-1))
2165                         return KERN_INVALID_ARGUMENT;
2166                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
2167         }
2168
2169
2170         if ((cur_protection & VM_PROT_WRITE) &&
2171             (cur_protection & VM_PROT_EXECUTE) &&
2172 #if !CONFIG_EMBEDDED
2173             map != kernel_map &&
2174             (cs_process_global_enforcement() ||
2175              (vmk_flags.vmkf_cs_enforcement_override
2176               ? vmk_flags.vmkf_cs_enforcement
2177               : cs_process_enforcement(NULL))) &&
2178 #endif /* !CONFIG_EMBEDDED */
2179             !entry_for_jit) {
2180                 DTRACE_VM3(cs_wx,
2181                            uint64_t, 0,
2182                            uint64_t, 0,
2183                            vm_prot_t, cur_protection);
2184                 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2185 #if VM_PROTECT_WX_FAIL
2186                        "failing\n",
2187 #else /* VM_PROTECT_WX_FAIL */
2188                        "turning off execute\n",
2189 #endif /* VM_PROTECT_WX_FAIL */
2190                        proc_selfpid(),
2191                        (current_task()->bsd_info
2192                         ? proc_name_address(current_task()->bsd_info)
2193                         : "?"),
2194                        __FUNCTION__);
2195                 cur_protection &= ~VM_PROT_EXECUTE;
2196 #if VM_PROTECT_WX_FAIL
2197                 return KERN_PROTECTION_FAILURE;
2198 #endif /* VM_PROTECT_WX_FAIL */
2199         }
2200
2201         /*
2202          * If the task has requested executable lockdown,
2203          * deny any new executable mapping.
2204          */
2205         if (map->map_disallow_new_exec == TRUE) {
2206                 if (cur_protection & VM_PROT_EXECUTE) {
2207                         return KERN_PROTECTION_FAILURE;
2208                 }
2209         }
2210
2211         if (resilient_codesign || resilient_media) {
2212                 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2213                     (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2214                         return KERN_PROTECTION_FAILURE;
2215                 }
2216         }
2217
2218         if (is_submap) {
2219                 if (purgable) {
2220                         /* submaps can not be purgeable */
2221                         return KERN_INVALID_ARGUMENT;
2222                 }
2223                 if (object == VM_OBJECT_NULL) {
2224                         /* submaps can not be created lazily */
2225                         return KERN_INVALID_ARGUMENT;
2226                 }
2227         }
2228         if (vmk_flags.vmkf_already) {
2229                 /*
2230                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
2231                  * is already present.  For it to be meaningul, the requested
2232                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2233                  * we shouldn't try and remove what was mapped there first
2234                  * (!VM_FLAGS_OVERWRITE).
2235                  */
2236                 if ((flags & VM_FLAGS_ANYWHERE) ||
2237                     (flags & VM_FLAGS_OVERWRITE)) {
2238                         return KERN_INVALID_ARGUMENT;
2239                 }
2240         }
2241
2242         effective_min_offset = map->min_offset;
2243
2244         if (vmk_flags.vmkf_beyond_max) {
2245                 /*
2246                  * Allow an insertion beyond the map's max offset.
2247                  */
2248 #if     !defined(__arm__) && !defined(__arm64__)
2249                 if (vm_map_is_64bit(map))
2250                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2251                 else
2252 #endif  /* __arm__ */
2253                         effective_max_offset = 0x00000000FFFFF000ULL;
2254         } else {
2255                 effective_max_offset = map->max_offset;
2256         }
2257
2258         if (size == 0 ||
2259             (offset & PAGE_MASK_64) != 0) {
2260                 *address = 0;
2261                 return KERN_INVALID_ARGUMENT;
2262         }
2263
2264         if (map->pmap == kernel_pmap) {
2265                 user_alias = VM_KERN_MEMORY_NONE;
2266         } else {
2267                 user_alias = alias;
2268         }
2269
2270 #define RETURN(value)   { result = value; goto BailOut; }
2271
2272         assert(page_aligned(*address));
2273         assert(page_aligned(size));
2274
2275         if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2276                 /*
2277                  * In most cases, the caller rounds the size up to the
2278                  * map's page size.
2279                  * If we get a size that is explicitly not map-aligned here,
2280                  * we'll have to respect the caller's wish and mark the
2281                  * mapping as "not map-aligned" to avoid tripping the
2282                  * map alignment checks later.
2283                  */
2284                 clear_map_aligned = TRUE;
2285         }
2286         if (!anywhere &&
2287             !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2288                 /*
2289                  * We've been asked to map at a fixed address and that
2290                  * address is not aligned to the map's specific alignment.
2291                  * The caller should know what it's doing (i.e. most likely
2292                  * mapping some fragmented copy map, transferring memory from
2293                  * a VM map with a different alignment), so clear map_aligned
2294                  * for this new VM map entry and proceed.
2295                  */
2296                 clear_map_aligned = TRUE;
2297         }
2298
2299         /*
2300          * Only zero-fill objects are allowed to be purgable.
2301          * LP64todo - limit purgable objects to 32-bits for now
2302          */
2303         if (purgable &&
2304             (offset != 0 ||
2305              (object != VM_OBJECT_NULL &&
2306               (object->vo_size != size ||
2307                object->purgable == VM_PURGABLE_DENY))
2308              || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
2309                 return KERN_INVALID_ARGUMENT;
2310
2311         if (!anywhere && overwrite) {
2312                 /*
2313                  * Create a temporary VM map to hold the old mappings in the
2314                  * affected area while we create the new one.
2315                  * This avoids releasing the VM map lock in
2316                  * vm_map_entry_delete() and allows atomicity
2317                  * when we want to replace some mappings with a new one.
2318                  * It also allows us to restore the old VM mappings if the
2319                  * new mapping fails.
2320                  */
2321                 zap_old_map = vm_map_create(PMAP_NULL,
2322                                             *address,
2323                                             *address + size,
2324                                             map->hdr.entries_pageable);
2325                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2326                 vm_map_disable_hole_optimization(zap_old_map);
2327         }
2328
2329 StartAgain: ;
2330
2331         start = *address;
2332
2333         if (anywhere) {
2334                 vm_map_lock(map);
2335                 map_locked = TRUE;
2336
2337                 if (entry_for_jit) {
2338 #if CONFIG_EMBEDDED
2339                         if (map->jit_entry_exists) {
2340                                 result = KERN_INVALID_ARGUMENT;
2341                                 goto BailOut;
2342                         }
2343                         random_address = TRUE;
2344 #endif /* CONFIG_EMBEDDED */
2345                 }
2346
2347                 if (random_address) {
2348                         /*
2349                          * Get a random start address.
2350                          */
2351                         result = vm_map_random_address_for_size(map, address, size);
2352                         if (result != KERN_SUCCESS) {
2353                                 goto BailOut;
2354                         }
2355                         start = *address;
2356                 }
2357 #if __x86_64__
2358                 else if ((start == 0 || start == vm_map_min(map)) &&
2359                          !map->disable_vmentry_reuse &&
2360                          map->vmmap_high_start != 0) {
2361                         start = map->vmmap_high_start;
2362                 }
2363 #endif /* __x86_64__ */
2364
2365
2366                 /*
2367                  *      Calculate the first possible address.
2368                  */
2369
2370                 if (start < effective_min_offset)
2371                         start = effective_min_offset;
2372                 if (start > effective_max_offset)
2373                         RETURN(KERN_NO_SPACE);
2374
2375                 /*
2376                  *      Look for the first possible address;
2377                  *      if there's already something at this
2378                  *      address, we have to start after it.
2379                  */
2380
2381                 if( map->disable_vmentry_reuse == TRUE) {
2382                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
2383                 } else {
2384
2385                         if (map->holelistenabled) {
2386                                 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2387
2388                                 if (hole_entry == NULL) {
2389                                         /*
2390                                          * No more space in the map?
2391                                          */
2392                                         result = KERN_NO_SPACE;
2393                                         goto BailOut;
2394                                 } else {
2395
2396                                         boolean_t found_hole = FALSE;
2397
2398                                         do {
2399                                                 if (hole_entry->vme_start >= start) {
2400                                                         start = hole_entry->vme_start;
2401                                                         found_hole = TRUE;
2402                                                         break;
2403                                                 }
2404
2405                                                 if (hole_entry->vme_end > start) {
2406                                                         found_hole = TRUE;
2407                                                         break;
2408                                                 }
2409                                                 hole_entry = hole_entry->vme_next;
2410
2411                                         } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2412
2413                                         if (found_hole == FALSE) {
2414                                                 result = KERN_NO_SPACE;
2415                                                 goto BailOut;
2416                                         }
2417
2418                                         entry = hole_entry;
2419
2420                                         if (start == 0)
2421                                                 start += PAGE_SIZE_64;
2422                                 }
2423                         } else {
2424                                 assert(first_free_is_valid(map));
2425
2426                                 entry = map->first_free;
2427
2428                                 if (entry == vm_map_to_entry(map)) {
2429                                         entry = NULL;
2430                                 } else {
2431                                        if (entry->vme_next == vm_map_to_entry(map)){
2432                                                /*
2433                                                 * Hole at the end of the map.
2434                                                 */
2435                                                 entry = NULL;
2436                                        } else {
2437                                                 if (start < (entry->vme_next)->vme_start ) {
2438                                                         start = entry->vme_end;
2439                                                         start = vm_map_round_page(start,
2440                                                                                   VM_MAP_PAGE_MASK(map));
2441                                                 } else {
2442                                                         /*
2443                                                          * Need to do a lookup.
2444                                                          */
2445                                                         entry = NULL;
2446                                                 }
2447                                        }
2448                                 }
2449
2450                                 if (entry == NULL) {
2451                                         vm_map_entry_t  tmp_entry;
2452                                         if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2453                                                 assert(!entry_for_jit);
2454                                                 start = tmp_entry->vme_end;
2455                                                 start = vm_map_round_page(start,
2456                                                                           VM_MAP_PAGE_MASK(map));
2457                                         }
2458                                         entry = tmp_entry;
2459                                 }
2460                         }
2461                 }
2462
2463                 /*
2464                  *      In any case, the "entry" always precedes
2465                  *      the proposed new region throughout the
2466                  *      loop:
2467                  */
2468
2469                 while (TRUE) {
2470                         vm_map_entry_t  next;
2471
2472                         /*
2473                          *      Find the end of the proposed new region.
2474                          *      Be sure we didn't go beyond the end, or
2475                          *      wrap around the address.
2476                          */
2477
2478                         end = ((start + mask) & ~mask);
2479                         end = vm_map_round_page(end,
2480                                                 VM_MAP_PAGE_MASK(map));
2481                         if (end < start)
2482                                 RETURN(KERN_NO_SPACE);
2483                         start = end;
2484                         assert(VM_MAP_PAGE_ALIGNED(start,
2485                                                    VM_MAP_PAGE_MASK(map)));
2486                         end += size;
2487
2488                         /* We want an entire page of empty space, but don't increase the allocation size. */
2489                         desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2490
2491                         if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2492                                 if (map->wait_for_space) {
2493                                         assert(!keep_map_locked);
2494                                         if (size <= (effective_max_offset -
2495                                                      effective_min_offset)) {
2496                                                 assert_wait((event_t)map,
2497                                                             THREAD_ABORTSAFE);
2498                                                 vm_map_unlock(map);
2499                                                 map_locked = FALSE;
2500                                                 thread_block(THREAD_CONTINUE_NULL);
2501                                                 goto StartAgain;
2502                                         }
2503                                 }
2504                                 RETURN(KERN_NO_SPACE);
2505                         }
2506
2507                         next = entry->vme_next;
2508
2509                         if (map->holelistenabled) {
2510                                 if (entry->vme_end >= desired_empty_end)
2511                                         break;
2512                         } else {
2513                                 /*
2514                                  *      If there are no more entries, we must win.
2515                                  *
2516                                  *      OR
2517                                  *
2518                                  *      If there is another entry, it must be
2519                                  *      after the end of the potential new region.
2520                                  */
2521
2522                                 if (next == vm_map_to_entry(map))
2523                                         break;
2524
2525                                 if (next->vme_start >= desired_empty_end)
2526                                         break;
2527                         }
2528
2529                         /*
2530                          *      Didn't fit -- move to the next entry.
2531                          */
2532
2533                         entry = next;
2534
2535                         if (map->holelistenabled) {
2536                                 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2537                                         /*
2538                                          * Wrapped around
2539                                          */
2540                                         result = KERN_NO_SPACE;
2541                                         goto BailOut;
2542                                 }
2543                                 start = entry->vme_start;
2544                         } else {
2545                                 start = entry->vme_end;
2546                         }
2547
2548                         start = vm_map_round_page(start,
2549                                                   VM_MAP_PAGE_MASK(map));
2550                 }
2551
2552                 if (map->holelistenabled) {
2553                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2554                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2555                         }
2556                 }
2557
2558                 *address = start;
2559                 assert(VM_MAP_PAGE_ALIGNED(*address,
2560                                            VM_MAP_PAGE_MASK(map)));
2561         } else {
2562                 /*
2563                  *      Verify that:
2564                  *              the address doesn't itself violate
2565                  *              the mask requirement.
2566                  */
2567
2568                 vm_map_lock(map);
2569                 map_locked = TRUE;
2570                 if ((start & mask) != 0)
2571                         RETURN(KERN_NO_SPACE);
2572
2573                 /*
2574                  *      ...     the address is within bounds
2575                  */
2576
2577                 end = start + size;
2578
2579                 if ((start < effective_min_offset) ||
2580                     (end > effective_max_offset) ||
2581                     (start >= end)) {
2582                         RETURN(KERN_INVALID_ADDRESS);
2583                 }
2584
2585                 if (overwrite && zap_old_map != VM_MAP_NULL) {
2586                         int remove_flags;
2587                         /*
2588                          * Fixed mapping and "overwrite" flag: attempt to
2589                          * remove all existing mappings in the specified
2590                          * address range, saving them in our "zap_old_map".
2591                          */
2592                         remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2593                         remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2594                         if (vmk_flags.vmkf_overwrite_immutable) {
2595                                 /* we can overwrite immutable mappings */
2596                                 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2597                         }
2598                         (void) vm_map_delete(map, start, end,
2599                                              remove_flags,
2600                                              zap_old_map);
2601                 }
2602
2603                 /*
2604                  *      ...     the starting address isn't allocated
2605                  */
2606
2607                 if (vm_map_lookup_entry(map, start, &entry)) {
2608                         if (! (vmk_flags.vmkf_already)) {
2609                                 RETURN(KERN_NO_SPACE);
2610                         }
2611                         /*
2612                          * Check if what's already there is what we want.
2613                          */
2614                         tmp_start = start;
2615                         tmp_offset = offset;
2616                         if (entry->vme_start < start) {
2617                                 tmp_start -= start - entry->vme_start;
2618                                 tmp_offset -= start - entry->vme_start;
2619
2620                         }
2621                         for (; entry->vme_start < end;
2622                              entry = entry->vme_next) {
2623                                 /*
2624                                  * Check if the mapping's attributes
2625                                  * match the existing map entry.
2626                                  */
2627                                 if (entry == vm_map_to_entry(map) ||
2628                                     entry->vme_start != tmp_start ||
2629                                     entry->is_sub_map != is_submap ||
2630                                     VME_OFFSET(entry) != tmp_offset ||
2631                                     entry->needs_copy != needs_copy ||
2632                                     entry->protection != cur_protection ||
2633                                     entry->max_protection != max_protection ||
2634                                     entry->inheritance != inheritance ||
2635                                     entry->iokit_acct != iokit_acct ||
2636                                     VME_ALIAS(entry) != alias) {
2637                                         /* not the same mapping ! */
2638                                         RETURN(KERN_NO_SPACE);
2639                                 }
2640                                 /*
2641                                  * Check if the same object is being mapped.
2642                                  */
2643                                 if (is_submap) {
2644                                         if (VME_SUBMAP(entry) !=
2645                                             (vm_map_t) object) {
2646                                                 /* not the same submap */
2647                                                 RETURN(KERN_NO_SPACE);
2648                                         }
2649                                 } else {
2650                                         if (VME_OBJECT(entry) != object) {
2651                                                 /* not the same VM object... */
2652                                                 vm_object_t obj2;
2653
2654                                                 obj2 = VME_OBJECT(entry);
2655                                                 if ((obj2 == VM_OBJECT_NULL ||
2656                                                      obj2->internal) &&
2657                                                     (object == VM_OBJECT_NULL ||
2658                                                      object->internal)) {
2659                                                         /*
2660                                                          * ... but both are
2661                                                          * anonymous memory,
2662                                                          * so equivalent.
2663                                                          */
2664                                                 } else {
2665                                                         RETURN(KERN_NO_SPACE);
2666                                                 }
2667                                         }
2668                                 }
2669
2670                                 tmp_offset += entry->vme_end - entry->vme_start;
2671                                 tmp_start += entry->vme_end - entry->vme_start;
2672                                 if (entry->vme_end >= end) {
2673                                         /* reached the end of our mapping */
2674                                         break;
2675                                 }
2676                         }
2677                         /* it all matches:  let's use what's already there ! */
2678                         RETURN(KERN_MEMORY_PRESENT);
2679                 }
2680
2681                 /*
2682                  *      ...     the next region doesn't overlap the
2683                  *              end point.
2684                  */
2685
2686                 if ((entry->vme_next != vm_map_to_entry(map)) &&
2687                     (entry->vme_next->vme_start < end))
2688                         RETURN(KERN_NO_SPACE);
2689         }
2690
2691         /*
2692          *      At this point,
2693          *              "start" and "end" should define the endpoints of the
2694          *                      available new range, and
2695          *              "entry" should refer to the region before the new
2696          *                      range, and
2697          *
2698          *              the map should be locked.
2699          */
2700
2701         /*
2702          *      See whether we can avoid creating a new entry (and object) by
2703          *      extending one of our neighbors.  [So far, we only attempt to
2704          *      extend from below.]  Note that we can never extend/join
2705          *      purgable objects because they need to remain distinct
2706          *      entities in order to implement their "volatile object"
2707          *      semantics.
2708          */
2709
2710         if (purgable ||
2711             entry_for_jit ||
2712             vm_memory_malloc_no_cow(user_alias)) {
2713                 if (object == VM_OBJECT_NULL) {
2714
2715                         object = vm_object_allocate(size);
2716                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2717                         object->true_share = FALSE;
2718                         if (purgable) {
2719                                 task_t owner;
2720                                 object->purgable = VM_PURGABLE_NONVOLATILE;
2721                                 if (map->pmap == kernel_pmap) {
2722                                         /*
2723                                          * Purgeable mappings made in a kernel
2724                                          * map are "owned" by the kernel itself
2725                                          * rather than the current user task
2726                                          * because they're likely to be used by
2727                                          * more than this user task (see
2728                                          * execargs_purgeable_allocate(), for
2729                                          * example).
2730                                          */
2731                                         owner = kernel_task;
2732                                 } else {
2733                                         owner = current_task();
2734                                 }
2735                                 assert(object->vo_owner == NULL);
2736                                 assert(object->resident_page_count == 0);
2737                                 assert(object->wired_page_count == 0);
2738                                 vm_object_lock(object);
2739                                 vm_purgeable_nonvolatile_enqueue(object, owner);
2740                                 vm_object_unlock(object);
2741                         }
2742                         offset = (vm_object_offset_t)0;
2743                 }
2744         } else if ((is_submap == FALSE) &&
2745                    (object == VM_OBJECT_NULL) &&
2746                    (entry != vm_map_to_entry(map)) &&
2747                    (entry->vme_end == start) &&
2748                    (!entry->is_shared) &&
2749                    (!entry->is_sub_map) &&
2750                    (!entry->in_transition) &&
2751                    (!entry->needs_wakeup) &&
2752                    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2753                    (entry->protection == cur_protection) &&
2754                    (entry->max_protection == max_protection) &&
2755                    (entry->inheritance == inheritance) &&
2756                    ((user_alias == VM_MEMORY_REALLOC) ||
2757                     (VME_ALIAS(entry) == alias)) &&
2758                    (entry->no_cache == no_cache) &&
2759                    (entry->permanent == permanent) &&
2760                    /* no coalescing for immutable executable mappings */
2761                    !((entry->protection & VM_PROT_EXECUTE) &&
2762                      entry->permanent) &&
2763                    (!entry->superpage_size && !superpage_size) &&
2764                    /*
2765                     * No coalescing if not map-aligned, to avoid propagating
2766                     * that condition any further than needed:
2767                     */
2768                    (!entry->map_aligned || !clear_map_aligned) &&
2769                    (!entry->zero_wired_pages) &&
2770                    (!entry->used_for_jit && !entry_for_jit) &&
2771                    (!entry->pmap_cs_associated) &&
2772                    (entry->iokit_acct == iokit_acct) &&
2773                    (!entry->vme_resilient_codesign) &&
2774                    (!entry->vme_resilient_media) &&
2775                    (!entry->vme_atomic) &&
2776
2777                    ((entry->vme_end - entry->vme_start) + size <=
2778                     (user_alias == VM_MEMORY_REALLOC ?
2779                      ANON_CHUNK_SIZE :
2780                      NO_COALESCE_LIMIT)) &&
2781
2782                    (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2783                 if (vm_object_coalesce(VME_OBJECT(entry),
2784                                        VM_OBJECT_NULL,
2785                                        VME_OFFSET(entry),
2786                                        (vm_object_offset_t) 0,
2787                                        (vm_map_size_t)(entry->vme_end - entry->vme_start),
2788                                        (vm_map_size_t)(end - entry->vme_end))) {
2789
2790                         /*
2791                          *      Coalesced the two objects - can extend
2792                          *      the previous map entry to include the
2793                          *      new range.
2794                          */
2795                         map->size += (end - entry->vme_end);
2796                         assert(entry->vme_start < end);
2797                         assert(VM_MAP_PAGE_ALIGNED(end,
2798                                                    VM_MAP_PAGE_MASK(map)));
2799                         if (__improbable(vm_debug_events))
2800                                 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2801                         entry->vme_end = end;
2802                         if (map->holelistenabled) {
2803                                 vm_map_store_update_first_free(map, entry, TRUE);
2804                         } else {
2805                                 vm_map_store_update_first_free(map, map->first_free, TRUE);
2806                         }
2807                         new_mapping_established = TRUE;
2808                         RETURN(KERN_SUCCESS);
2809                 }
2810         }
2811
2812         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2813         new_entry = NULL;
2814
2815         for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2816                 tmp2_end = tmp2_start + step;
2817                 /*
2818                  *      Create a new entry
2819                  *
2820                  * XXX FBDP
2821                  * The reserved "page zero" in each process's address space can
2822                  * be arbitrarily large.  Splitting it into separate objects and
2823                  * therefore different VM map entries serves no purpose and just
2824                  * slows down operations on the VM map, so let's not split the
2825                  * allocation into chunks if the max protection is NONE.  That
2826                  * memory should never be accessible, so it will never get to the
2827                  * default pager.
2828                  */
2829                 tmp_start = tmp2_start;
2830                 if (object == VM_OBJECT_NULL &&
2831                     size > chunk_size &&
2832                     max_protection != VM_PROT_NONE &&
2833                     superpage_size == 0)
2834                         tmp_end = tmp_start + chunk_size;
2835                 else
2836                         tmp_end = tmp2_end;
2837                 do {
2838                         new_entry = vm_map_entry_insert(
2839                                 map, entry, tmp_start, tmp_end,
2840                                 object, offset, needs_copy,
2841                                 FALSE, FALSE,
2842                                 cur_protection, max_protection,
2843                                 VM_BEHAVIOR_DEFAULT,
2844                                 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2845                                 0,
2846                                 no_cache,
2847                                 permanent,
2848                                 superpage_size,
2849                                 clear_map_aligned,
2850                                 is_submap,
2851                                 entry_for_jit,
2852                                 alias);
2853
2854                         assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2855
2856                         if (resilient_codesign &&
2857                             ! ((cur_protection | max_protection) &
2858                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2859                                 new_entry->vme_resilient_codesign = TRUE;
2860                         }
2861
2862                         if (resilient_media &&
2863                             ! ((cur_protection | max_protection) &
2864                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2865                                 new_entry->vme_resilient_media = TRUE;
2866                         }
2867
2868                         assert(!new_entry->iokit_acct);
2869                         if (!is_submap &&
2870                             object != VM_OBJECT_NULL &&
2871                             (object->purgable != VM_PURGABLE_DENY ||
2872                              object->vo_ledger_tag)) {
2873                                 assert(new_entry->use_pmap);
2874                                 assert(!new_entry->iokit_acct);
2875                                 /*
2876                                  * Turn off pmap accounting since
2877                                  * purgeable (or tagged) objects have their
2878                                  * own ledgers.
2879                                  */
2880                                 new_entry->use_pmap = FALSE;
2881                         } else if (!is_submap &&
2882                                    iokit_acct &&
2883                                    object != VM_OBJECT_NULL &&
2884                                    object->internal) {
2885                                 /* alternate accounting */
2886                                 assert(!new_entry->iokit_acct);
2887                                 assert(new_entry->use_pmap);
2888                                 new_entry->iokit_acct = TRUE;
2889                                 new_entry->use_pmap = FALSE;
2890                                 DTRACE_VM4(
2891                                         vm_map_iokit_mapped_region,
2892                                         vm_map_t, map,
2893                                         vm_map_offset_t, new_entry->vme_start,
2894                                         vm_map_offset_t, new_entry->vme_end,
2895                                         int, VME_ALIAS(new_entry));
2896                                 vm_map_iokit_mapped_region(
2897                                         map,
2898                                         (new_entry->vme_end -
2899                                          new_entry->vme_start));
2900                         } else if (!is_submap) {
2901                                 assert(!new_entry->iokit_acct);
2902                                 assert(new_entry->use_pmap);
2903                         }
2904
2905                         if (is_submap) {
2906                                 vm_map_t        submap;
2907                                 boolean_t       submap_is_64bit;
2908                                 boolean_t       use_pmap;
2909
2910                                 assert(new_entry->is_sub_map);
2911                                 assert(!new_entry->use_pmap);
2912                                 assert(!new_entry->iokit_acct);
2913                                 submap = (vm_map_t) object;
2914                                 submap_is_64bit = vm_map_is_64bit(submap);
2915                                 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2916 #ifndef NO_NESTED_PMAP
2917                                 if (use_pmap && submap->pmap == NULL) {
2918                                         ledger_t ledger = map->pmap->ledger;
2919                                         /* we need a sub pmap to nest... */
2920                                         submap->pmap = pmap_create(ledger, 0,
2921                                             submap_is_64bit);
2922                                         if (submap->pmap == NULL) {
2923                                                 /* let's proceed without nesting... */
2924                                         }
2925 #if     defined(__arm__) || defined(__arm64__)
2926                                         else {
2927                                                 pmap_set_nested(submap->pmap);
2928                                         }
2929 #endif
2930                                 }
2931                                 if (use_pmap && submap->pmap != NULL) {
2932                                         kr = pmap_nest(map->pmap,
2933                                                        submap->pmap,
2934                                                        tmp_start,
2935                                                        tmp_start,
2936                                                        tmp_end - tmp_start);
2937                                         if (kr != KERN_SUCCESS) {
2938                                                 printf("vm_map_enter: "
2939                                                        "pmap_nest(0x%llx,0x%llx) "
2940                                                        "error 0x%x\n",
2941                                                        (long long)tmp_start,
2942                                                        (long long)tmp_end,
2943                                                        kr);
2944                                         } else {
2945                                                 /* we're now nested ! */
2946                                                 new_entry->use_pmap = TRUE;
2947                                                 pmap_empty = FALSE;
2948                                         }
2949                                 }
2950 #endif /* NO_NESTED_PMAP */
2951                         }
2952                         entry = new_entry;
2953
2954                         if (superpage_size) {
2955                                 vm_page_t pages, m;
2956                                 vm_object_t sp_object;
2957                                 vm_object_offset_t sp_offset;
2958
2959                                 VME_OFFSET_SET(entry, 0);
2960
2961                                 /* allocate one superpage */
2962                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2963                                 if (kr != KERN_SUCCESS) {
2964                                         /* deallocate whole range... */
2965                                         new_mapping_established = TRUE;
2966                                         /* ... but only up to "tmp_end" */
2967                                         size -= end - tmp_end;
2968                                         RETURN(kr);
2969                                 }
2970
2971                                 /* create one vm_object per superpage */
2972                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2973                                 sp_object->phys_contiguous = TRUE;
2974                                 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
2975                                 VME_OBJECT_SET(entry, sp_object);
2976                                 assert(entry->use_pmap);
2977
2978                                 /* enter the base pages into the object */
2979                                 vm_object_lock(sp_object);
2980                                 for (sp_offset = 0;
2981                                      sp_offset < SUPERPAGE_SIZE;
2982                                      sp_offset += PAGE_SIZE) {
2983                                         m = pages;
2984                                         pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
2985                                         pages = NEXT_PAGE(m);
2986                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2987                                         vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2988                                 }
2989                                 vm_object_unlock(sp_object);
2990                         }
2991                 } while (tmp_end != tmp2_end &&
2992                          (tmp_start = tmp_end) &&
2993                          (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
2994                           tmp_end + chunk_size : tmp2_end));
2995         }
2996
2997         new_mapping_established = TRUE;
2998
2999 BailOut:
3000         assert(map_locked == TRUE);
3001
3002         if (result == KERN_SUCCESS) {
3003                 vm_prot_t pager_prot;
3004                 memory_object_t pager;
3005
3006 #if DEBUG
3007                 if (pmap_empty &&
3008                     !(vmk_flags.vmkf_no_pmap_check)) {
3009                         assert(vm_map_pmap_is_empty(map,
3010                                                     *address,
3011                                                     *address+size));
3012                 }
3013 #endif /* DEBUG */
3014
3015                 /*
3016                  * For "named" VM objects, let the pager know that the
3017                  * memory object is being mapped.  Some pagers need to keep
3018                  * track of this, to know when they can reclaim the memory
3019                  * object, for example.
3020                  * VM calls memory_object_map() for each mapping (specifying
3021                  * the protection of each mapping) and calls
3022                  * memory_object_last_unmap() when all the mappings are gone.
3023                  */
3024                 pager_prot = max_protection;
3025                 if (needs_copy) {
3026                         /*
3027                          * Copy-On-Write mapping: won't modify
3028                          * the memory object.
3029                          */
3030                         pager_prot &= ~VM_PROT_WRITE;
3031                 }
3032                 if (!is_submap &&
3033                     object != VM_OBJECT_NULL &&
3034                     object->named &&
3035                     object->pager != MEMORY_OBJECT_NULL) {
3036                         vm_object_lock(object);
3037                         pager = object->pager;
3038                         if (object->named &&
3039                             pager != MEMORY_OBJECT_NULL) {
3040                                 assert(object->pager_ready);
3041                                 vm_object_mapping_wait(object, THREAD_UNINT);
3042                                 vm_object_mapping_begin(object);
3043                                 vm_object_unlock(object);
3044
3045                                 kr = memory_object_map(pager, pager_prot);
3046                                 assert(kr == KERN_SUCCESS);
3047
3048                                 vm_object_lock(object);
3049                                 vm_object_mapping_end(object);
3050                         }
3051                         vm_object_unlock(object);
3052                 }
3053         }
3054
3055         assert(map_locked == TRUE);
3056
3057         if (!keep_map_locked) {
3058                 vm_map_unlock(map);
3059                 map_locked = FALSE;
3060         }
3061
3062         /*
3063          * We can't hold the map lock if we enter this block.
3064          */
3065
3066         if (result == KERN_SUCCESS) {
3067
3068                 /*      Wire down the new entry if the user
3069                  *      requested all new map entries be wired.
3070                  */
3071                 if ((map->wiring_required)||(superpage_size)) {
3072                         assert(!keep_map_locked);
3073                         pmap_empty = FALSE; /* pmap won't be empty */
3074                         kr = vm_map_wire_kernel(map, start, end,
3075                                              new_entry->protection, VM_KERN_MEMORY_MLOCK,
3076                                              TRUE);
3077                         result = kr;
3078                 }
3079
3080         }
3081
3082         if (result != KERN_SUCCESS) {
3083                 if (new_mapping_established) {
3084                         /*
3085                          * We have to get rid of the new mappings since we
3086                          * won't make them available to the user.
3087                          * Try and do that atomically, to minimize the risk
3088                          * that someone else create new mappings that range.
3089                          */
3090                         zap_new_map = vm_map_create(PMAP_NULL,
3091                                                     *address,
3092                                                     *address + size,
3093                                                     map->hdr.entries_pageable);
3094                         vm_map_set_page_shift(zap_new_map,
3095                                               VM_MAP_PAGE_SHIFT(map));
3096                         vm_map_disable_hole_optimization(zap_new_map);
3097
3098                         if (!map_locked) {
3099                                 vm_map_lock(map);
3100                                 map_locked = TRUE;
3101                         }
3102                         (void) vm_map_delete(map, *address, *address+size,
3103                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3104                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3105                                              zap_new_map);
3106                 }
3107                 if (zap_old_map != VM_MAP_NULL &&
3108                     zap_old_map->hdr.nentries != 0) {
3109                         vm_map_entry_t  entry1, entry2;
3110
3111                         /*
3112                          * The new mapping failed.  Attempt to restore
3113                          * the old mappings, saved in the "zap_old_map".
3114                          */
3115                         if (!map_locked) {
3116                                 vm_map_lock(map);
3117                                 map_locked = TRUE;
3118                         }
3119
3120                         /* first check if the coast is still clear */
3121                         start = vm_map_first_entry(zap_old_map)->vme_start;
3122                         end = vm_map_last_entry(zap_old_map)->vme_end;
3123                         if (vm_map_lookup_entry(map, start, &entry1) ||
3124                             vm_map_lookup_entry(map, end, &entry2) ||
3125                             entry1 != entry2) {
3126                                 /*
3127                                  * Part of that range has already been
3128                                  * re-mapped:  we can't restore the old
3129                                  * mappings...
3130                                  */
3131                                 vm_map_enter_restore_failures++;
3132                         } else {
3133                                 /*
3134                                  * Transfer the saved map entries from
3135                                  * "zap_old_map" to the original "map",
3136                                  * inserting them all after "entry1".
3137                                  */
3138                                 for (entry2 = vm_map_first_entry(zap_old_map);
3139                                      entry2 != vm_map_to_entry(zap_old_map);
3140                                      entry2 = vm_map_first_entry(zap_old_map)) {
3141                                         vm_map_size_t entry_size;
3142
3143                                         entry_size = (entry2->vme_end -
3144                                                       entry2->vme_start);
3145                                         vm_map_store_entry_unlink(zap_old_map,
3146                                                             entry2);
3147                                         zap_old_map->size -= entry_size;
3148                                         vm_map_store_entry_link(map, entry1, entry2,
3149                                                                 VM_MAP_KERNEL_FLAGS_NONE);
3150                                         map->size += entry_size;
3151                                         entry1 = entry2;
3152                                 }
3153                                 if (map->wiring_required) {
3154                                         /*
3155                                          * XXX TODO: we should rewire the
3156                                          * old pages here...
3157                                          */
3158                                 }
3159                                 vm_map_enter_restore_successes++;
3160                         }
3161                 }
3162         }
3163
3164         /*
3165          * The caller is responsible for releasing the lock if it requested to
3166          * keep the map locked.
3167          */
3168         if (map_locked && !keep_map_locked) {
3169                 vm_map_unlock(map);
3170         }
3171
3172         /*
3173          * Get rid of the "zap_maps" and all the map entries that
3174          * they may still contain.
3175          */
3176         if (zap_old_map != VM_MAP_NULL) {
3177                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3178                 zap_old_map = VM_MAP_NULL;
3179         }
3180         if (zap_new_map != VM_MAP_NULL) {
3181                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3182                 zap_new_map = VM_MAP_NULL;
3183         }
3184
3185         return result;
3186
3187 #undef  RETURN
3188 }
3189
3190 #if __arm64__
3191 extern const struct memory_object_pager_ops fourk_pager_ops;
3192 kern_return_t
3193 vm_map_enter_fourk(
3194         vm_map_t                map,
3195         vm_map_offset_t         *address,       /* IN/OUT */
3196         vm_map_size_t           size,
3197         vm_map_offset_t         mask,
3198         int                     flags,
3199         vm_map_kernel_flags_t   vmk_flags,
3200         vm_tag_t                alias,
3201         vm_object_t             object,
3202         vm_object_offset_t      offset,
3203         boolean_t               needs_copy,
3204         vm_prot_t               cur_protection,
3205         vm_prot_t               max_protection,
3206         vm_inherit_t            inheritance)
3207 {
3208         vm_map_entry_t          entry, new_entry;
3209         vm_map_offset_t         start, fourk_start;
3210         vm_map_offset_t         end, fourk_end;
3211         vm_map_size_t           fourk_size;
3212         kern_return_t           result = KERN_SUCCESS;
3213         vm_map_t                zap_old_map = VM_MAP_NULL;
3214         vm_map_t                zap_new_map = VM_MAP_NULL;
3215         boolean_t               map_locked = FALSE;
3216         boolean_t               pmap_empty = TRUE;
3217         boolean_t               new_mapping_established = FALSE;
3218         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3219         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3220         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3221         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3222         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3223         boolean_t               is_submap = vmk_flags.vmkf_submap;
3224         boolean_t               permanent = vmk_flags.vmkf_permanent;
3225         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
3226 //      boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
3227         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3228         vm_map_offset_t         effective_min_offset, effective_max_offset;
3229         kern_return_t           kr;
3230         boolean_t               clear_map_aligned = FALSE;
3231         memory_object_t         fourk_mem_obj;
3232         vm_object_t             fourk_object;
3233         vm_map_offset_t         fourk_pager_offset;
3234         int                     fourk_pager_index_start, fourk_pager_index_num;
3235         int                     cur_idx;
3236         boolean_t               fourk_copy;
3237         vm_object_t             copy_object;
3238         vm_object_offset_t      copy_offset;
3239
3240         fourk_mem_obj = MEMORY_OBJECT_NULL;
3241         fourk_object = VM_OBJECT_NULL;
3242
3243         if (superpage_size) {
3244                 return KERN_NOT_SUPPORTED;
3245         }
3246
3247         if ((cur_protection & VM_PROT_WRITE) &&
3248             (cur_protection & VM_PROT_EXECUTE) &&
3249 #if !CONFIG_EMBEDDED
3250             map != kernel_map &&
3251             cs_process_enforcement(NULL) &&
3252 #endif /* !CONFIG_EMBEDDED */
3253             !entry_for_jit) {
3254                 DTRACE_VM3(cs_wx,
3255                            uint64_t, 0,
3256                            uint64_t, 0,
3257                            vm_prot_t, cur_protection);
3258                 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3259                        "turning off execute\n",
3260                        proc_selfpid(),
3261                        (current_task()->bsd_info
3262                         ? proc_name_address(current_task()->bsd_info)
3263                         : "?"),
3264                        __FUNCTION__);
3265                 cur_protection &= ~VM_PROT_EXECUTE;
3266         }
3267
3268         /*
3269          * If the task has requested executable lockdown,
3270          * deny any new executable mapping.
3271          */
3272         if (map->map_disallow_new_exec == TRUE) {
3273                 if (cur_protection & VM_PROT_EXECUTE) {
3274                         return KERN_PROTECTION_FAILURE;
3275                 }
3276         }
3277
3278         if (is_submap) {
3279                 return KERN_NOT_SUPPORTED;
3280         }
3281         if (vmk_flags.vmkf_already) {
3282                 return KERN_NOT_SUPPORTED;
3283         }
3284         if (purgable || entry_for_jit) {
3285                 return KERN_NOT_SUPPORTED;
3286         }
3287
3288         effective_min_offset = map->min_offset;
3289
3290         if (vmk_flags.vmkf_beyond_max) {
3291                 return KERN_NOT_SUPPORTED;
3292         } else {
3293                 effective_max_offset = map->max_offset;
3294         }
3295
3296         if (size == 0 ||
3297             (offset & FOURK_PAGE_MASK) != 0) {
3298                 *address = 0;
3299                 return KERN_INVALID_ARGUMENT;
3300         }
3301
3302 #define RETURN(value)   { result = value; goto BailOut; }
3303
3304         assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3305         assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3306
3307         if (!anywhere && overwrite) {
3308                 return KERN_NOT_SUPPORTED;
3309         }
3310         if (!anywhere && overwrite) {
3311                 /*
3312                  * Create a temporary VM map to hold the old mappings in the
3313                  * affected area while we create the new one.
3314                  * This avoids releasing the VM map lock in
3315                  * vm_map_entry_delete() and allows atomicity
3316                  * when we want to replace some mappings with a new one.
3317                  * It also allows us to restore the old VM mappings if the
3318                  * new mapping fails.
3319                  */
3320                 zap_old_map = vm_map_create(PMAP_NULL,
3321                                             *address,
3322                                             *address + size,
3323                                             map->hdr.entries_pageable);
3324                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3325                 vm_map_disable_hole_optimization(zap_old_map);
3326         }
3327
3328         fourk_start = *address;
3329         fourk_size = size;
3330         fourk_end = fourk_start + fourk_size;
3331
3332         start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3333         end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3334         size = end - start;
3335
3336         if (anywhere) {
3337                 return KERN_NOT_SUPPORTED;
3338         } else {
3339                 /*
3340                  *      Verify that:
3341                  *              the address doesn't itself violate
3342                  *              the mask requirement.
3343                  */
3344
3345                 vm_map_lock(map);
3346                 map_locked = TRUE;
3347                 if ((start & mask) != 0) {
3348                         RETURN(KERN_NO_SPACE);
3349                 }
3350
3351                 /*
3352                  *      ...     the address is within bounds
3353                  */
3354
3355                 end = start + size;
3356
3357                 if ((start < effective_min_offset) ||
3358                     (end > effective_max_offset) ||
3359                     (start >= end)) {
3360                         RETURN(KERN_INVALID_ADDRESS);
3361                 }
3362
3363                 if (overwrite && zap_old_map != VM_MAP_NULL) {
3364                         /*
3365                          * Fixed mapping and "overwrite" flag: attempt to
3366                          * remove all existing mappings in the specified
3367                          * address range, saving them in our "zap_old_map".
3368                          */
3369                         (void) vm_map_delete(map, start, end,
3370                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3371                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3372                                              zap_old_map);
3373                 }
3374
3375                 /*
3376                  *      ...     the starting address isn't allocated
3377                  */
3378                 if (vm_map_lookup_entry(map, start, &entry)) {
3379                         vm_object_t cur_object, shadow_object;
3380
3381                         /*
3382                          * We might already some 4K mappings
3383                          * in a 16K page here.
3384                          */
3385
3386                         if (entry->vme_end - entry->vme_start
3387                             != SIXTEENK_PAGE_SIZE) {
3388                                 RETURN(KERN_NO_SPACE);
3389                         }
3390                         if (entry->is_sub_map) {
3391                                 RETURN(KERN_NO_SPACE);
3392                         }
3393                         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3394                                 RETURN(KERN_NO_SPACE);
3395                         }
3396
3397                         /* go all the way down the shadow chain */
3398                         cur_object = VME_OBJECT(entry);
3399                         vm_object_lock(cur_object);
3400                         while (cur_object->shadow != VM_OBJECT_NULL) {
3401                                 shadow_object = cur_object->shadow;
3402                                 vm_object_lock(shadow_object);
3403                                 vm_object_unlock(cur_object);
3404                                 cur_object = shadow_object;
3405                                 shadow_object = VM_OBJECT_NULL;
3406                         }
3407                         if (cur_object->internal ||
3408                             cur_object->pager == NULL) {
3409                                 vm_object_unlock(cur_object);
3410                                 RETURN(KERN_NO_SPACE);
3411                         }
3412                         if (cur_object->pager->mo_pager_ops
3413                             != &fourk_pager_ops) {
3414                                 vm_object_unlock(cur_object);
3415                                 RETURN(KERN_NO_SPACE);
3416                         }
3417                         fourk_object = cur_object;
3418                         fourk_mem_obj = fourk_object->pager;
3419
3420                         /* keep the "4K" object alive */
3421                         vm_object_reference_locked(fourk_object);
3422                         vm_object_unlock(fourk_object);
3423
3424                         /* merge permissions */
3425                         entry->protection |= cur_protection;
3426                         entry->max_protection |= max_protection;
3427                         if ((entry->protection & (VM_PROT_WRITE |
3428                                                   VM_PROT_EXECUTE)) ==
3429                             (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3430                             fourk_binary_compatibility_unsafe &&
3431                             fourk_binary_compatibility_allow_wx) {
3432                                 /* write+execute: need to be "jit" */
3433                                 entry->used_for_jit = TRUE;
3434                         }
3435
3436                         goto map_in_fourk_pager;
3437                 }
3438
3439                 /*
3440                  *      ...     the next region doesn't overlap the
3441                  *              end point.
3442                  */
3443
3444                 if ((entry->vme_next != vm_map_to_entry(map)) &&
3445                     (entry->vme_next->vme_start < end)) {
3446                         RETURN(KERN_NO_SPACE);
3447                 }
3448         }
3449
3450         /*
3451          *      At this point,
3452          *              "start" and "end" should define the endpoints of the
3453          *                      available new range, and
3454          *              "entry" should refer to the region before the new
3455          *                      range, and
3456          *
3457          *              the map should be locked.
3458          */
3459
3460         /* create a new "4K" pager */
3461         fourk_mem_obj = fourk_pager_create();
3462         fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3463         assert(fourk_object);
3464
3465         /* keep the "4" object alive */
3466         vm_object_reference(fourk_object);
3467
3468         /* create a "copy" object, to map the "4K" object copy-on-write */
3469         fourk_copy = TRUE;
3470         result = vm_object_copy_strategically(fourk_object,
3471                                               0,
3472                                               end - start,
3473                                               &copy_object,
3474                                               &copy_offset,
3475                                               &fourk_copy);
3476         assert(result == KERN_SUCCESS);
3477         assert(copy_object != VM_OBJECT_NULL);
3478         assert(copy_offset == 0);
3479
3480         /* take a reference on the copy object, for this mapping */
3481         vm_object_reference(copy_object);
3482
3483         /* map the "4K" pager's copy object */
3484         new_entry =
3485                 vm_map_entry_insert(map, entry,
3486                                     vm_map_trunc_page(start,
3487                                                       VM_MAP_PAGE_MASK(map)),
3488                                     vm_map_round_page(end,
3489                                                       VM_MAP_PAGE_MASK(map)),
3490                                     copy_object,
3491                                     0, /* offset */
3492                                     FALSE, /* needs_copy */
3493                                     FALSE, FALSE,
3494                                     cur_protection, max_protection,
3495                                     VM_BEHAVIOR_DEFAULT,
3496                                     ((entry_for_jit)
3497                                      ? VM_INHERIT_NONE
3498                                      : inheritance),
3499                                     0,
3500                                     no_cache,
3501                                     permanent,
3502                                     superpage_size,
3503                                     clear_map_aligned,
3504                                     is_submap,
3505                                     FALSE, /* jit */
3506                                     alias);
3507         entry = new_entry;
3508
3509 #if VM_MAP_DEBUG_FOURK
3510         if (vm_map_debug_fourk) {
3511                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3512                        map,
3513                        (uint64_t) entry->vme_start,
3514                        (uint64_t) entry->vme_end,
3515                        fourk_mem_obj);
3516         }
3517 #endif /* VM_MAP_DEBUG_FOURK */
3518
3519         new_mapping_established = TRUE;
3520
3521 map_in_fourk_pager:
3522         /* "map" the original "object" where it belongs in the "4K" pager */
3523         fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3524         fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3525         if (fourk_size > SIXTEENK_PAGE_SIZE) {
3526                 fourk_pager_index_num = 4;
3527         } else {
3528                 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3529         }
3530         if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3531                 fourk_pager_index_num = 4 - fourk_pager_index_start;
3532         }
3533         for (cur_idx = 0;
3534              cur_idx < fourk_pager_index_num;
3535              cur_idx++) {
3536                 vm_object_t             old_object;
3537                 vm_object_offset_t      old_offset;
3538
3539                 kr = fourk_pager_populate(fourk_mem_obj,
3540                                           TRUE, /* overwrite */
3541                                           fourk_pager_index_start + cur_idx,
3542                                           object,
3543                                           (object
3544                                            ? (offset +
3545                                               (cur_idx * FOURK_PAGE_SIZE))
3546                                            : 0),
3547                                           &old_object,
3548                                           &old_offset);
3549 #if VM_MAP_DEBUG_FOURK
3550                 if (vm_map_debug_fourk) {
3551                         if (old_object == (vm_object_t) -1 &&
3552                             old_offset == (vm_object_offset_t) -1) {
3553                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3554                                        "pager [%p:0x%llx] "
3555                                        "populate[%d] "
3556                                        "[object:%p,offset:0x%llx]\n",
3557                                        map,
3558                                        (uint64_t) entry->vme_start,
3559                                        (uint64_t) entry->vme_end,
3560                                        fourk_mem_obj,
3561                                        VME_OFFSET(entry),
3562                                        fourk_pager_index_start + cur_idx,
3563                                        object,
3564                                        (object
3565                                         ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3566                                         : 0));
3567                         } else {
3568                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3569                                        "pager [%p:0x%llx] "
3570                                        "populate[%d] [object:%p,offset:0x%llx] "
3571                                        "old [%p:0x%llx]\n",
3572                                        map,
3573                                        (uint64_t) entry->vme_start,
3574                                        (uint64_t) entry->vme_end,
3575                                        fourk_mem_obj,
3576                                        VME_OFFSET(entry),
3577                                        fourk_pager_index_start + cur_idx,
3578                                        object,
3579                                        (object
3580                                         ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3581                                         : 0),
3582                                        old_object,
3583                                        old_offset);
3584                         }
3585                 }
3586 #endif /* VM_MAP_DEBUG_FOURK */
3587
3588                 assert(kr == KERN_SUCCESS);
3589                 if (object != old_object &&
3590                     object != VM_OBJECT_NULL &&
3591                     object != (vm_object_t) -1) {
3592                         vm_object_reference(object);
3593                 }
3594                 if (object != old_object &&
3595                     old_object != VM_OBJECT_NULL &&
3596                     old_object != (vm_object_t) -1) {
3597                         vm_object_deallocate(old_object);
3598                 }
3599         }
3600
3601 BailOut:
3602         assert(map_locked == TRUE);
3603
3604         if (fourk_object != VM_OBJECT_NULL) {
3605                 vm_object_deallocate(fourk_object);
3606                 fourk_object = VM_OBJECT_NULL;
3607                 fourk_mem_obj = MEMORY_OBJECT_NULL;
3608         }
3609
3610         if (result == KERN_SUCCESS) {
3611                 vm_prot_t pager_prot;
3612                 memory_object_t pager;
3613
3614 #if DEBUG
3615                 if (pmap_empty &&
3616                     !(vmk_flags.vmkf_no_pmap_check)) {
3617                         assert(vm_map_pmap_is_empty(map,
3618                                                     *address,
3619                                                     *address+size));
3620                 }
3621 #endif /* DEBUG */
3622
3623                 /*
3624                  * For "named" VM objects, let the pager know that the
3625                  * memory object is being mapped.  Some pagers need to keep
3626                  * track of this, to know when they can reclaim the memory
3627                  * object, for example.
3628                  * VM calls memory_object_map() for each mapping (specifying
3629                  * the protection of each mapping) and calls
3630                  * memory_object_last_unmap() when all the mappings are gone.
3631                  */
3632                 pager_prot = max_protection;
3633                 if (needs_copy) {
3634                         /*
3635                          * Copy-On-Write mapping: won't modify
3636                          * the memory object.
3637                          */
3638                         pager_prot &= ~VM_PROT_WRITE;
3639                 }
3640                 if (!is_submap &&
3641                     object != VM_OBJECT_NULL &&
3642                     object->named &&
3643                     object->pager != MEMORY_OBJECT_NULL) {
3644                         vm_object_lock(object);
3645                         pager = object->pager;
3646                         if (object->named &&
3647                             pager != MEMORY_OBJECT_NULL) {
3648                                 assert(object->pager_ready);
3649                                 vm_object_mapping_wait(object, THREAD_UNINT);
3650                                 vm_object_mapping_begin(object);
3651                                 vm_object_unlock(object);
3652
3653                                 kr = memory_object_map(pager, pager_prot);
3654                                 assert(kr == KERN_SUCCESS);
3655
3656                                 vm_object_lock(object);
3657                                 vm_object_mapping_end(object);
3658                         }
3659                         vm_object_unlock(object);
3660                 }
3661                 if (!is_submap &&
3662                     fourk_object != VM_OBJECT_NULL &&
3663                     fourk_object->named &&
3664                     fourk_object->pager != MEMORY_OBJECT_NULL) {
3665                         vm_object_lock(fourk_object);
3666                         pager = fourk_object->pager;
3667                         if (fourk_object->named &&
3668                             pager != MEMORY_OBJECT_NULL) {
3669                                 assert(fourk_object->pager_ready);
3670                                 vm_object_mapping_wait(fourk_object,
3671                                                        THREAD_UNINT);
3672                                 vm_object_mapping_begin(fourk_object);
3673                                 vm_object_unlock(fourk_object);
3674
3675                                 kr = memory_object_map(pager, VM_PROT_READ);
3676                                 assert(kr == KERN_SUCCESS);
3677
3678                                 vm_object_lock(fourk_object);
3679                                 vm_object_mapping_end(fourk_object);
3680                         }
3681                         vm_object_unlock(fourk_object);
3682                 }
3683         }
3684
3685         assert(map_locked == TRUE);
3686
3687         if (!keep_map_locked) {
3688                 vm_map_unlock(map);
3689                 map_locked = FALSE;
3690         }
3691
3692         /*
3693          * We can't hold the map lock if we enter this block.
3694          */
3695
3696         if (result == KERN_SUCCESS) {
3697
3698                 /*      Wire down the new entry if the user
3699                  *      requested all new map entries be wired.
3700                  */
3701                 if ((map->wiring_required)||(superpage_size)) {
3702                         assert(!keep_map_locked);
3703                         pmap_empty = FALSE; /* pmap won't be empty */
3704                         kr = vm_map_wire_kernel(map, start, end,
3705                                              new_entry->protection, VM_KERN_MEMORY_MLOCK,
3706                                              TRUE);
3707                         result = kr;
3708                 }
3709
3710         }
3711
3712         if (result != KERN_SUCCESS) {
3713                 if (new_mapping_established) {
3714                         /*
3715                          * We have to get rid of the new mappings since we
3716                          * won't make them available to the user.
3717                          * Try and do that atomically, to minimize the risk
3718                          * that someone else create new mappings that range.
3719                          */
3720                         zap_new_map = vm_map_create(PMAP_NULL,
3721                                                     *address,
3722                                                     *address + size,
3723                                                     map->hdr.entries_pageable);
3724                         vm_map_set_page_shift(zap_new_map,
3725                                               VM_MAP_PAGE_SHIFT(map));
3726                         vm_map_disable_hole_optimization(zap_new_map);
3727
3728                         if (!map_locked) {
3729                                 vm_map_lock(map);
3730                                 map_locked = TRUE;
3731                         }
3732                         (void) vm_map_delete(map, *address, *address+size,
3733                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3734                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3735                                              zap_new_map);
3736                 }
3737                 if (zap_old_map != VM_MAP_NULL &&
3738                     zap_old_map->hdr.nentries != 0) {
3739                         vm_map_entry_t  entry1, entry2;
3740
3741                         /*
3742                          * The new mapping failed.  Attempt to restore
3743                          * the old mappings, saved in the "zap_old_map".
3744                          */
3745                         if (!map_locked) {
3746                                 vm_map_lock(map);
3747                                 map_locked = TRUE;
3748                         }
3749
3750                         /* first check if the coast is still clear */
3751                         start = vm_map_first_entry(zap_old_map)->vme_start;
3752                         end = vm_map_last_entry(zap_old_map)->vme_end;
3753                         if (vm_map_lookup_entry(map, start, &entry1) ||
3754                             vm_map_lookup_entry(map, end, &entry2) ||
3755                             entry1 != entry2) {
3756                                 /*
3757                                  * Part of that range has already been
3758                                  * re-mapped:  we can't restore the old
3759                                  * mappings...
3760                                  */
3761                                 vm_map_enter_restore_failures++;
3762                         } else {
3763                                 /*
3764                                  * Transfer the saved map entries from
3765                                  * "zap_old_map" to the original "map",
3766                                  * inserting them all after "entry1".
3767                                  */
3768                                 for (entry2 = vm_map_first_entry(zap_old_map);
3769                                      entry2 != vm_map_to_entry(zap_old_map);
3770                                      entry2 = vm_map_first_entry(zap_old_map)) {
3771                                         vm_map_size_t entry_size;
3772
3773                                         entry_size = (entry2->vme_end -
3774                                                       entry2->vme_start);
3775                                         vm_map_store_entry_unlink(zap_old_map,
3776                                                             entry2);
3777                                         zap_old_map->size -= entry_size;
3778                                         vm_map_store_entry_link(map, entry1, entry2,
3779                                                                 VM_MAP_KERNEL_FLAGS_NONE);
3780                                         map->size += entry_size;
3781                                         entry1 = entry2;
3782                                 }
3783                                 if (map->wiring_required) {
3784                                         /*
3785                                          * XXX TODO: we should rewire the
3786                                          * old pages here...
3787                                          */
3788                                 }
3789                                 vm_map_enter_restore_successes++;
3790                         }
3791                 }
3792         }
3793
3794         /*
3795          * The caller is responsible for releasing the lock if it requested to
3796          * keep the map locked.
3797          */
3798         if (map_locked && !keep_map_locked) {
3799                 vm_map_unlock(map);
3800         }
3801
3802         /*
3803          * Get rid of the "zap_maps" and all the map entries that
3804          * they may still contain.
3805          */
3806         if (zap_old_map != VM_MAP_NULL) {
3807                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3808                 zap_old_map = VM_MAP_NULL;
3809         }
3810         if (zap_new_map != VM_MAP_NULL) {
3811                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3812                 zap_new_map = VM_MAP_NULL;
3813         }
3814
3815         return result;
3816
3817 #undef  RETURN
3818 }
3819 #endif /* __arm64__ */
3820
3821 /*
3822  * Counters for the prefault optimization.
3823  */
3824 int64_t vm_prefault_nb_pages = 0;
3825 int64_t vm_prefault_nb_bailout = 0;
3826
3827 static kern_return_t
3828 vm_map_enter_mem_object_helper(
3829         vm_map_t                target_map,
3830         vm_map_offset_t         *address,
3831         vm_map_size_t           initial_size,
3832         vm_map_offset_t         mask,
3833         int                     flags,
3834         vm_map_kernel_flags_t   vmk_flags,
3835         vm_tag_t                tag,
3836         ipc_port_t              port,
3837         vm_object_offset_t      offset,
3838         boolean_t               copy,
3839         vm_prot_t               cur_protection,
3840         vm_prot_t               max_protection,
3841         vm_inherit_t            inheritance,
3842         upl_page_list_ptr_t     page_list,
3843         unsigned int            page_list_count)
3844 {
3845         vm_map_address_t        map_addr;
3846         vm_map_size_t           map_size;
3847         vm_object_t             object;
3848         vm_object_size_t        size;
3849         kern_return_t           result;
3850         boolean_t               mask_cur_protection, mask_max_protection;
3851         boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
3852         vm_map_offset_t         offset_in_mapping = 0;
3853 #if __arm64__
3854         boolean_t               fourk = vmk_flags.vmkf_fourk;
3855 #endif /* __arm64__ */
3856
3857         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3858
3859         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3860         mask_max_protection = max_protection & VM_PROT_IS_MASK;
3861         cur_protection &= ~VM_PROT_IS_MASK;
3862         max_protection &= ~VM_PROT_IS_MASK;
3863
3864         /*
3865          * Check arguments for validity
3866          */
3867         if ((target_map == VM_MAP_NULL) ||
3868             (cur_protection & ~VM_PROT_ALL) ||
3869             (max_protection & ~VM_PROT_ALL) ||
3870             (inheritance > VM_INHERIT_LAST_VALID) ||
3871             (try_prefault && (copy || !page_list)) ||
3872             initial_size == 0) {
3873                 return KERN_INVALID_ARGUMENT;
3874         }
3875
3876 #if __arm64__
3877         if (fourk) {
3878                 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3879                 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3880         } else
3881 #endif /* __arm64__ */
3882         {
3883                 map_addr = vm_map_trunc_page(*address,
3884                                              VM_MAP_PAGE_MASK(target_map));
3885                 map_size = vm_map_round_page(initial_size,
3886                                              VM_MAP_PAGE_MASK(target_map));
3887         }
3888         size = vm_object_round_page(initial_size);
3889
3890         /*
3891          * Find the vm object (if any) corresponding to this port.
3892          */
3893         if (!IP_VALID(port)) {
3894                 object = VM_OBJECT_NULL;
3895                 offset = 0;
3896                 copy = FALSE;
3897         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
3898                 vm_named_entry_t        named_entry;
3899
3900                 named_entry = (vm_named_entry_t) port->ip_kobject;
3901
3902                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3903                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3904                         offset += named_entry->data_offset;
3905                 }
3906
3907                 /* a few checks to make sure user is obeying rules */
3908                 if (size == 0) {
3909                         if (offset >= named_entry->size)
3910                                 return KERN_INVALID_RIGHT;
3911                         size = named_entry->size - offset;
3912                 }
3913                 if (mask_max_protection) {
3914                         max_protection &= named_entry->protection;
3915                 }
3916                 if (mask_cur_protection) {
3917                         cur_protection &= named_entry->protection;
3918                 }
3919                 if ((named_entry->protection & max_protection) !=
3920                     max_protection)
3921                         return KERN_INVALID_RIGHT;
3922                 if ((named_entry->protection & cur_protection) !=
3923                     cur_protection)
3924                         return KERN_INVALID_RIGHT;
3925                 if (offset + size < offset) {
3926                         /* overflow */
3927                         return KERN_INVALID_ARGUMENT;
3928                 }
3929                 if (named_entry->size < (offset + initial_size)) {
3930                         return KERN_INVALID_ARGUMENT;
3931                 }
3932
3933                 if (named_entry->is_copy) {
3934                         /* for a vm_map_copy, we can only map it whole */
3935                         if ((size != named_entry->size) &&
3936                             (vm_map_round_page(size,
3937                                                VM_MAP_PAGE_MASK(target_map)) ==
3938                              named_entry->size)) {
3939                                 /* XXX FBDP use the rounded size... */
3940                                 size = vm_map_round_page(
3941                                         size,
3942                                         VM_MAP_PAGE_MASK(target_map));
3943                         }
3944
3945                         if (!(flags & VM_FLAGS_ANYWHERE) &&
3946                             (offset != 0 ||
3947                              size != named_entry->size)) {
3948                                 /*
3949                                  * XXX for a mapping at a "fixed" address,
3950                                  * we can't trim after mapping the whole
3951                                  * memory entry, so reject a request for a
3952                                  * partial mapping.
3953                                  */
3954                                 return KERN_INVALID_ARGUMENT;
3955                         }
3956                 }
3957
3958                 /* the callers parameter offset is defined to be the */
3959                 /* offset from beginning of named entry offset in object */
3960                 offset = offset + named_entry->offset;
3961
3962                 if (! VM_MAP_PAGE_ALIGNED(size,
3963                                           VM_MAP_PAGE_MASK(target_map))) {
3964                         /*
3965                          * Let's not map more than requested;
3966                          * vm_map_enter() will handle this "not map-aligned"
3967                          * case.
3968                          */
3969                         map_size = size;
3970                 }
3971
3972                 named_entry_lock(named_entry);
3973                 if (named_entry->is_sub_map) {
3974                         vm_map_t                submap;
3975
3976                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3977                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3978                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3979                         }
3980
3981                         submap = named_entry->backing.map;
3982                         vm_map_lock(submap);
3983                         vm_map_reference(submap);
3984                         vm_map_unlock(submap);
3985                         named_entry_unlock(named_entry);
3986
3987                         vmk_flags.vmkf_submap = TRUE;
3988
3989                         result = vm_map_enter(target_map,
3990                                               &map_addr,
3991                                               map_size,
3992                                               mask,
3993                                               flags,
3994                                               vmk_flags,
3995                                               tag,
3996                                               (vm_object_t)(uintptr_t) submap,
3997                                               offset,
3998                                               copy,
3999                                               cur_protection,
4000                                               max_protection,
4001                                               inheritance);
4002                         if (result != KERN_SUCCESS) {
4003                                 vm_map_deallocate(submap);
4004                         } else {
4005                                 /*
4006                                  * No need to lock "submap" just to check its
4007                                  * "mapped" flag: that flag is never reset
4008                                  * once it's been set and if we race, we'll
4009                                  * just end up setting it twice, which is OK.
4010                                  */
4011                                 if (submap->mapped_in_other_pmaps == FALSE &&
4012                                     vm_map_pmap(submap) != PMAP_NULL &&
4013                                     vm_map_pmap(submap) !=
4014                                     vm_map_pmap(target_map)) {
4015                                         /*
4016                                          * This submap is being mapped in a map
4017                                          * that uses a different pmap.
4018                                          * Set its "mapped_in_other_pmaps" flag
4019                                          * to indicate that we now need to
4020                                          * remove mappings from all pmaps rather
4021                                          * than just the submap's pmap.
4022                                          */
4023                                         vm_map_lock(submap);
4024                                         submap->mapped_in_other_pmaps = TRUE;
4025                                         vm_map_unlock(submap);
4026                                 }
4027                                 *address = map_addr;
4028                         }
4029                         return result;
4030
4031                 } else if (named_entry->is_copy) {
4032                         kern_return_t   kr;
4033                         vm_map_copy_t   copy_map;
4034                         vm_map_entry_t  copy_entry;
4035                         vm_map_offset_t copy_addr;
4036
4037                         if (flags & ~(VM_FLAGS_FIXED |
4038                                       VM_FLAGS_ANYWHERE |
4039                                       VM_FLAGS_OVERWRITE |
4040                                       VM_FLAGS_RETURN_4K_DATA_ADDR |
4041                                       VM_FLAGS_RETURN_DATA_ADDR |
4042                                       VM_FLAGS_ALIAS_MASK)) {
4043                                 named_entry_unlock(named_entry);
4044                                 return KERN_INVALID_ARGUMENT;
4045                         }
4046
4047                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4048                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4049                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
4050                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4051                                         offset_in_mapping &= ~((signed)(0xFFF));
4052                                 offset = vm_object_trunc_page(offset);
4053                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4054                         }
4055
4056                         copy_map = named_entry->backing.copy;
4057                         assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4058                         if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4059                                 /* unsupported type; should not happen */
4060                                 printf("vm_map_enter_mem_object: "
4061                                        "memory_entry->backing.copy "
4062                                        "unsupported type 0x%x\n",
4063                                        copy_map->type);
4064                                 named_entry_unlock(named_entry);
4065                                 return KERN_INVALID_ARGUMENT;
4066                         }
4067
4068                         /* reserve a contiguous range */
4069                         kr = vm_map_enter(target_map,
4070                                           &map_addr,
4071                                           /* map whole mem entry, trim later: */
4072                                           named_entry->size,
4073                                           mask,
4074                                           flags & (VM_FLAGS_ANYWHERE |
4075                                                    VM_FLAGS_OVERWRITE |
4076                                                    VM_FLAGS_RETURN_4K_DATA_ADDR |
4077                                                    VM_FLAGS_RETURN_DATA_ADDR),
4078                                           vmk_flags,
4079                                           tag,
4080                                           VM_OBJECT_NULL,
4081                                           0,
4082                                           FALSE, /* copy */
4083                                           cur_protection,
4084                                           max_protection,
4085                                           inheritance);
4086                         if (kr != KERN_SUCCESS) {
4087                                 named_entry_unlock(named_entry);
4088                                 return kr;
4089                         }
4090
4091                         copy_addr = map_addr;
4092
4093                         for (copy_entry = vm_map_copy_first_entry(copy_map);
4094                              copy_entry != vm_map_copy_to_entry(copy_map);
4095                              copy_entry = copy_entry->vme_next) {
4096                                 int                     remap_flags;
4097                                 vm_map_kernel_flags_t   vmk_remap_flags;
4098                                 vm_map_t                copy_submap;
4099                                 vm_object_t             copy_object;
4100                                 vm_map_size_t           copy_size;
4101                                 vm_object_offset_t      copy_offset;
4102                                 int                     copy_vm_alias;
4103
4104                                 remap_flags = 0;
4105                                 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4106
4107                                 copy_object = VME_OBJECT(copy_entry);
4108                                 copy_offset = VME_OFFSET(copy_entry);
4109                                 copy_size = (copy_entry->vme_end -
4110                                              copy_entry->vme_start);
4111                                 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4112                                 if (copy_vm_alias == 0) {
4113                                         /*
4114                                          * Caller does not want a specific
4115                                          * alias for this new mapping:  use
4116                                          * the alias of the original mapping.
4117                                          */
4118                                         copy_vm_alias = VME_ALIAS(copy_entry);
4119                                 }
4120
4121                                 /* sanity check */
4122                                 if ((copy_addr + copy_size) >
4123                                     (map_addr +
4124                                      named_entry->size /* XXX full size */ )) {
4125                                         /* over-mapping too much !? */
4126                                         kr = KERN_INVALID_ARGUMENT;
4127                                         /* abort */
4128                                         break;
4129                                 }
4130
4131                                 /* take a reference on the object */
4132                                 if (copy_entry->is_sub_map) {
4133                                         vmk_remap_flags.vmkf_submap = TRUE;
4134                                         copy_submap = VME_SUBMAP(copy_entry);
4135                                         vm_map_lock(copy_submap);
4136                                         vm_map_reference(copy_submap);
4137                                         vm_map_unlock(copy_submap);
4138                                         copy_object = (vm_object_t)(uintptr_t) copy_submap;
4139                                 } else if (!copy &&
4140                                            copy_object != VM_OBJECT_NULL &&
4141                                            (copy_entry->needs_copy ||
4142                                             copy_object->shadowed ||
4143                                             (!copy_object->true_share &&
4144                                              !copy_entry->is_shared &&
4145                                              copy_object->vo_size > copy_size))) {
4146                                         /*
4147                                          * We need to resolve our side of this
4148                                          * "symmetric" copy-on-write now; we
4149                                          * need a new object to map and share,
4150                                          * instead of the current one which
4151                                          * might still be shared with the
4152                                          * original mapping.
4153                                          *
4154                                          * Note: A "vm_map_copy_t" does not
4155                                          * have a lock but we're protected by
4156                                          * the named entry's lock here.
4157                                          */
4158                                         // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4159                                         VME_OBJECT_SHADOW(copy_entry, copy_size);
4160                                         if (!copy_entry->needs_copy &&
4161                                             copy_entry->protection & VM_PROT_WRITE) {
4162                                                 vm_prot_t prot;
4163
4164                                                 prot = copy_entry->protection & ~VM_PROT_WRITE;
4165                                                 vm_object_pmap_protect(copy_object,
4166                                                                        copy_offset,
4167                                                                        copy_size,
4168                                                                        PMAP_NULL,
4169                                                                        0,
4170                                                                        prot);
4171                                         }
4172
4173                                         copy_entry->needs_copy = FALSE;
4174                                         copy_entry->is_shared = TRUE;
4175                                         copy_object = VME_OBJECT(copy_entry);
4176                                         copy_offset = VME_OFFSET(copy_entry);
4177                                         vm_object_lock(copy_object);
4178                                         vm_object_reference_locked(copy_object);
4179                                         if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4180                                                 /* we're about to make a shared mapping of this object */
4181                                                 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4182                                                 copy_object->true_share = TRUE;
4183                                         }
4184                                         vm_object_unlock(copy_object);
4185                                 } else {
4186                                         /*
4187                                          * We already have the right object
4188                                          * to map.
4189                                          */
4190                                         copy_object = VME_OBJECT(copy_entry);
4191                                         vm_object_reference(copy_object);
4192                                 }
4193
4194                                 /* over-map the object into destination */
4195                                 remap_flags |= flags;
4196                                 remap_flags |= VM_FLAGS_FIXED;
4197                                 remap_flags |= VM_FLAGS_OVERWRITE;
4198                                 remap_flags &= ~VM_FLAGS_ANYWHERE;
4199                                 if (!copy && !copy_entry->is_sub_map) {
4200                                         /*
4201                                          * copy-on-write should have been
4202                                          * resolved at this point, or we would
4203                                          * end up sharing instead of copying.
4204                                          */
4205                                         assert(!copy_entry->needs_copy);
4206                                 }
4207 #if !CONFIG_EMBEDDED
4208                                 if (copy_entry->used_for_jit) {
4209                                         vmk_remap_flags.vmkf_map_jit = TRUE;
4210                                 }
4211 #endif /* !CONFIG_EMBEDDED */
4212                                 kr = vm_map_enter(target_map,
4213                                                   &copy_addr,
4214                                                   copy_size,
4215                                                   (vm_map_offset_t) 0,
4216                                                   remap_flags,
4217                                                   vmk_remap_flags,
4218                                                   copy_vm_alias,
4219                                                   copy_object,
4220                                                   copy_offset,
4221                                                   copy,
4222                                                   cur_protection,
4223                                                   max_protection,
4224                                                   inheritance);
4225                                 if (kr != KERN_SUCCESS) {
4226                                         if (copy_entry->is_sub_map) {
4227                                                 vm_map_deallocate(copy_submap);
4228                                         } else {
4229                                                 vm_object_deallocate(copy_object);
4230                                         }
4231                                         /* abort */
4232                                         break;
4233                                 }
4234
4235                                 /* next mapping */
4236                                 copy_addr += copy_size;
4237                         }
4238
4239                         if (kr == KERN_SUCCESS) {
4240                                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4241                                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4242                                         *address = map_addr + offset_in_mapping;
4243                                 } else {
4244                                         *address = map_addr;
4245                                 }
4246
4247                                 if (offset) {
4248                                         /*
4249                                          * Trim in front, from 0 to "offset".
4250                                          */
4251                                         vm_map_remove(target_map,
4252                                                       map_addr,
4253                                                       map_addr + offset,
4254                                                       VM_MAP_REMOVE_NO_FLAGS);
4255                                         *address += offset;
4256                                 }
4257                                 if (offset + map_size < named_entry->size) {
4258                                         /*
4259                                          * Trim in back, from
4260                                          * "offset + map_size" to
4261                                          * "named_entry->size".
4262                                          */
4263                                         vm_map_remove(target_map,
4264                                                       (map_addr +
4265                                                        offset + map_size),
4266                                                       (map_addr +
4267                                                        named_entry->size),
4268                                                       VM_MAP_REMOVE_NO_FLAGS);
4269                                 }
4270                         }
4271                         named_entry_unlock(named_entry);
4272
4273                         if (kr != KERN_SUCCESS) {
4274                                 if (! (flags & VM_FLAGS_OVERWRITE)) {
4275                                         /* deallocate the contiguous range */
4276                                         (void) vm_deallocate(target_map,
4277                                                              map_addr,
4278                                                              map_size);
4279                                 }
4280                         }
4281
4282                         return kr;
4283
4284                 } else {
4285                         unsigned int    access;
4286                         vm_prot_t       protections;
4287                         unsigned int    wimg_mode;
4288
4289                         /* we are mapping a VM object */
4290
4291                         protections = named_entry->protection & VM_PROT_ALL;
4292                         access = GET_MAP_MEM(named_entry->protection);
4293
4294                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4295                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4296                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
4297                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4298                                         offset_in_mapping &= ~((signed)(0xFFF));
4299                                 offset = vm_object_trunc_page(offset);
4300                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4301                         }
4302
4303                         object = named_entry->backing.object;
4304                         assert(object != VM_OBJECT_NULL);
4305                         vm_object_lock(object);
4306                         named_entry_unlock(named_entry);
4307
4308                         vm_object_reference_locked(object);
4309
4310                         wimg_mode = object->wimg_bits;
4311                         vm_prot_to_wimg(access, &wimg_mode);
4312                         if (object->wimg_bits != wimg_mode)
4313                                 vm_object_change_wimg_mode(object, wimg_mode);
4314
4315                         vm_object_unlock(object);
4316                 }
4317         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4318                 /*
4319                  * JMM - This is temporary until we unify named entries
4320                  * and raw memory objects.
4321                  *
4322                  * Detected fake ip_kotype for a memory object.  In
4323                  * this case, the port isn't really a port at all, but
4324                  * instead is just a raw memory object.
4325                  */
4326                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4327                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4328                         panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4329                 }
4330
4331                 object = memory_object_to_vm_object((memory_object_t)port);
4332                 if (object == VM_OBJECT_NULL)
4333                         return KERN_INVALID_OBJECT;
4334                 vm_object_reference(object);
4335
4336                 /* wait for object (if any) to be ready */
4337                 if (object != VM_OBJECT_NULL) {
4338                         if (object == kernel_object) {
4339                                 printf("Warning: Attempt to map kernel object"
4340                                         " by a non-private kernel entity\n");
4341                                 return KERN_INVALID_OBJECT;
4342                         }
4343                         if (!object->pager_ready) {
4344                                 vm_object_lock(object);
4345
4346                                 while (!object->pager_ready) {
4347                                         vm_object_wait(object,
4348                                                        VM_OBJECT_EVENT_PAGER_READY,
4349                                                        THREAD_UNINT);
4350                                         vm_object_lock(object);
4351                                 }
4352                                 vm_object_unlock(object);
4353                         }
4354                 }
4355         } else {
4356                 return KERN_INVALID_OBJECT;
4357         }
4358
4359         if (object != VM_OBJECT_NULL &&
4360             object->named &&
4361             object->pager != MEMORY_OBJECT_NULL &&
4362             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4363                 memory_object_t pager;
4364                 vm_prot_t       pager_prot;
4365                 kern_return_t   kr;
4366
4367                 /*
4368                  * For "named" VM objects, let the pager know that the
4369                  * memory object is being mapped.  Some pagers need to keep
4370                  * track of this, to know when they can reclaim the memory
4371                  * object, for example.
4372                  * VM calls memory_object_map() for each mapping (specifying
4373                  * the protection of each mapping) and calls
4374                  * memory_object_last_unmap() when all the mappings are gone.
4375                  */
4376                 pager_prot = max_protection;
4377                 if (copy) {
4378                         /*
4379                          * Copy-On-Write mapping: won't modify the
4380                          * memory object.
4381                          */
4382                         pager_prot &= ~VM_PROT_WRITE;
4383                 }
4384                 vm_object_lock(object);
4385                 pager = object->pager;
4386                 if (object->named &&
4387                     pager != MEMORY_OBJECT_NULL &&
4388                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4389                         assert(object->pager_ready);
4390                         vm_object_mapping_wait(object, THREAD_UNINT);
4391                         vm_object_mapping_begin(object);
4392                         vm_object_unlock(object);
4393
4394                         kr = memory_object_map(pager, pager_prot);
4395                         assert(kr == KERN_SUCCESS);
4396
4397                         vm_object_lock(object);
4398                         vm_object_mapping_end(object);
4399                 }
4400                 vm_object_unlock(object);
4401         }
4402
4403         /*
4404          *      Perform the copy if requested
4405          */
4406
4407         if (copy) {
4408                 vm_object_t             new_object;
4409                 vm_object_offset_t      new_offset;
4410
4411                 result = vm_object_copy_strategically(object, offset,
4412                                                       map_size,
4413                                                       &new_object, &new_offset,
4414                                                       &copy);
4415
4416
4417                 if (result == KERN_MEMORY_RESTART_COPY) {
4418                         boolean_t success;
4419                         boolean_t src_needs_copy;
4420
4421                         /*
4422                          * XXX
4423                          * We currently ignore src_needs_copy.
4424                          * This really is the issue of how to make
4425                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4426                          * non-kernel users to use. Solution forthcoming.
4427                          * In the meantime, since we don't allow non-kernel
4428                          * memory managers to specify symmetric copy,
4429                          * we won't run into problems here.
4430                          */
4431                         new_object = object;
4432                         new_offset = offset;
4433                         success = vm_object_copy_quickly(&new_object,
4434                                                          new_offset,
4435                                                          map_size,
4436                                                          &src_needs_copy,
4437                                                          &copy);
4438                         assert(success);
4439                         result = KERN_SUCCESS;
4440                 }
4441                 /*
4442                  *      Throw away the reference to the
4443                  *      original object, as it won't be mapped.
4444                  */
4445
4446                 vm_object_deallocate(object);
4447
4448                 if (result != KERN_SUCCESS) {
4449                         return result;
4450                 }
4451
4452                 object = new_object;
4453                 offset = new_offset;
4454         }
4455
4456         /*
4457          * If non-kernel users want to try to prefault pages, the mapping and prefault
4458          * needs to be atomic.
4459          */
4460         kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4461         vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4462
4463 #if __arm64__
4464         if (fourk) {
4465                 /* map this object in a "4K" pager */
4466                 result = vm_map_enter_fourk(target_map,
4467                                             &map_addr,
4468                                             map_size,
4469                                             (vm_map_offset_t) mask,
4470                                             flags,
4471                                             vmk_flags,
4472                                             tag,
4473                                             object,
4474                                             offset,
4475                                             copy,
4476                                             cur_protection,
4477                                             max_protection,
4478                                             inheritance);
4479         } else
4480 #endif /* __arm64__ */
4481         {
4482                 result = vm_map_enter(target_map,
4483                                       &map_addr, map_size,
4484                                       (vm_map_offset_t)mask,
4485                                       flags,
4486                                       vmk_flags,
4487                                       tag,
4488                                       object, offset,
4489                                       copy,
4490                                       cur_protection, max_protection,
4491                                       inheritance);
4492         }
4493         if (result != KERN_SUCCESS)
4494                 vm_object_deallocate(object);
4495
4496         /*
4497          * Try to prefault, and do not forget to release the vm map lock.
4498          */
4499         if (result == KERN_SUCCESS && try_prefault) {
4500                 mach_vm_address_t va = map_addr;
4501                 kern_return_t kr = KERN_SUCCESS;
4502                 unsigned int i = 0;
4503                 int pmap_options;
4504
4505                 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4506                 if (object->internal) {
4507                         pmap_options |= PMAP_OPTIONS_INTERNAL;
4508                 }
4509
4510                 for (i = 0; i < page_list_count; ++i) {
4511                         if (!UPL_VALID_PAGE(page_list, i)) {
4512                                 if (kernel_prefault) {
4513                                         assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4514                                         result = KERN_MEMORY_ERROR;
4515                                         break;
4516                                 }
4517                         } else {
4518                                 /*
4519                                  * If this function call failed, we should stop
4520                                  * trying to optimize, other calls are likely
4521                                  * going to fail too.
4522                                  *
4523                                  * We are not gonna report an error for such
4524                                  * failure though. That's an optimization, not
4525                                  * something critical.
4526                                  */
4527                                 kr = pmap_enter_options(target_map->pmap,
4528                                                         va, UPL_PHYS_PAGE(page_list, i),
4529                                                         cur_protection, VM_PROT_NONE,
4530                                                         0, TRUE, pmap_options, NULL);
4531                                 if (kr != KERN_SUCCESS) {
4532                                         OSIncrementAtomic64(&vm_prefault_nb_bailout);
4533                                         if (kernel_prefault) {
4534                                                 result = kr;
4535                                         }
4536                                         break;
4537                                 }
4538                                 OSIncrementAtomic64(&vm_prefault_nb_pages);
4539                         }
4540
4541                         /* Next virtual address */
4542                         va += PAGE_SIZE;
4543                 }
4544                 if (vmk_flags.vmkf_keep_map_locked) {
4545                         vm_map_unlock(target_map);
4546                 }
4547         }
4548
4549         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4550                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4551                 *address = map_addr + offset_in_mapping;
4552         } else {
4553                 *address = map_addr;
4554         }
4555         return result;
4556 }
4557
4558 kern_return_t
4559 vm_map_enter_mem_object(
4560         vm_map_t                target_map,
4561         vm_map_offset_t         *address,
4562         vm_map_size_t           initial_size,
4563         vm_map_offset_t         mask,
4564         int                     flags,
4565         vm_map_kernel_flags_t   vmk_flags,
4566         vm_tag_t                tag,
4567         ipc_port_t              port,
4568         vm_object_offset_t      offset,
4569         boolean_t               copy,
4570         vm_prot_t               cur_protection,
4571         vm_prot_t               max_protection,
4572         vm_inherit_t            inheritance)
4573 {
4574         kern_return_t ret;
4575
4576         ret = vm_map_enter_mem_object_helper(target_map,
4577                                              address,
4578                                              initial_size,
4579                                              mask,
4580                                              flags,
4581                                              vmk_flags,
4582                                              tag,
4583                                              port,
4584                                              offset,
4585                                              copy,
4586                                              cur_protection,
4587                                              max_protection,
4588                                              inheritance,
4589                                              NULL,
4590                                              0);
4591
4592 #if KASAN
4593         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4594                 kasan_notify_address(*address, initial_size);
4595         }
4596 #endif
4597
4598         return ret;
4599 }
4600
4601 kern_return_t
4602 vm_map_enter_mem_object_prefault(
4603         vm_map_t                target_map,
4604         vm_map_offset_t         *address,
4605         vm_map_size_t           initial_size,
4606         vm_map_offset_t         mask,
4607         int                     flags,
4608         vm_map_kernel_flags_t   vmk_flags,
4609         vm_tag_t                tag,
4610         ipc_port_t              port,
4611         vm_object_offset_t      offset,
4612         vm_prot_t               cur_protection,
4613         vm_prot_t               max_protection,
4614         upl_page_list_ptr_t     page_list,
4615         unsigned int            page_list_count)
4616 {
4617         kern_return_t ret;
4618
4619         ret = vm_map_enter_mem_object_helper(target_map,
4620                                              address,
4621                                              initial_size,
4622                                              mask,
4623                                              flags,
4624                                              vmk_flags,
4625                                              tag,
4626                                              port,
4627                                              offset,
4628                                              FALSE,
4629                                              cur_protection,
4630                                              max_protection,
4631                                              VM_INHERIT_DEFAULT,
4632                                              page_list,
4633                                              page_list_count);
4634
4635 #if KASAN
4636         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4637                 kasan_notify_address(*address, initial_size);
4638         }
4639 #endif
4640
4641         return ret;
4642 }
4643
4644
4645 kern_return_t
4646 vm_map_enter_mem_object_control(
4647         vm_map_t                target_map,
4648         vm_map_offset_t         *address,
4649         vm_map_size_t           initial_size,
4650         vm_map_offset_t         mask,
4651         int                     flags,
4652         vm_map_kernel_flags_t   vmk_flags,
4653         vm_tag_t                tag,
4654         memory_object_control_t control,
4655         vm_object_offset_t      offset,
4656         boolean_t               copy,
4657         vm_prot_t               cur_protection,
4658         vm_prot_t               max_protection,
4659         vm_inherit_t            inheritance)
4660 {
4661         vm_map_address_t        map_addr;
4662         vm_map_size_t           map_size;
4663         vm_object_t             object;
4664         vm_object_size_t        size;
4665         kern_return_t           result;
4666         memory_object_t         pager;
4667         vm_prot_t               pager_prot;
4668         kern_return_t           kr;
4669 #if __arm64__
4670         boolean_t               fourk = vmk_flags.vmkf_fourk;
4671 #endif /* __arm64__ */
4672
4673         /*
4674          * Check arguments for validity
4675          */
4676         if ((target_map == VM_MAP_NULL) ||
4677             (cur_protection & ~VM_PROT_ALL) ||
4678             (max_protection & ~VM_PROT_ALL) ||
4679             (inheritance > VM_INHERIT_LAST_VALID) ||
4680             initial_size == 0) {
4681                 return KERN_INVALID_ARGUMENT;
4682         }
4683
4684 #if __arm64__
4685         if (fourk) {
4686                 map_addr = vm_map_trunc_page(*address,
4687                                              FOURK_PAGE_MASK);
4688                 map_size = vm_map_round_page(initial_size,
4689                                              FOURK_PAGE_MASK);
4690         } else
4691 #endif /* __arm64__ */
4692         {
4693                 map_addr = vm_map_trunc_page(*address,
4694                                              VM_MAP_PAGE_MASK(target_map));
4695                 map_size = vm_map_round_page(initial_size,
4696                                              VM_MAP_PAGE_MASK(target_map));
4697         }
4698         size = vm_object_round_page(initial_size);
4699
4700         object = memory_object_control_to_vm_object(control);
4701
4702         if (object == VM_OBJECT_NULL)
4703                 return KERN_INVALID_OBJECT;
4704
4705         if (object == kernel_object) {
4706                 printf("Warning: Attempt to map kernel object"
4707                        " by a non-private kernel entity\n");
4708                 return KERN_INVALID_OBJECT;
4709         }
4710
4711         vm_object_lock(object);
4712         object->ref_count++;
4713         vm_object_res_reference(object);
4714
4715         /*
4716          * For "named" VM objects, let the pager know that the
4717          * memory object is being mapped.  Some pagers need to keep
4718          * track of this, to know when they can reclaim the memory
4719          * object, for example.
4720          * VM calls memory_object_map() for each mapping (specifying
4721          * the protection of each mapping) and calls
4722          * memory_object_last_unmap() when all the mappings are gone.
4723          */
4724         pager_prot = max_protection;
4725         if (copy) {
4726                 pager_prot &= ~VM_PROT_WRITE;
4727         }
4728         pager = object->pager;
4729         if (object->named &&
4730             pager != MEMORY_OBJECT_NULL &&
4731             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4732                 assert(object->pager_ready);
4733                 vm_object_mapping_wait(object, THREAD_UNINT);
4734                 vm_object_mapping_begin(object);
4735                 vm_object_unlock(object);
4736
4737                 kr = memory_object_map(pager, pager_prot);
4738                 assert(kr == KERN_SUCCESS);
4739
4740                 vm_object_lock(object);
4741                 vm_object_mapping_end(object);
4742         }
4743         vm_object_unlock(object);
4744
4745         /*
4746          *      Perform the copy if requested
4747          */
4748
4749         if (copy) {
4750                 vm_object_t             new_object;
4751                 vm_object_offset_t      new_offset;
4752
4753                 result = vm_object_copy_strategically(object, offset, size,
4754                                                       &new_object, &new_offset,
4755                                                       &copy);
4756
4757
4758                 if (result == KERN_MEMORY_RESTART_COPY) {
4759                         boolean_t success;
4760                         boolean_t src_needs_copy;
4761
4762                         /*
4763                          * XXX
4764                          * We currently ignore src_needs_copy.
4765                          * This really is the issue of how to make
4766                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4767                          * non-kernel users to use. Solution forthcoming.
4768                          * In the meantime, since we don't allow non-kernel
4769                          * memory managers to specify symmetric copy,
4770                          * we won't run into problems here.
4771                          */
4772                         new_object = object;
4773                         new_offset = offset;
4774                         success = vm_object_copy_quickly(&new_object,
4775                                                          new_offset, size,
4776                                                          &src_needs_copy,
4777                                                          &copy);
4778                         assert(success);
4779                         result = KERN_SUCCESS;
4780                 }
4781                 /*
4782                  *      Throw away the reference to the
4783                  *      original object, as it won't be mapped.
4784                  */
4785
4786                 vm_object_deallocate(object);
4787
4788                 if (result != KERN_SUCCESS) {
4789                         return result;
4790                 }
4791
4792                 object = new_object;
4793                 offset = new_offset;
4794         }
4795
4796 #if __arm64__
4797         if (fourk) {
4798                 result = vm_map_enter_fourk(target_map,
4799                                             &map_addr,
4800                                             map_size,
4801                                             (vm_map_offset_t)mask,
4802                                             flags,
4803                                             vmk_flags,
4804                                             tag,
4805                                             object, offset,
4806                                             copy,
4807                                             cur_protection, max_protection,
4808                                             inheritance);
4809         } else
4810 #endif /* __arm64__ */
4811         {
4812                 result = vm_map_enter(target_map,
4813                                       &map_addr, map_size,
4814                                       (vm_map_offset_t)mask,
4815                                       flags,
4816                                       vmk_flags,
4817                                       tag,
4818                                       object, offset,
4819                                       copy,
4820                                       cur_protection, max_protection,
4821                                       inheritance);
4822         }
4823         if (result != KERN_SUCCESS)
4824                 vm_object_deallocate(object);
4825         *address = map_addr;
4826
4827         return result;
4828 }
4829
4830
4831 #if     VM_CPM
4832
4833 #ifdef MACH_ASSERT
4834 extern pmap_paddr_t     avail_start, avail_end;
4835 #endif
4836
4837 /*
4838  *      Allocate memory in the specified map, with the caveat that
4839  *      the memory is physically contiguous.  This call may fail
4840  *      if the system can't find sufficient contiguous memory.
4841  *      This call may cause or lead to heart-stopping amounts of
4842  *      paging activity.
4843  *
4844  *      Memory obtained from this call should be freed in the
4845  *      normal way, viz., via vm_deallocate.
4846  */
4847 kern_return_t
4848 vm_map_enter_cpm(
4849         vm_map_t                map,
4850         vm_map_offset_t *addr,
4851         vm_map_size_t           size,
4852         int                     flags)
4853 {
4854         vm_object_t             cpm_obj;
4855         pmap_t                  pmap;
4856         vm_page_t               m, pages;
4857         kern_return_t           kr;
4858         vm_map_offset_t         va, start, end, offset;
4859 #if     MACH_ASSERT
4860         vm_map_offset_t         prev_addr = 0;
4861 #endif  /* MACH_ASSERT */
4862
4863         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4864         vm_tag_t tag;
4865
4866         VM_GET_FLAGS_ALIAS(flags, tag);
4867
4868         if (size == 0) {
4869                 *addr = 0;
4870                 return KERN_SUCCESS;
4871         }
4872         if (anywhere)
4873                 *addr = vm_map_min(map);
4874         else
4875                 *addr = vm_map_trunc_page(*addr,
4876                                           VM_MAP_PAGE_MASK(map));
4877         size = vm_map_round_page(size,
4878                                  VM_MAP_PAGE_MASK(map));
4879
4880         /*
4881          * LP64todo - cpm_allocate should probably allow
4882          * allocations of >4GB, but not with the current
4883          * algorithm, so just cast down the size for now.
4884          */
4885         if (size > VM_MAX_ADDRESS)
4886                 return KERN_RESOURCE_SHORTAGE;
4887         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
4888                                &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
4889                 return kr;
4890
4891         cpm_obj = vm_object_allocate((vm_object_size_t)size);
4892         assert(cpm_obj != VM_OBJECT_NULL);
4893         assert(cpm_obj->internal);
4894         assert(cpm_obj->vo_size == (vm_object_size_t)size);
4895         assert(cpm_obj->can_persist == FALSE);
4896         assert(cpm_obj->pager_created == FALSE);
4897         assert(cpm_obj->pageout == FALSE);
4898         assert(cpm_obj->shadow == VM_OBJECT_NULL);
4899
4900         /*
4901          *      Insert pages into object.
4902          */
4903
4904         vm_object_lock(cpm_obj);
4905         for (offset = 0; offset < size; offset += PAGE_SIZE) {
4906                 m = pages;
4907                 pages = NEXT_PAGE(m);
4908                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
4909
4910                 assert(!m->vmp_gobbled);
4911                 assert(!m->vmp_wanted);
4912                 assert(!m->vmp_pageout);
4913                 assert(!m->vmp_tabled);
4914                 assert(VM_PAGE_WIRED(m));
4915                 assert(m->vmp_busy);
4916                 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
4917
4918                 m->vmp_busy = FALSE;
4919                 vm_page_insert(m, cpm_obj, offset);
4920         }
4921         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4922         vm_object_unlock(cpm_obj);
4923
4924         /*
4925          *      Hang onto a reference on the object in case a
4926          *      multi-threaded application for some reason decides
4927          *      to deallocate the portion of the address space into
4928          *      which we will insert this object.
4929          *
4930          *      Unfortunately, we must insert the object now before
4931          *      we can talk to the pmap module about which addresses
4932          *      must be wired down.  Hence, the race with a multi-
4933          *      threaded app.
4934          */
4935         vm_object_reference(cpm_obj);
4936
4937         /*
4938          *      Insert object into map.
4939          */
4940
4941         kr = vm_map_enter(
4942                 map,
4943                 addr,
4944                 size,
4945                 (vm_map_offset_t)0,
4946                 flags,
4947                 VM_MAP_KERNEL_FLAGS_NONE,
4948                 cpm_obj,
4949                 (vm_object_offset_t)0,
4950                 FALSE,
4951                 VM_PROT_ALL,
4952                 VM_PROT_ALL,
4953                 VM_INHERIT_DEFAULT);
4954
4955         if (kr != KERN_SUCCESS) {
4956                 /*
4957                  *      A CPM object doesn't have can_persist set,
4958                  *      so all we have to do is deallocate it to
4959                  *      free up these pages.
4960                  */
4961                 assert(cpm_obj->pager_created == FALSE);
4962                 assert(cpm_obj->can_persist == FALSE);
4963                 assert(cpm_obj->pageout == FALSE);
4964                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4965                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
4966                 vm_object_deallocate(cpm_obj); /* kill creation ref */
4967         }
4968
4969         /*
4970          *      Inform the physical mapping system that the
4971          *      range of addresses may not fault, so that
4972          *      page tables and such can be locked down as well.
4973          */
4974         start = *addr;
4975         end = start + size;
4976         pmap = vm_map_pmap(map);
4977         pmap_pageable(pmap, start, end, FALSE);
4978
4979         /*
4980          *      Enter each page into the pmap, to avoid faults.
4981          *      Note that this loop could be coded more efficiently,
4982          *      if the need arose, rather than looking up each page
4983          *      again.
4984          */
4985         for (offset = 0, va = start; offset < size;
4986              va += PAGE_SIZE, offset += PAGE_SIZE) {
4987                 int type_of_fault;
4988
4989                 vm_object_lock(cpm_obj);
4990                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4991                 assert(m != VM_PAGE_NULL);
4992
4993                 vm_page_zero_fill(m);
4994
4995                 type_of_fault = DBG_ZERO_FILL_FAULT;
4996
4997                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
4998                                                 VM_PAGE_WIRED(m),
4999                                                 FALSE, /* change_wiring */
5000                                                 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5001                                                 FALSE, /* no_cache */
5002                                                 FALSE, /* cs_bypass */
5003                                                 0,     /* user_tag */
5004                                             0,     /* pmap_options */
5005                                                 NULL,  /* need_retry */
5006                                                 &type_of_fault);
5007
5008                 vm_object_unlock(cpm_obj);
5009         }
5010
5011 #if     MACH_ASSERT
5012         /*
5013          *      Verify ordering in address space.
5014          */
5015         for (offset = 0; offset < size; offset += PAGE_SIZE) {
5016                 vm_object_lock(cpm_obj);
5017                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5018                 vm_object_unlock(cpm_obj);
5019                 if (m == VM_PAGE_NULL)
5020                         panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
5021                               cpm_obj, (uint64_t)offset);
5022                 assert(m->vmp_tabled);
5023                 assert(!m->vmp_busy);
5024                 assert(!m->vmp_wanted);
5025                 assert(!m->vmp_fictitious);
5026                 assert(!m->vmp_private);
5027                 assert(!m->vmp_absent);
5028                 assert(!m->vmp_error);
5029                 assert(!m->vmp_cleaning);
5030                 assert(!m->vmp_laundry);
5031                 assert(!m->vmp_precious);
5032                 assert(!m->vmp_clustered);
5033                 if (offset != 0) {
5034                         if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5035                                 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5036                                        (uint64_t)start, (uint64_t)end, (uint64_t)va);
5037                                 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5038                                 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5039                                 panic("vm_allocate_cpm:  pages not contig!");
5040                         }
5041                 }
5042                 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5043         }
5044 #endif  /* MACH_ASSERT */
5045
5046         vm_object_deallocate(cpm_obj); /* kill extra ref */
5047
5048         return kr;
5049 }
5050
5051
5052 #else   /* VM_CPM */
5053
5054 /*
5055  *      Interface is defined in all cases, but unless the kernel
5056  *      is built explicitly for this option, the interface does
5057  *      nothing.
5058  */
5059
5060 kern_return_t
5061 vm_map_enter_cpm(
5062         __unused vm_map_t       map,
5063         __unused vm_map_offset_t        *addr,
5064         __unused vm_map_size_t  size,
5065         __unused int            flags)
5066 {
5067         return KERN_FAILURE;
5068 }
5069 #endif /* VM_CPM */
5070
5071 /* Not used without nested pmaps */
5072 #ifndef NO_NESTED_PMAP
5073 /*
5074  * Clip and unnest a portion of a nested submap mapping.
5075  */
5076
5077
5078 static void
5079 vm_map_clip_unnest(
5080         vm_map_t        map,
5081         vm_map_entry_t  entry,
5082         vm_map_offset_t start_unnest,
5083         vm_map_offset_t end_unnest)
5084 {
5085         vm_map_offset_t old_start_unnest = start_unnest;
5086         vm_map_offset_t old_end_unnest = end_unnest;
5087
5088         assert(entry->is_sub_map);
5089         assert(VME_SUBMAP(entry) != NULL);
5090         assert(entry->use_pmap);
5091
5092         /*
5093          * Query the platform for the optimal unnest range.
5094          * DRK: There's some duplication of effort here, since
5095          * callers may have adjusted the range to some extent. This
5096          * routine was introduced to support 1GiB subtree nesting
5097          * for x86 platforms, which can also nest on 2MiB boundaries
5098          * depending on size/alignment.
5099          */
5100         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5101                 assert(VME_SUBMAP(entry)->is_nested_map);
5102                 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5103                 log_unnest_badness(map,
5104                                    old_start_unnest,
5105                                    old_end_unnest,
5106                                    VME_SUBMAP(entry)->is_nested_map,
5107                                    (entry->vme_start +
5108                                     VME_SUBMAP(entry)->lowest_unnestable_start -
5109                                     VME_OFFSET(entry)));
5110         }
5111
5112         if (entry->vme_start > start_unnest ||
5113             entry->vme_end < end_unnest) {
5114                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5115                       "bad nested entry: start=0x%llx end=0x%llx\n",
5116                       (long long)start_unnest, (long long)end_unnest,
5117                       (long long)entry->vme_start, (long long)entry->vme_end);
5118         }
5119
5120         if (start_unnest > entry->vme_start) {
5121                 _vm_map_clip_start(&map->hdr,
5122                                    entry,
5123                                    start_unnest);
5124                 if (map->holelistenabled) {
5125                         vm_map_store_update_first_free(map, NULL, FALSE);
5126                 } else {
5127                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5128                 }
5129         }
5130         if (entry->vme_end > end_unnest) {
5131                 _vm_map_clip_end(&map->hdr,
5132                                  entry,
5133                                  end_unnest);
5134                 if (map->holelistenabled) {
5135                         vm_map_store_update_first_free(map, NULL, FALSE);
5136                 } else {
5137                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5138                 }
5139         }
5140
5141         pmap_unnest(map->pmap,
5142                     entry->vme_start,
5143                     entry->vme_end - entry->vme_start);
5144         if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
5145                 /* clean up parent map/maps */
5146                 vm_map_submap_pmap_clean(
5147                         map, entry->vme_start,
5148                         entry->vme_end,
5149                         VME_SUBMAP(entry),
5150                         VME_OFFSET(entry));
5151         }
5152         entry->use_pmap = FALSE;
5153         if ((map->pmap != kernel_pmap) &&
5154             (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5155                 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5156         }
5157 }
5158 #endif  /* NO_NESTED_PMAP */
5159
5160 /*
5161  *      vm_map_clip_start:      [ internal use only ]
5162  *
5163  *      Asserts that the given entry begins at or after
5164  *      the specified address; if necessary,
5165  *      it splits the entry into two.
5166  */
5167 void
5168 vm_map_clip_start(
5169         vm_map_t        map,
5170         vm_map_entry_t  entry,
5171         vm_map_offset_t startaddr)
5172 {
5173 #ifndef NO_NESTED_PMAP
5174         if (entry->is_sub_map &&
5175             entry->use_pmap &&
5176             startaddr >= entry->vme_start) {
5177                 vm_map_offset_t start_unnest, end_unnest;
5178
5179                 /*
5180                  * Make sure "startaddr" is no longer in a nested range
5181                  * before we clip.  Unnest only the minimum range the platform
5182                  * can handle.
5183                  * vm_map_clip_unnest may perform additional adjustments to
5184                  * the unnest range.
5185                  */
5186                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5187                 end_unnest = start_unnest + pmap_nesting_size_min;
5188                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5189         }
5190 #endif /* NO_NESTED_PMAP */
5191         if (startaddr > entry->vme_start) {
5192                 if (VME_OBJECT(entry) &&
5193                     !entry->is_sub_map &&
5194                     VME_OBJECT(entry)->phys_contiguous) {
5195                         pmap_remove(map->pmap,
5196                                     (addr64_t)(entry->vme_start),
5197                                     (addr64_t)(entry->vme_end));
5198                 }
5199                 if (entry->vme_atomic) {
5200                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5201                 }
5202
5203                 DTRACE_VM5(
5204                         vm_map_clip_start,
5205                         vm_map_t, map,
5206                         vm_map_offset_t, entry->vme_start,
5207                         vm_map_offset_t, entry->vme_end,
5208                         vm_map_offset_t, startaddr,
5209                         int, VME_ALIAS(entry));
5210
5211                 _vm_map_clip_start(&map->hdr, entry, startaddr);
5212                 if (map->holelistenabled) {
5213                         vm_map_store_update_first_free(map, NULL, FALSE);
5214                 } else {
5215                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5216                 }
5217         }
5218 }
5219
5220
5221 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5222         MACRO_BEGIN \
5223         if ((startaddr) > (entry)->vme_start) \
5224                 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5225         MACRO_END
5226
5227 /*
5228  *      This routine is called only when it is known that
5229  *      the entry must be split.
5230  */
5231 static void
5232 _vm_map_clip_start(
5233         struct vm_map_header    *map_header,
5234         vm_map_entry_t          entry,
5235         vm_map_offset_t         start)
5236 {
5237         vm_map_entry_t  new_entry;
5238
5239         /*
5240          *      Split off the front portion --
5241          *      note that we must insert the new
5242          *      entry BEFORE this one, so that
5243          *      this entry has the specified starting
5244          *      address.
5245          */
5246
5247         if (entry->map_aligned) {
5248                 assert(VM_MAP_PAGE_ALIGNED(start,
5249                                            VM_MAP_HDR_PAGE_MASK(map_header)));
5250         }
5251
5252         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5253         vm_map_entry_copy_full(new_entry, entry);
5254
5255         new_entry->vme_end = start;
5256         assert(new_entry->vme_start < new_entry->vme_end);
5257         VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5258         assert(start < entry->vme_end);
5259         entry->vme_start = start;
5260
5261         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5262
5263         if (entry->is_sub_map)
5264                 vm_map_reference(VME_SUBMAP(new_entry));
5265         else
5266                 vm_object_reference(VME_OBJECT(new_entry));
5267 }
5268
5269
5270 /*
5271  *      vm_map_clip_end:        [ internal use only ]
5272  *
5273  *      Asserts that the given entry ends at or before
5274  *      the specified address; if necessary,
5275  *      it splits the entry into two.
5276  */
5277 void
5278 vm_map_clip_end(
5279         vm_map_t        map,
5280         vm_map_entry_t  entry,
5281         vm_map_offset_t endaddr)
5282 {
5283         if (endaddr > entry->vme_end) {
5284                 /*
5285                  * Within the scope of this clipping, limit "endaddr" to
5286                  * the end of this map entry...
5287                  */
5288                 endaddr = entry->vme_end;
5289         }
5290 #ifndef NO_NESTED_PMAP
5291         if (entry->is_sub_map && entry->use_pmap) {
5292                 vm_map_offset_t start_unnest, end_unnest;
5293
5294                 /*
5295                  * Make sure the range between the start of this entry and
5296                  * the new "endaddr" is no longer nested before we clip.
5297                  * Unnest only the minimum range the platform can handle.
5298                  * vm_map_clip_unnest may perform additional adjustments to
5299                  * the unnest range.
5300                  */
5301                 start_unnest = entry->vme_start;
5302                 end_unnest =
5303                         (endaddr + pmap_nesting_size_min - 1) &
5304                         ~(pmap_nesting_size_min - 1);
5305                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5306         }
5307 #endif /* NO_NESTED_PMAP */
5308         if (endaddr < entry->vme_end) {
5309                 if (VME_OBJECT(entry) &&
5310                     !entry->is_sub_map &&
5311                     VME_OBJECT(entry)->phys_contiguous) {
5312                         pmap_remove(map->pmap,
5313                                     (addr64_t)(entry->vme_start),
5314                                     (addr64_t)(entry->vme_end));
5315                 }
5316                 if (entry->vme_atomic) {
5317                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5318                 }
5319                 DTRACE_VM5(
5320                         vm_map_clip_end,
5321                         vm_map_t, map,
5322                         vm_map_offset_t, entry->vme_start,
5323                         vm_map_offset_t, entry->vme_end,
5324                         vm_map_offset_t, endaddr,
5325                         int, VME_ALIAS(entry));
5326
5327                 _vm_map_clip_end(&map->hdr, entry, endaddr);
5328                 if (map->holelistenabled) {
5329                         vm_map_store_update_first_free(map, NULL, FALSE);
5330                 } else {
5331                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5332                 }
5333         }
5334 }
5335
5336
5337 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5338         MACRO_BEGIN \
5339         if ((endaddr) < (entry)->vme_end) \
5340                 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5341         MACRO_END
5342
5343 /*
5344  *      This routine is called only when it is known that
5345  *      the entry must be split.
5346  */
5347 static void
5348 _vm_map_clip_end(
5349         struct vm_map_header    *map_header,
5350         vm_map_entry_t          entry,
5351         vm_map_offset_t         end)
5352 {
5353         vm_map_entry_t  new_entry;
5354
5355         /*
5356          *      Create a new entry and insert it
5357          *      AFTER the specified entry
5358          */
5359
5360         if (entry->map_aligned) {
5361                 assert(VM_MAP_PAGE_ALIGNED(end,
5362                                            VM_MAP_HDR_PAGE_MASK(map_header)));
5363         }
5364
5365         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5366         vm_map_entry_copy_full(new_entry, entry);
5367
5368         assert(entry->vme_start < end);
5369         new_entry->vme_start = entry->vme_end = end;
5370         VME_OFFSET_SET(new_entry,
5371                        VME_OFFSET(new_entry) + (end - entry->vme_start));
5372         assert(new_entry->vme_start < new_entry->vme_end);
5373
5374         _vm_map_store_entry_link(map_header, entry, new_entry);
5375
5376         if (entry->is_sub_map)
5377                 vm_map_reference(VME_SUBMAP(new_entry));
5378         else
5379                 vm_object_reference(VME_OBJECT(new_entry));
5380 }
5381
5382
5383 /*
5384  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
5385  *
5386  *      Asserts that the starting and ending region
5387  *      addresses fall within the valid range of the map.
5388  */
5389 #define VM_MAP_RANGE_CHECK(map, start, end)     \
5390         MACRO_BEGIN                             \
5391         if (start < vm_map_min(map))            \
5392                 start = vm_map_min(map);        \
5393         if (end > vm_map_max(map))              \
5394                 end = vm_map_max(map);          \
5395         if (start > end)                        \
5396                 start = end;                    \
5397         MACRO_END
5398
5399 /*
5400  *      vm_map_range_check:     [ internal use only ]
5401  *
5402  *      Check that the region defined by the specified start and
5403  *      end addresses are wholly contained within a single map
5404  *      entry or set of adjacent map entries of the spacified map,
5405  *      i.e. the specified region contains no unmapped space.
5406  *      If any or all of the region is unmapped, FALSE is returned.
5407  *      Otherwise, TRUE is returned and if the output argument 'entry'
5408  *      is not NULL it points to the map entry containing the start
5409  *      of the region.
5410  *
5411  *      The map is locked for reading on entry and is left locked.
5412  */
5413 static boolean_t
5414 vm_map_range_check(
5415         vm_map_t                map,
5416         vm_map_offset_t         start,
5417         vm_map_offset_t         end,
5418         vm_map_entry_t          *entry)
5419 {
5420         vm_map_entry_t          cur;
5421         vm_map_offset_t         prev;
5422
5423         /*
5424          *      Basic sanity checks first
5425          */
5426         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
5427                 return (FALSE);
5428
5429         /*
5430          *      Check first if the region starts within a valid
5431          *      mapping for the map.
5432          */
5433         if (!vm_map_lookup_entry(map, start, &cur))
5434                 return (FALSE);
5435
5436         /*
5437          *      Optimize for the case that the region is contained
5438          *      in a single map entry.
5439          */
5440         if (entry != (vm_map_entry_t *) NULL)
5441                 *entry = cur;
5442         if (end <= cur->vme_end)
5443                 return (TRUE);
5444
5445         /*
5446          *      If the region is not wholly contained within a
5447          *      single entry, walk the entries looking for holes.
5448          */
5449         prev = cur->vme_end;
5450         cur = cur->vme_next;
5451         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5452                 if (end <= cur->vme_end)
5453                         return (TRUE);
5454                 prev = cur->vme_end;
5455                 cur = cur->vme_next;
5456         }
5457         return (FALSE);
5458 }
5459
5460 /*
5461  *      vm_map_submap:          [ kernel use only ]
5462  *
5463  *      Mark the given range as handled by a subordinate map.
5464  *
5465  *      This range must have been created with vm_map_find using
5466  *      the vm_submap_object, and no other operations may have been
5467  *      performed on this range prior to calling vm_map_submap.
5468  *
5469  *      Only a limited number of operations can be performed
5470  *      within this rage after calling vm_map_submap:
5471  *              vm_fault
5472  *      [Don't try vm_map_copyin!]
5473  *
5474  *      To remove a submapping, one must first remove the
5475  *      range from the superior map, and then destroy the
5476  *      submap (if desired).  [Better yet, don't try it.]
5477  */
5478 kern_return_t
5479 vm_map_submap(
5480         vm_map_t        map,
5481         vm_map_offset_t start,
5482         vm_map_offset_t end,
5483         vm_map_t        submap,
5484         vm_map_offset_t offset,
5485 #ifdef NO_NESTED_PMAP
5486         __unused
5487 #endif  /* NO_NESTED_PMAP */
5488         boolean_t       use_pmap)
5489 {
5490         vm_map_entry_t          entry;
5491         kern_return_t           result = KERN_INVALID_ARGUMENT;
5492         vm_object_t             object;
5493
5494         vm_map_lock(map);
5495
5496         if (! vm_map_lookup_entry(map, start, &entry)) {
5497                 entry = entry->vme_next;
5498         }
5499
5500         if (entry == vm_map_to_entry(map) ||
5501             entry->is_sub_map) {
5502                 vm_map_unlock(map);
5503                 return KERN_INVALID_ARGUMENT;
5504         }
5505
5506         vm_map_clip_start(map, entry, start);
5507         vm_map_clip_end(map, entry, end);
5508
5509         if ((entry->vme_start == start) && (entry->vme_end == end) &&
5510             (!entry->is_sub_map) &&
5511             ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5512             (object->resident_page_count == 0) &&
5513             (object->copy == VM_OBJECT_NULL) &&
5514             (object->shadow == VM_OBJECT_NULL) &&
5515             (!object->pager_created)) {
5516                 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5517                 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5518                 vm_object_deallocate(object);
5519                 entry->is_sub_map = TRUE;
5520                 entry->use_pmap = FALSE;
5521                 VME_SUBMAP_SET(entry, submap);
5522                 vm_map_reference(submap);
5523                 if (submap->mapped_in_other_pmaps == FALSE &&
5524                     vm_map_pmap(submap) != PMAP_NULL &&
5525                     vm_map_pmap(submap) != vm_map_pmap(map)) {
5526                         /*
5527                          * This submap is being mapped in a map
5528                          * that uses a different pmap.
5529                          * Set its "mapped_in_other_pmaps" flag
5530                          * to indicate that we now need to
5531                          * remove mappings from all pmaps rather
5532                          * than just the submap's pmap.
5533                          */
5534                         submap->mapped_in_other_pmaps = TRUE;
5535                 }
5536
5537 #ifndef NO_NESTED_PMAP
5538                 if (use_pmap) {
5539                         /* nest if platform code will allow */
5540                         if(submap->pmap == NULL) {
5541                                 ledger_t ledger = map->pmap->ledger;
5542                                 submap->pmap = pmap_create(ledger,
5543                                                 (vm_map_size_t) 0, FALSE);
5544                                 if(submap->pmap == PMAP_NULL) {
5545                                         vm_map_unlock(map);
5546                                         return(KERN_NO_SPACE);
5547                                 }
5548 #if     defined(__arm__) || defined(__arm64__)
5549                                 pmap_set_nested(submap->pmap);
5550 #endif
5551                         }
5552                         result = pmap_nest(map->pmap,
5553                                            (VME_SUBMAP(entry))->pmap,
5554                                            (addr64_t)start,
5555                                            (addr64_t)start,
5556                                            (uint64_t)(end - start));
5557                         if(result)
5558                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5559                         entry->use_pmap = TRUE;
5560                 }
5561 #else   /* NO_NESTED_PMAP */
5562                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5563 #endif  /* NO_NESTED_PMAP */
5564                 result = KERN_SUCCESS;
5565         }
5566         vm_map_unlock(map);
5567
5568         return(result);
5569 }
5570
5571 /*
5572  *      vm_map_protect:
5573  *
5574  *      Sets the protection of the specified address
5575  *      region in the target map.  If "set_max" is
5576  *      specified, the maximum protection is to be set;
5577  *      otherwise, only the current protection is affected.
5578  */
5579 kern_return_t
5580 vm_map_protect(
5581         vm_map_t        map,
5582         vm_map_offset_t start,
5583         vm_map_offset_t end,
5584         vm_prot_t       new_prot,
5585         boolean_t       set_max)
5586 {
5587         vm_map_entry_t                  current;
5588         vm_map_offset_t                 prev;
5589         vm_map_entry_t                  entry;
5590         vm_prot_t                       new_max;
5591         int                             pmap_options = 0;
5592         kern_return_t                   kr;
5593
5594         XPR(XPR_VM_MAP,
5595             "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
5596             map, start, end, new_prot, set_max);
5597
5598         if (new_prot & VM_PROT_COPY) {
5599                 vm_map_offset_t         new_start;
5600                 vm_prot_t               cur_prot, max_prot;
5601                 vm_map_kernel_flags_t   kflags;
5602
5603                 /* LP64todo - see below */
5604                 if (start >= map->max_offset) {
5605                         return KERN_INVALID_ADDRESS;
5606                 }
5607
5608 #if VM_PROTECT_WX_FAIL
5609                 if ((new_prot & VM_PROT_EXECUTE) &&
5610                     map != kernel_map &&
5611                     cs_process_enforcement(NULL)) {
5612                         DTRACE_VM3(cs_wx,
5613                                    uint64_t, (uint64_t) start,
5614                                    uint64_t, (uint64_t) end,
5615                                    vm_prot_t, new_prot);
5616                         printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5617                                proc_selfpid(),
5618                                (current_task()->bsd_info
5619                                 ? proc_name_address(current_task()->bsd_info)
5620                                 : "?"),
5621                                __FUNCTION__);
5622                         return KERN_PROTECTION_FAILURE;
5623                 }
5624 #endif /* VM_PROTECT_WX_FAIL */
5625
5626                 /*
5627                  * Let vm_map_remap_extract() know that it will need to:
5628                  * + make a copy of the mapping
5629                  * + add VM_PROT_WRITE to the max protections
5630                  * + remove any protections that are no longer allowed from the
5631                  *   max protections (to avoid any WRITE/EXECUTE conflict, for
5632                  *   example).
5633                  * Note that "max_prot" is an IN/OUT parameter only for this
5634                  * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
5635                  * only.
5636                  */
5637                 max_prot = new_prot & VM_PROT_ALL;
5638                 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5639                 kflags.vmkf_remap_prot_copy = TRUE;
5640                 kflags.vmkf_overwrite_immutable = TRUE;
5641                 new_start = start;
5642                 kr = vm_map_remap(map,
5643                                   &new_start,
5644                                   end - start,
5645                                   0, /* mask */
5646                                   VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5647                                   kflags,
5648                                   0,
5649                                   map,
5650                                   start,
5651                                   TRUE, /* copy-on-write remapping! */
5652                                   &cur_prot,
5653                                   &max_prot,
5654                                   VM_INHERIT_DEFAULT);
5655                 if (kr != KERN_SUCCESS) {
5656                         return kr;
5657                 }
5658                 new_prot &= ~VM_PROT_COPY;
5659         }
5660
5661         vm_map_lock(map);
5662
5663         /* LP64todo - remove this check when vm_map_commpage64()
5664          * no longer has to stuff in a map_entry for the commpage
5665          * above the map's max_offset.
5666          */
5667         if (start >= map->max_offset) {
5668                 vm_map_unlock(map);
5669                 return(KERN_INVALID_ADDRESS);
5670         }
5671
5672         while(1) {
5673                 /*
5674                  *      Lookup the entry.  If it doesn't start in a valid
5675                  *      entry, return an error.
5676                  */
5677                 if (! vm_map_lookup_entry(map, start, &entry)) {
5678                         vm_map_unlock(map);
5679                         return(KERN_INVALID_ADDRESS);
5680                 }
5681
5682                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
5683                         start = SUPERPAGE_ROUND_DOWN(start);
5684                         continue;
5685                 }
5686                 break;
5687         }
5688         if (entry->superpage_size)
5689                 end = SUPERPAGE_ROUND_UP(end);
5690
5691         /*
5692          *      Make a first pass to check for protection and address
5693          *      violations.
5694          */
5695
5696         current = entry;
5697         prev = current->vme_start;
5698         while ((current != vm_map_to_entry(map)) &&
5699                (current->vme_start < end)) {
5700
5701                 /*
5702                  * If there is a hole, return an error.
5703                  */
5704                 if (current->vme_start != prev) {
5705                         vm_map_unlock(map);
5706                         return(KERN_INVALID_ADDRESS);
5707                 }
5708
5709                 new_max = current->max_protection;
5710                 if ((new_prot & new_max) != new_prot) {
5711                         vm_map_unlock(map);
5712                         return(KERN_PROTECTION_FAILURE);
5713                 }
5714
5715                 if ((new_prot & VM_PROT_WRITE) &&
5716                     (new_prot & VM_PROT_EXECUTE) &&
5717 #if !CONFIG_EMBEDDED
5718                     map != kernel_map &&
5719                     cs_process_enforcement(NULL) &&
5720 #endif /* !CONFIG_EMBEDDED */
5721                     !(current->used_for_jit)) {
5722                         DTRACE_VM3(cs_wx,
5723                                    uint64_t, (uint64_t) current->vme_start,
5724                                    uint64_t, (uint64_t) current->vme_end,
5725                                    vm_prot_t, new_prot);
5726                         printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5727                                proc_selfpid(),
5728                                (current_task()->bsd_info
5729                                 ? proc_name_address(current_task()->bsd_info)
5730                                 : "?"),
5731                                __FUNCTION__);
5732                         new_prot &= ~VM_PROT_EXECUTE;
5733 #if VM_PROTECT_WX_FAIL
5734                         vm_map_unlock(map);
5735                         return KERN_PROTECTION_FAILURE;
5736 #endif /* VM_PROTECT_WX_FAIL */
5737                 }
5738
5739                 /*
5740                  * If the task has requested executable lockdown,
5741                  * deny both:
5742                  * - adding executable protections OR
5743                  * - adding write protections to an existing executable mapping.
5744                  */
5745                 if (map->map_disallow_new_exec == TRUE) {
5746                         if ((new_prot & VM_PROT_EXECUTE) ||
5747                             ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5748                                 vm_map_unlock(map);
5749                                 return(KERN_PROTECTION_FAILURE);
5750                         }
5751                 }
5752
5753                 prev = current->vme_end;
5754                 current = current->vme_next;
5755         }
5756
5757 #if __arm64__
5758         if (end > prev &&
5759             end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5760                 vm_map_entry_t prev_entry;
5761
5762                 prev_entry = current->vme_prev;
5763                 if (prev_entry != vm_map_to_entry(map) &&
5764                     !prev_entry->map_aligned &&
5765                     (vm_map_round_page(prev_entry->vme_end,
5766                                        VM_MAP_PAGE_MASK(map))
5767                      == end)) {
5768                         /*
5769                          * The last entry in our range is not "map-aligned"
5770                          * but it would have reached all the way to "end"
5771                          * if it had been map-aligned, so this is not really
5772                          * a hole in the range and we can proceed.
5773                          */
5774                         prev = end;
5775                 }
5776         }
5777 #endif /* __arm64__ */
5778
5779         if (end > prev) {
5780                 vm_map_unlock(map);
5781                 return(KERN_INVALID_ADDRESS);
5782         }
5783
5784         /*
5785          *      Go back and fix up protections.
5786          *      Clip to start here if the range starts within
5787          *      the entry.
5788          */
5789
5790         current = entry;
5791         if (current != vm_map_to_entry(map)) {
5792                 /* clip and unnest if necessary */
5793                 vm_map_clip_start(map, current, start);
5794         }
5795
5796         while ((current != vm_map_to_entry(map)) &&
5797                (current->vme_start < end)) {
5798
5799                 vm_prot_t       old_prot;
5800
5801                 vm_map_clip_end(map, current, end);
5802
5803                 if (current->is_sub_map) {
5804                         /* clipping did unnest if needed */
5805                         assert(!current->use_pmap);
5806                 }
5807
5808                 old_prot = current->protection;
5809
5810                 if (set_max) {
5811                         current->max_protection = new_prot;
5812                         current->protection = new_prot & old_prot;
5813                 } else {
5814                         current->protection = new_prot;
5815                 }
5816
5817                 /*
5818                  *      Update physical map if necessary.
5819                  *      If the request is to turn off write protection,
5820                  *      we won't do it for real (in pmap). This is because
5821                  *      it would cause copy-on-write to fail.  We've already
5822                  *      set, the new protection in the map, so if a
5823                  *      write-protect fault occurred, it will be fixed up
5824                  *      properly, COW or not.
5825                  */
5826                 if (current->protection != old_prot) {
5827                         /* Look one level in we support nested pmaps */
5828                         /* from mapped submaps which are direct entries */
5829                         /* in our map */
5830
5831                         vm_prot_t prot;
5832
5833                         prot = current->protection;
5834                         if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5835                                 prot &= ~VM_PROT_WRITE;
5836                         } else {
5837                                 assert(!VME_OBJECT(current)->code_signed);
5838                                 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5839                         }
5840
5841                         if (override_nx(map, VME_ALIAS(current)) && prot)
5842                                 prot |= VM_PROT_EXECUTE;
5843
5844 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5845                         if (!(old_prot & VM_PROT_EXECUTE) &&
5846                             (prot & VM_PROT_EXECUTE) &&
5847                             panic_on_unsigned_execute &&
5848                             (proc_selfcsflags() & CS_KILL)) {
5849                                 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5850                         }
5851 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5852
5853                         if (pmap_has_prot_policy(prot)) {
5854                                 if (current->wired_count) {
5855                                         panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5856                                               map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5857                                 }
5858
5859                                 /* If the pmap layer cares about this
5860                                  * protection type, force a fault for
5861                                  * each page so that vm_fault will
5862                                  * repopulate the page with the full
5863                                  * set of protections.
5864                                  */
5865                                 /*
5866                                  * TODO: We don't seem to need this,
5867                                  * but this is due to an internal
5868                                  * implementation detail of
5869                                  * pmap_protect.  Do we want to rely
5870                                  * on this?
5871                                  */
5872                                 prot = VM_PROT_NONE;
5873                         }
5874
5875                         if (current->is_sub_map && current->use_pmap) {
5876                                 pmap_protect(VME_SUBMAP(current)->pmap,
5877                                              current->vme_start,
5878                                              current->vme_end,
5879                                              prot);
5880                         } else {
5881                                 if (prot & VM_PROT_WRITE) {
5882                                         if (VME_OBJECT(current) == compressor_object) {
5883                                                 /*
5884                                                  * For write requests on the
5885                                                  * compressor, we wil ask the
5886                                                  * pmap layer to prevent us from
5887                                                  * taking a write fault when we
5888                                                  * attempt to access the mapping
5889                                                  * next.
5890                                                  */
5891                                                 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5892                                         }
5893                                 }
5894
5895                                 pmap_protect_options(map->pmap,
5896                                                      current->vme_start,
5897                                                      current->vme_end,
5898                                                      prot,
5899                                                      pmap_options,
5900                                                      NULL);
5901                         }
5902                 }
5903                 current = current->vme_next;
5904         }
5905
5906         current = entry;
5907         while ((current != vm_map_to_entry(map)) &&
5908                (current->vme_start <= end)) {
5909                 vm_map_simplify_entry(map, current);
5910                 current = current->vme_next;
5911         }
5912
5913         vm_map_unlock(map);
5914         return(KERN_SUCCESS);
5915 }
5916
5917 /*
5918  *      vm_map_inherit:
5919  *
5920  *      Sets the inheritance of the specified address
5921  *      range in the target map.  Inheritance
5922  *      affects how the map will be shared with
5923  *      child maps at the time of vm_map_fork.
5924  */
5925 kern_return_t
5926 vm_map_inherit(
5927         vm_map_t        map,
5928         vm_map_offset_t start,
5929         vm_map_offset_t end,
5930         vm_inherit_t    new_inheritance)
5931 {
5932         vm_map_entry_t  entry;
5933         vm_map_entry_t  temp_entry;
5934
5935         vm_map_lock(map);
5936
5937         VM_MAP_RANGE_CHECK(map, start, end);
5938
5939         if (vm_map_lookup_entry(map, start, &temp_entry)) {
5940                 entry = temp_entry;
5941         }
5942         else {
5943                 temp_entry = temp_entry->vme_next;
5944                 entry = temp_entry;
5945         }
5946
5947         /* first check entire range for submaps which can't support the */
5948         /* given inheritance. */
5949         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5950                 if(entry->is_sub_map) {
5951                         if(new_inheritance == VM_INHERIT_COPY) {
5952                                 vm_map_unlock(map);
5953                                 return(KERN_INVALID_ARGUMENT);
5954                         }
5955                 }
5956
5957                 entry = entry->vme_next;
5958         }
5959
5960         entry = temp_entry;
5961         if (entry != vm_map_to_entry(map)) {
5962                 /* clip and unnest if necessary */
5963                 vm_map_clip_start(map, entry, start);
5964         }
5965
5966         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5967                 vm_map_clip_end(map, entry, end);
5968                 if (entry->is_sub_map) {
5969                         /* clip did unnest if needed */
5970                         assert(!entry->use_pmap);
5971                 }
5972
5973                 entry->inheritance = new_inheritance;
5974
5975                 entry = entry->vme_next;
5976         }
5977
5978         vm_map_unlock(map);
5979         return(KERN_SUCCESS);
5980 }
5981
5982 /*
5983  * Update the accounting for the amount of wired memory in this map.  If the user has
5984  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
5985  */
5986
5987 static kern_return_t
5988 add_wire_counts(
5989         vm_map_t        map,
5990         vm_map_entry_t  entry,
5991         boolean_t       user_wire)
5992 {
5993         vm_map_size_t   size;
5994
5995         if (user_wire) {
5996                 unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
5997
5998                 /*
5999                  * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
6000                  * this map entry.
6001                  */
6002
6003                 if (entry->user_wired_count == 0) {
6004                         size = entry->vme_end - entry->vme_start;
6005
6006                         /*
6007                          * Since this is the first time the user is wiring this map entry, check to see if we're
6008                          * exceeding the user wire limits.  There is a per map limit which is the smaller of either
6009                          * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
6010                          * a system-wide limit on the amount of memory all users can wire.  If the user is over either
6011                          * limit, then we fail.
6012                          */
6013
6014                         if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6015                            size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
6016                            size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
6017                                 return KERN_RESOURCE_SHORTAGE;
6018
6019                         /*
6020                          * The first time the user wires an entry, we also increment the wired_count and add this to
6021                          * the total that has been wired in the map.
6022                          */
6023
6024                         if (entry->wired_count >= MAX_WIRE_COUNT)
6025                                 return KERN_FAILURE;
6026
6027                         entry->wired_count++;
6028                         map->user_wire_size += size;
6029                 }
6030
6031                 if (entry->user_wired_count >= MAX_WIRE_COUNT)
6032                         return KERN_FAILURE;
6033
6034                 entry->user_wired_count++;
6035
6036         } else {
6037
6038                 /*
6039                  * The kernel's wiring the memory.  Just bump the count and continue.
6040                  */
6041
6042                 if (entry->wired_count >= MAX_WIRE_COUNT)
6043                         panic("vm_map_wire: too many wirings");
6044
6045                 entry->wired_count++;
6046         }
6047
6048         return KERN_SUCCESS;
6049 }
6050
6051 /*
6052  * Update the memory wiring accounting now that the given map entry is being unwired.
6053  */
6054
6055 static void
6056 subtract_wire_counts(
6057         vm_map_t        map,
6058         vm_map_entry_t  entry,
6059         boolean_t       user_wire)
6060 {
6061
6062         if (user_wire) {
6063
6064                 /*
6065                  * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
6066                  */
6067
6068                 if (entry->user_wired_count == 1) {
6069
6070                         /*
6071                          * We're removing the last user wire reference.  Decrement the wired_count and the total
6072                          * user wired memory for this map.
6073                          */
6074
6075                         assert(entry->wired_count >= 1);
6076                         entry->wired_count--;
6077                         map->user_wire_size -= entry->vme_end - entry->vme_start;
6078                 }
6079
6080                 assert(entry->user_wired_count >= 1);
6081                 entry->user_wired_count--;
6082
6083         } else {
6084
6085                 /*
6086                  * The kernel is unwiring the memory.   Just update the count.
6087                  */
6088
6089                 assert(entry->wired_count >= 1);
6090                 entry->wired_count--;
6091         }
6092 }
6093
6094 int cs_executable_wire = 0;
6095
6096 /*
6097  *      vm_map_wire:
6098  *
6099  *      Sets the pageability of the specified address range in the
6100  *      target map as wired.  Regions specified as not pageable require
6101  *      locked-down physical memory and physical page maps.  The
6102  *      access_type variable indicates types of accesses that must not
6103  *      generate page faults.  This is checked against protection of
6104  *      memory being locked-down.
6105  *
6106  *      The map must not be locked, but a reference must remain to the
6107  *      map throughout the call.
6108  */
6109 static kern_return_t
6110 vm_map_wire_nested(
6111         vm_map_t                map,
6112         vm_map_offset_t         start,
6113         vm_map_offset_t         end,
6114         vm_prot_t               caller_prot,
6115         vm_tag_t                tag,
6116         boolean_t               user_wire,
6117         pmap_t                  map_pmap,
6118         vm_map_offset_t         pmap_addr,
6119         ppnum_t                 *physpage_p)
6120 {
6121         vm_map_entry_t          entry;
6122         vm_prot_t               access_type;
6123         struct vm_map_entry     *first_entry, tmp_entry;
6124         vm_map_t                real_map;
6125         vm_map_offset_t         s,e;
6126         kern_return_t           rc;
6127         boolean_t               need_wakeup;
6128         boolean_t               main_map = FALSE;
6129         wait_interrupt_t        interruptible_state;
6130         thread_t                cur_thread;
6131         unsigned int            last_timestamp;
6132         vm_map_size_t           size;
6133         boolean_t               wire_and_extract;
6134
6135         access_type = (caller_prot & VM_PROT_ALL);
6136
6137         wire_and_extract = FALSE;
6138         if (physpage_p != NULL) {
6139                 /*
6140                  * The caller wants the physical page number of the
6141                  * wired page.  We return only one physical page number
6142                  * so this works for only one page at a time.
6143                  */
6144                 if ((end - start) != PAGE_SIZE) {
6145                         return KERN_INVALID_ARGUMENT;
6146                 }
6147                 wire_and_extract = TRUE;
6148                 *physpage_p = 0;
6149         }
6150
6151         vm_map_lock(map);
6152         if(map_pmap == NULL)
6153                 main_map = TRUE;
6154         last_timestamp = map->timestamp;
6155
6156         VM_MAP_RANGE_CHECK(map, start, end);
6157         assert(page_aligned(start));
6158         assert(page_aligned(end));
6159         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6160         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6161         if (start == end) {
6162                 /* We wired what the caller asked for, zero pages */
6163                 vm_map_unlock(map);
6164                 return KERN_SUCCESS;
6165         }
6166
6167         need_wakeup = FALSE;
6168         cur_thread = current_thread();
6169
6170         s = start;
6171         rc = KERN_SUCCESS;
6172
6173         if (vm_map_lookup_entry(map, s, &first_entry)) {
6174                 entry = first_entry;
6175                 /*
6176                  * vm_map_clip_start will be done later.
6177                  * We don't want to unnest any nested submaps here !
6178                  */
6179         } else {
6180                 /* Start address is not in map */
6181                 rc = KERN_INVALID_ADDRESS;
6182                 goto done;
6183         }
6184
6185         while ((entry != vm_map_to_entry(map)) && (s < end)) {
6186                 /*
6187                  * At this point, we have wired from "start" to "s".
6188                  * We still need to wire from "s" to "end".
6189                  *
6190                  * "entry" hasn't been clipped, so it could start before "s"
6191                  * and/or end after "end".
6192                  */
6193
6194                 /* "e" is how far we want to wire in this entry */
6195                 e = entry->vme_end;
6196                 if (e > end)
6197                         e = end;
6198
6199                 /*
6200                  * If another thread is wiring/unwiring this entry then
6201                  * block after informing other thread to wake us up.
6202                  */
6203                 if (entry->in_transition) {
6204                         wait_result_t wait_result;
6205
6206                         /*
6207                          * We have not clipped the entry.  Make sure that
6208                          * the start address is in range so that the lookup
6209                          * below will succeed.
6210                          * "s" is the current starting point: we've already
6211                          * wired from "start" to "s" and we still have
6212                          * to wire from "s" to "end".
6213                          */
6214
6215                         entry->needs_wakeup = TRUE;
6216
6217                         /*
6218                          * wake up anybody waiting on entries that we have
6219                          * already wired.
6220                          */
6221                         if (need_wakeup) {
6222                                 vm_map_entry_wakeup(map);
6223                                 need_wakeup = FALSE;
6224                         }
6225                         /*
6226                          * User wiring is interruptible
6227                          */
6228                         wait_result = vm_map_entry_wait(map,
6229                                                         (user_wire) ? THREAD_ABORTSAFE :
6230                                                         THREAD_UNINT);
6231                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
6232                                 /*
6233                                  * undo the wirings we have done so far
6234                                  * We do not clear the needs_wakeup flag,
6235                                  * because we cannot tell if we were the
6236                                  * only one waiting.
6237                                  */
6238                                 rc = KERN_FAILURE;
6239                                 goto done;
6240                         }
6241
6242                         /*
6243                          * Cannot avoid a lookup here. reset timestamp.
6244                          */
6245                         last_timestamp = map->timestamp;
6246
6247                         /*
6248                          * The entry could have been clipped, look it up again.
6249                          * Worse that can happen is, it may not exist anymore.
6250                          */
6251                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
6252                                 /*
6253                                  * User: undo everything upto the previous
6254                                  * entry.  let vm_map_unwire worry about
6255                                  * checking the validity of the range.
6256                                  */
6257                                 rc = KERN_FAILURE;
6258                                 goto done;
6259                         }
6260                         entry = first_entry;
6261                         continue;
6262                 }
6263
6264                 if (entry->is_sub_map) {
6265                         vm_map_offset_t sub_start;
6266                         vm_map_offset_t sub_end;
6267                         vm_map_offset_t local_start;
6268                         vm_map_offset_t local_end;
6269                         pmap_t          pmap;
6270
6271                         if (wire_and_extract) {
6272                                 /*
6273                                  * Wiring would result in copy-on-write
6274                                  * which would not be compatible with
6275                                  * the sharing we have with the original
6276                                  * provider of this memory.
6277                                  */
6278                                 rc = KERN_INVALID_ARGUMENT;
6279                                 goto done;
6280                         }
6281
6282                         vm_map_clip_start(map, entry, s);
6283                         vm_map_clip_end(map, entry, end);
6284
6285                         sub_start = VME_OFFSET(entry);
6286                         sub_end = entry->vme_end;
6287                         sub_end += VME_OFFSET(entry) - entry->vme_start;
6288
6289                         local_end = entry->vme_end;
6290                         if(map_pmap == NULL) {
6291                                 vm_object_t             object;
6292                                 vm_object_offset_t      offset;
6293                                 vm_prot_t               prot;
6294                                 boolean_t               wired;
6295                                 vm_map_entry_t          local_entry;
6296                                 vm_map_version_t         version;
6297                                 vm_map_t                lookup_map;
6298
6299                                 if(entry->use_pmap) {
6300                                         pmap = VME_SUBMAP(entry)->pmap;
6301                                         /* ppc implementation requires that */
6302                                         /* submaps pmap address ranges line */
6303                                         /* up with parent map */
6304 #ifdef notdef
6305                                         pmap_addr = sub_start;
6306 #endif
6307                                         pmap_addr = s;
6308                                 } else {
6309                                         pmap = map->pmap;
6310                                         pmap_addr = s;
6311                                 }
6312
6313                                 if (entry->wired_count) {
6314                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6315                                                 goto done;
6316
6317                                         /*
6318                                          * The map was not unlocked:
6319                                          * no need to goto re-lookup.
6320                                          * Just go directly to next entry.
6321                                          */
6322                                         entry = entry->vme_next;
6323                                         s = entry->vme_start;
6324                                         continue;
6325
6326                                 }
6327
6328                                 /* call vm_map_lookup_locked to */
6329                                 /* cause any needs copy to be   */
6330                                 /* evaluated */
6331                                 local_start = entry->vme_start;
6332                                 lookup_map = map;
6333                                 vm_map_lock_write_to_read(map);
6334                                 if(vm_map_lookup_locked(
6335                                            &lookup_map, local_start,
6336                                            access_type | VM_PROT_COPY,
6337                                            OBJECT_LOCK_EXCLUSIVE,
6338                                            &version, &object,
6339                                            &offset, &prot, &wired,
6340                                            NULL,
6341                                            &real_map)) {
6342
6343                                         vm_map_unlock_read(lookup_map);
6344                                         assert(map_pmap == NULL);
6345                                         vm_map_unwire(map, start,
6346                                                       s, user_wire);
6347                                         return(KERN_FAILURE);
6348                                 }
6349                                 vm_object_unlock(object);
6350                                 if(real_map != lookup_map)
6351                                         vm_map_unlock(real_map);
6352                                 vm_map_unlock_read(lookup_map);
6353                                 vm_map_lock(map);
6354
6355                                 /* we unlocked, so must re-lookup */
6356                                 if (!vm_map_lookup_entry(map,
6357                                                          local_start,
6358                                                          &local_entry)) {
6359                                         rc = KERN_FAILURE;
6360                                         goto done;
6361                                 }
6362
6363                                 /*
6364                                  * entry could have been "simplified",
6365                                  * so re-clip
6366                                  */
6367                                 entry = local_entry;
6368                                 assert(s == local_start);
6369                                 vm_map_clip_start(map, entry, s);
6370                                 vm_map_clip_end(map, entry, end);
6371                                 /* re-compute "e" */
6372                                 e = entry->vme_end;
6373                                 if (e > end)
6374                                         e = end;
6375
6376                                 /* did we have a change of type? */
6377                                 if (!entry->is_sub_map) {
6378                                         last_timestamp = map->timestamp;
6379                                         continue;
6380                                 }
6381                         } else {
6382                                 local_start = entry->vme_start;
6383                                 pmap = map_pmap;
6384                         }
6385
6386                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6387                                 goto done;
6388
6389                         entry->in_transition = TRUE;
6390
6391                         vm_map_unlock(map);
6392                         rc = vm_map_wire_nested(VME_SUBMAP(entry),
6393                                                 sub_start, sub_end,
6394                                                 caller_prot, tag,
6395                                                 user_wire, pmap, pmap_addr,
6396                                                 NULL);
6397                         vm_map_lock(map);
6398
6399                         /*
6400                          * Find the entry again.  It could have been clipped
6401                          * after we unlocked the map.
6402                          */
6403                         if (!vm_map_lookup_entry(map, local_start,
6404                                                  &first_entry))
6405                                 panic("vm_map_wire: re-lookup failed");
6406                         entry = first_entry;
6407
6408                         assert(local_start == s);
6409                         /* re-compute "e" */
6410                         e = entry->vme_end;
6411                         if (e > end)
6412                                 e = end;
6413
6414                         last_timestamp = map->timestamp;
6415                         while ((entry != vm_map_to_entry(map)) &&
6416                                (entry->vme_start < e)) {
6417                                 assert(entry->in_transition);
6418                                 entry->in_transition = FALSE;
6419                                 if (entry->needs_wakeup) {
6420                                         entry->needs_wakeup = FALSE;
6421                                         need_wakeup = TRUE;
6422                                 }
6423                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6424                                         subtract_wire_counts(map, entry, user_wire);
6425                                 }
6426                                 entry = entry->vme_next;
6427                         }
6428                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6429                                 goto done;
6430                         }
6431
6432                         /* no need to relookup again */
6433                         s = entry->vme_start;
6434                         continue;
6435                 }
6436
6437                 /*
6438                  * If this entry is already wired then increment
6439                  * the appropriate wire reference count.
6440                  */
6441                 if (entry->wired_count) {
6442
6443                         if ((entry->protection & access_type) != access_type) {
6444                                 /* found a protection problem */
6445
6446                                 /*
6447                                  * XXX FBDP
6448                                  * We should always return an error
6449                                  * in this case but since we didn't
6450                                  * enforce it before, let's do
6451                                  * it only for the new "wire_and_extract"
6452                                  * code path for now...
6453                                  */
6454                                 if (wire_and_extract) {
6455                                         rc = KERN_PROTECTION_FAILURE;
6456                                         goto done;
6457                                 }
6458                         }
6459
6460                         /*
6461                          * entry is already wired down, get our reference
6462                          * after clipping to our range.
6463                          */
6464                         vm_map_clip_start(map, entry, s);
6465                         vm_map_clip_end(map, entry, end);
6466
6467                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6468                                 goto done;
6469
6470                         if (wire_and_extract) {
6471                                 vm_object_t             object;
6472                                 vm_object_offset_t      offset;
6473                                 vm_page_t               m;
6474
6475                                 /*
6476                                  * We don't have to "wire" the page again
6477                                  * bit we still have to "extract" its
6478                                  * physical page number, after some sanity
6479                                  * checks.
6480                                  */
6481                                 assert((entry->vme_end - entry->vme_start)
6482                                        == PAGE_SIZE);
6483                                 assert(!entry->needs_copy);
6484                                 assert(!entry->is_sub_map);
6485                                 assert(VME_OBJECT(entry));
6486                                 if (((entry->vme_end - entry->vme_start)
6487                                      != PAGE_SIZE) ||
6488                                     entry->needs_copy ||
6489                                     entry->is_sub_map ||
6490                                     VME_OBJECT(entry) == VM_OBJECT_NULL) {
6491                                         rc = KERN_INVALID_ARGUMENT;
6492                                         goto done;
6493                                 }
6494
6495                                 object = VME_OBJECT(entry);
6496                                 offset = VME_OFFSET(entry);
6497                                 /* need exclusive lock to update m->dirty */
6498                                 if (entry->protection & VM_PROT_WRITE) {
6499                                         vm_object_lock(object);
6500                                 } else {
6501                                         vm_object_lock_shared(object);
6502                                 }
6503                                 m = vm_page_lookup(object, offset);
6504                                 assert(m != VM_PAGE_NULL);
6505                                 assert(VM_PAGE_WIRED(m));
6506                                 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6507                                         *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6508                                         if (entry->protection & VM_PROT_WRITE) {
6509                                                 vm_object_lock_assert_exclusive(
6510                                                         object);
6511                                                 m->vmp_dirty = TRUE;
6512                                         }
6513                                 } else {
6514                                         /* not already wired !? */
6515                                         *physpage_p = 0;
6516                                 }
6517                                 vm_object_unlock(object);
6518                         }
6519
6520                         /* map was not unlocked: no need to relookup */
6521                         entry = entry->vme_next;
6522                         s = entry->vme_start;
6523                         continue;
6524                 }
6525
6526                 /*
6527                  * Unwired entry or wire request transmitted via submap
6528                  */
6529
6530                 /*
6531                  * Wiring would copy the pages to the shadow object.
6532                  * The shadow object would not be code-signed so
6533                  * attempting to execute code from these copied pages
6534                  * would trigger a code-signing violation.
6535                  */
6536
6537                 if ((entry->protection & VM_PROT_EXECUTE)
6538 #if !CONFIG_EMBEDDED
6539                     &&
6540                     map != kernel_map &&
6541                     cs_process_enforcement(NULL)
6542 #endif /* !CONFIG_EMBEDDED */
6543                         ) {
6544 #if MACH_ASSERT
6545                         printf("pid %d[%s] wiring executable range from "
6546                                "0x%llx to 0x%llx: rejected to preserve "
6547                                "code-signing\n",
6548                                proc_selfpid(),
6549                                (current_task()->bsd_info
6550                                 ? proc_name_address(current_task()->bsd_info)
6551                                 : "?"),
6552                                (uint64_t) entry->vme_start,
6553                                (uint64_t) entry->vme_end);
6554 #endif /* MACH_ASSERT */
6555                         DTRACE_VM2(cs_executable_wire,
6556                                    uint64_t, (uint64_t)entry->vme_start,
6557                                    uint64_t, (uint64_t)entry->vme_end);
6558                         cs_executable_wire++;
6559                         rc = KERN_PROTECTION_FAILURE;
6560                         goto done;
6561                 }
6562
6563                 /*
6564                  * Perform actions of vm_map_lookup that need the write
6565                  * lock on the map: create a shadow object for a
6566                  * copy-on-write region, or an object for a zero-fill
6567                  * region.
6568                  */
6569                 size = entry->vme_end - entry->vme_start;
6570                 /*
6571                  * If wiring a copy-on-write page, we need to copy it now
6572                  * even if we're only (currently) requesting read access.
6573                  * This is aggressive, but once it's wired we can't move it.
6574                  */
6575                 if (entry->needs_copy) {
6576                         if (wire_and_extract) {
6577                                 /*
6578                                  * We're supposed to share with the original
6579                                  * provider so should not be "needs_copy"
6580                                  */
6581                                 rc = KERN_INVALID_ARGUMENT;
6582                                 goto done;
6583                         }
6584
6585                         VME_OBJECT_SHADOW(entry, size);
6586                         entry->needs_copy = FALSE;
6587                 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6588                         if (wire_and_extract) {
6589                                 /*
6590                                  * We're supposed to share with the original
6591                                  * provider so should already have an object.
6592                                  */
6593                                 rc = KERN_INVALID_ARGUMENT;
6594                                 goto done;
6595                         }
6596                         VME_OBJECT_SET(entry, vm_object_allocate(size));
6597                         VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6598                         assert(entry->use_pmap);
6599                 }
6600
6601                 vm_map_clip_start(map, entry, s);
6602                 vm_map_clip_end(map, entry, end);
6603
6604                 /* re-compute "e" */
6605                 e = entry->vme_end;
6606                 if (e > end)
6607                         e = end;
6608
6609                 /*
6610                  * Check for holes and protection mismatch.
6611                  * Holes: Next entry should be contiguous unless this
6612                  *        is the end of the region.
6613                  * Protection: Access requested must be allowed, unless
6614                  *      wiring is by protection class
6615                  */
6616                 if ((entry->vme_end < end) &&
6617                     ((entry->vme_next == vm_map_to_entry(map)) ||
6618                      (entry->vme_next->vme_start > entry->vme_end))) {
6619                         /* found a hole */
6620                         rc = KERN_INVALID_ADDRESS;
6621                         goto done;
6622                 }
6623                 if ((entry->protection & access_type) != access_type) {
6624                         /* found a protection problem */
6625                         rc = KERN_PROTECTION_FAILURE;
6626                         goto done;
6627                 }
6628
6629                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6630
6631                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6632                         goto done;
6633
6634                 entry->in_transition = TRUE;
6635
6636                 /*
6637                  * This entry might get split once we unlock the map.
6638                  * In vm_fault_wire(), we need the current range as
6639                  * defined by this entry.  In order for this to work
6640                  * along with a simultaneous clip operation, we make a
6641                  * temporary copy of this entry and use that for the
6642                  * wiring.  Note that the underlying objects do not
6643                  * change during a clip.
6644                  */
6645                 tmp_entry = *entry;
6646
6647                 /*
6648                  * The in_transition state guarentees that the entry
6649                  * (or entries for this range, if split occured) will be
6650                  * there when the map lock is acquired for the second time.
6651                  */
6652                 vm_map_unlock(map);
6653
6654                 if (!user_wire && cur_thread != THREAD_NULL)
6655                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
6656                 else
6657                         interruptible_state = THREAD_UNINT;
6658
6659                 if(map_pmap)
6660                         rc = vm_fault_wire(map,
6661                                            &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6662                                            physpage_p);
6663                 else
6664                         rc = vm_fault_wire(map,
6665                                            &tmp_entry, caller_prot, tag, map->pmap,
6666                                            tmp_entry.vme_start,
6667                                            physpage_p);
6668
6669                 if (!user_wire && cur_thread != THREAD_NULL)
6670                         thread_interrupt_level(interruptible_state);
6671
6672                 vm_map_lock(map);
6673
6674                 if (last_timestamp+1 != map->timestamp) {
6675                         /*
6676                          * Find the entry again.  It could have been clipped
6677                          * after we unlocked the map.
6678                          */
6679                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6680                                                  &first_entry))
6681                                 panic("vm_map_wire: re-lookup failed");
6682
6683                         entry = first_entry;
6684                 }
6685
6686                 last_timestamp = map->timestamp;
6687
6688                 while ((entry != vm_map_to_entry(map)) &&
6689                        (entry->vme_start < tmp_entry.vme_end)) {
6690                         assert(entry->in_transition);
6691                         entry->in_transition = FALSE;
6692                         if (entry->needs_wakeup) {
6693                                 entry->needs_wakeup = FALSE;
6694                                 need_wakeup = TRUE;
6695                         }
6696                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6697                                 subtract_wire_counts(map, entry, user_wire);
6698                         }
6699                         entry = entry->vme_next;
6700                 }
6701
6702                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
6703                         goto done;
6704                 }
6705
6706                 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6707                     (tmp_entry.vme_end != end) &&    /* AND, we are not at the end of the requested range */
6708                     (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6709                         /* found a "new" hole */
6710                         s = tmp_entry.vme_end;
6711                         rc = KERN_INVALID_ADDRESS;
6712                         goto done;
6713                 }
6714
6715                 s = entry->vme_start;
6716
6717         } /* end while loop through map entries */
6718
6719 done:
6720         if (rc == KERN_SUCCESS) {
6721                 /* repair any damage we may have made to the VM map */
6722                 vm_map_simplify_range(map, start, end);
6723         }
6724
6725         vm_map_unlock(map);
6726
6727         /*
6728          * wake up anybody waiting on entries we wired.
6729          */
6730         if (need_wakeup)
6731                 vm_map_entry_wakeup(map);
6732
6733         if (rc != KERN_SUCCESS) {
6734                 /* undo what has been wired so far */
6735                 vm_map_unwire_nested(map, start, s, user_wire,
6736                                      map_pmap, pmap_addr);
6737                 if (physpage_p) {
6738                         *physpage_p = 0;
6739                 }
6740         }
6741
6742         return rc;
6743
6744 }
6745
6746 kern_return_t
6747 vm_map_wire_external(
6748         vm_map_t                map,
6749         vm_map_offset_t         start,
6750         vm_map_offset_t         end,
6751         vm_prot_t               caller_prot,
6752         boolean_t               user_wire)
6753 {
6754         kern_return_t   kret;
6755
6756         kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6757                                   user_wire, (pmap_t)NULL, 0, NULL);
6758         return kret;
6759 }
6760
6761 kern_return_t
6762 vm_map_wire_kernel(
6763         vm_map_t                map,
6764         vm_map_offset_t         start,
6765         vm_map_offset_t         end,
6766         vm_prot_t               caller_prot,
6767         vm_tag_t                tag,
6768         boolean_t               user_wire)
6769 {
6770         kern_return_t   kret;
6771
6772         kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6773                                   user_wire, (pmap_t)NULL, 0, NULL);
6774         return kret;
6775 }
6776
6777 kern_return_t
6778 vm_map_wire_and_extract_external(
6779         vm_map_t        map,
6780         vm_map_offset_t start,
6781         vm_prot_t       caller_prot,
6782         boolean_t       user_wire,
6783         ppnum_t         *physpage_p)
6784 {
6785         kern_return_t   kret;
6786
6787         kret = vm_map_wire_nested(map,
6788                                   start,
6789                                   start+VM_MAP_PAGE_SIZE(map),
6790                                   caller_prot,
6791                                   vm_tag_bt(),
6792                                   user_wire,
6793                                   (pmap_t)NULL,
6794                                   0,
6795                                   physpage_p);
6796         if (kret != KERN_SUCCESS &&
6797             physpage_p != NULL) {
6798                 *physpage_p = 0;
6799         }
6800         return kret;
6801 }
6802
6803 kern_return_t
6804 vm_map_wire_and_extract_kernel(
6805         vm_map_t        map,
6806         vm_map_offset_t start,
6807         vm_prot_t       caller_prot,
6808         vm_tag_t        tag,
6809         boolean_t       user_wire,
6810         ppnum_t         *physpage_p)
6811 {
6812         kern_return_t   kret;
6813
6814         kret = vm_map_wire_nested(map,
6815                                   start,
6816                                   start+VM_MAP_PAGE_SIZE(map),
6817                                   caller_prot,
6818                                   tag,
6819                                   user_wire,
6820                                   (pmap_t)NULL,
6821                                   0,
6822                                   physpage_p);
6823         if (kret != KERN_SUCCESS &&
6824             physpage_p != NULL) {
6825                 *physpage_p = 0;
6826         }
6827         return kret;
6828 }
6829
6830 /*
6831  *      vm_map_unwire:
6832  *
6833  *      Sets the pageability of the specified address range in the target
6834  *      as pageable.  Regions specified must have been wired previously.
6835  *
6836  *      The map must not be locked, but a reference must remain to the map
6837  *      throughout the call.
6838  *
6839  *      Kernel will panic on failures.  User unwire ignores holes and
6840  *      unwired and intransition entries to avoid losing memory by leaving
6841  *      it unwired.
6842  */
6843 static kern_return_t
6844 vm_map_unwire_nested(
6845         vm_map_t                map,
6846         vm_map_offset_t         start,
6847         vm_map_offset_t         end,
6848         boolean_t               user_wire,
6849         pmap_t                  map_pmap,
6850         vm_map_offset_t         pmap_addr)
6851 {
6852         vm_map_entry_t          entry;
6853         struct vm_map_entry     *first_entry, tmp_entry;
6854         boolean_t               need_wakeup;
6855         boolean_t               main_map = FALSE;
6856         unsigned int            last_timestamp;
6857
6858         vm_map_lock(map);
6859         if(map_pmap == NULL)
6860                 main_map = TRUE;
6861         last_timestamp = map->timestamp;
6862
6863         VM_MAP_RANGE_CHECK(map, start, end);
6864         assert(page_aligned(start));
6865         assert(page_aligned(end));
6866         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6867         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6868
6869         if (start == end) {
6870                 /* We unwired what the caller asked for: zero pages */
6871                 vm_map_unlock(map);
6872                 return KERN_SUCCESS;
6873         }
6874
6875         if (vm_map_lookup_entry(map, start, &first_entry)) {
6876                 entry = first_entry;
6877                 /*
6878                  * vm_map_clip_start will be done later.
6879                  * We don't want to unnest any nested sub maps here !
6880                  */
6881         }
6882         else {
6883                 if (!user_wire) {
6884                         panic("vm_map_unwire: start not found");
6885                 }
6886                 /*      Start address is not in map. */
6887                 vm_map_unlock(map);
6888                 return(KERN_INVALID_ADDRESS);
6889         }
6890
6891         if (entry->superpage_size) {
6892                 /* superpages are always wired */
6893                 vm_map_unlock(map);
6894                 return KERN_INVALID_ADDRESS;
6895         }
6896
6897         need_wakeup = FALSE;
6898         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6899                 if (entry->in_transition) {
6900                         /*
6901                          * 1)
6902                          * Another thread is wiring down this entry. Note
6903                          * that if it is not for the other thread we would
6904                          * be unwiring an unwired entry.  This is not
6905                          * permitted.  If we wait, we will be unwiring memory
6906                          * we did not wire.
6907                          *
6908                          * 2)
6909                          * Another thread is unwiring this entry.  We did not
6910                          * have a reference to it, because if we did, this
6911                          * entry will not be getting unwired now.
6912                          */
6913                         if (!user_wire) {
6914                                 /*
6915                                  * XXX FBDP
6916                                  * This could happen:  there could be some
6917                                  * overlapping vslock/vsunlock operations
6918                                  * going on.
6919                                  * We should probably just wait and retry,
6920                                  * but then we have to be careful that this
6921                                  * entry could get "simplified" after
6922                                  * "in_transition" gets unset and before
6923                                  * we re-lookup the entry, so we would
6924                                  * have to re-clip the entry to avoid
6925                                  * re-unwiring what we have already unwired...
6926                                  * See vm_map_wire_nested().
6927                                  *
6928                                  * Or we could just ignore "in_transition"
6929                                  * here and proceed to decement the wired
6930                                  * count(s) on this entry.  That should be fine
6931                                  * as long as "wired_count" doesn't drop all
6932                                  * the way to 0 (and we should panic if THAT
6933                                  * happens).
6934                                  */
6935                                 panic("vm_map_unwire: in_transition entry");
6936                         }
6937
6938                         entry = entry->vme_next;
6939                         continue;
6940                 }
6941
6942                 if (entry->is_sub_map) {
6943                         vm_map_offset_t sub_start;
6944                         vm_map_offset_t sub_end;
6945                         vm_map_offset_t local_end;
6946                         pmap_t          pmap;
6947
6948                         vm_map_clip_start(map, entry, start);
6949                         vm_map_clip_end(map, entry, end);
6950
6951                         sub_start = VME_OFFSET(entry);
6952                         sub_end = entry->vme_end - entry->vme_start;
6953                         sub_end += VME_OFFSET(entry);
6954                         local_end = entry->vme_end;
6955                         if(map_pmap == NULL) {
6956                                 if(entry->use_pmap) {
6957                                         pmap = VME_SUBMAP(entry)->pmap;
6958                                         pmap_addr = sub_start;
6959                                 } else {
6960                                         pmap = map->pmap;
6961                                         pmap_addr = start;
6962                                 }
6963                                 if (entry->wired_count == 0 ||
6964                                     (user_wire && entry->user_wired_count == 0)) {
6965                                         if (!user_wire)
6966                                                 panic("vm_map_unwire: entry is unwired");
6967                                         entry = entry->vme_next;
6968                                         continue;
6969                                 }
6970
6971                                 /*
6972                                  * Check for holes
6973                                  * Holes: Next entry should be contiguous unless
6974                                  * this is the end of the region.
6975                                  */
6976                                 if (((entry->vme_end < end) &&
6977                                      ((entry->vme_next == vm_map_to_entry(map)) ||
6978                                       (entry->vme_next->vme_start
6979                                        > entry->vme_end)))) {
6980                                         if (!user_wire)
6981                                                 panic("vm_map_unwire: non-contiguous region");
6982 /*
6983                                         entry = entry->vme_next;
6984                                         continue;
6985 */
6986                                 }
6987
6988                                 subtract_wire_counts(map, entry, user_wire);
6989
6990                                 if (entry->wired_count != 0) {
6991                                         entry = entry->vme_next;
6992                                         continue;
6993                                 }
6994
6995                                 entry->in_transition = TRUE;
6996                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
6997
6998                                 /*
6999                                  * We can unlock the map now. The in_transition state
7000                                  * guarantees existance of the entry.
7001                                  */
7002                                 vm_map_unlock(map);
7003                                 vm_map_unwire_nested(VME_SUBMAP(entry),
7004                                                      sub_start, sub_end, user_wire, pmap, pmap_addr);
7005                                 vm_map_lock(map);
7006
7007                                 if (last_timestamp+1 != map->timestamp) {
7008                                         /*
7009                                          * Find the entry again.  It could have been
7010                                          * clipped or deleted after we unlocked the map.
7011                                          */
7012                                         if (!vm_map_lookup_entry(map,
7013                                                                  tmp_entry.vme_start,
7014                                                                  &first_entry)) {
7015                                                 if (!user_wire)
7016                                                         panic("vm_map_unwire: re-lookup failed");
7017                                                 entry = first_entry->vme_next;
7018                                         } else
7019                                                 entry = first_entry;
7020                                 }
7021                                 last_timestamp = map->timestamp;
7022
7023                                 /*
7024                                  * clear transition bit for all constituent entries
7025                                  * that were in the original entry (saved in
7026                                  * tmp_entry).  Also check for waiters.
7027                                  */
7028                                 while ((entry != vm_map_to_entry(map)) &&
7029                                        (entry->vme_start < tmp_entry.vme_end)) {
7030                                         assert(entry->in_transition);
7031                                         entry->in_transition = FALSE;
7032                                         if (entry->needs_wakeup) {
7033                                                 entry->needs_wakeup = FALSE;
7034                                                 need_wakeup = TRUE;
7035                                         }
7036                                         entry = entry->vme_next;
7037                                 }
7038                                 continue;
7039                         } else {
7040                                 vm_map_unlock(map);
7041                                 vm_map_unwire_nested(VME_SUBMAP(entry),
7042                                                      sub_start, sub_end, user_wire, map_pmap,
7043                                                      pmap_addr);
7044                                 vm_map_lock(map);
7045
7046                                 if (last_timestamp+1 != map->timestamp) {
7047                                         /*
7048                                          * Find the entry again.  It could have been
7049                                          * clipped or deleted after we unlocked the map.
7050                                          */
7051                                         if (!vm_map_lookup_entry(map,
7052                                                                  tmp_entry.vme_start,
7053                                                                  &first_entry)) {
7054                                                 if (!user_wire)
7055                                                         panic("vm_map_unwire: re-lookup failed");
7056                                                 entry = first_entry->vme_next;
7057                                         } else
7058                                                 entry = first_entry;
7059                                 }
7060                                 last_timestamp = map->timestamp;
7061                         }
7062                 }
7063
7064
7065                 if ((entry->wired_count == 0) ||
7066                     (user_wire && entry->user_wired_count == 0)) {
7067                         if (!user_wire)
7068                                 panic("vm_map_unwire: entry is unwired");
7069
7070                         entry = entry->vme_next;
7071                         continue;
7072                 }
7073
7074                 assert(entry->wired_count > 0 &&
7075                        (!user_wire || entry->user_wired_count > 0));
7076
7077                 vm_map_clip_start(map, entry, start);
7078                 vm_map_clip_end(map, entry, end);
7079
7080                 /*
7081                  * Check for holes
7082                  * Holes: Next entry should be contiguous unless
7083                  *        this is the end of the region.
7084                  */
7085                 if (((entry->vme_end < end) &&
7086                      ((entry->vme_next == vm_map_to_entry(map)) ||
7087                       (entry->vme_next->vme_start > entry->vme_end)))) {
7088
7089                         if (!user_wire)
7090                                 panic("vm_map_unwire: non-contiguous region");
7091                         entry = entry->vme_next;
7092                         continue;
7093                 }
7094
7095                 subtract_wire_counts(map, entry, user_wire);
7096
7097                 if (entry->wired_count != 0) {
7098                         entry = entry->vme_next;
7099                         continue;
7100                 }
7101
7102                 if(entry->zero_wired_pages) {
7103                         entry->zero_wired_pages = FALSE;
7104                 }
7105
7106                 entry->in_transition = TRUE;
7107                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
7108
7109                 /*
7110                  * We can unlock the map now. The in_transition state
7111                  * guarantees existance of the entry.
7112                  */
7113                 vm_map_unlock(map);
7114                 if(map_pmap) {
7115                         vm_fault_unwire(map,
7116                                         &tmp_entry, FALSE, map_pmap, pmap_addr);
7117                 } else {
7118                         vm_fault_unwire(map,
7119                                         &tmp_entry, FALSE, map->pmap,
7120                                         tmp_entry.vme_start);
7121                 }
7122                 vm_map_lock(map);
7123
7124                 if (last_timestamp+1 != map->timestamp) {
7125                         /*
7126                          * Find the entry again.  It could have been clipped
7127                          * or deleted after we unlocked the map.
7128                          */
7129                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7130                                                  &first_entry)) {
7131                                 if (!user_wire)
7132                                         panic("vm_map_unwire: re-lookup failed");
7133                                 entry = first_entry->vme_next;
7134                         } else
7135                                 entry = first_entry;
7136                 }
7137                 last_timestamp = map->timestamp;
7138
7139                 /*
7140                  * clear transition bit for all constituent entries that
7141                  * were in the original entry (saved in tmp_entry).  Also
7142                  * check for waiters.
7143                  */
7144                 while ((entry != vm_map_to_entry(map)) &&
7145                        (entry->vme_start < tmp_entry.vme_end)) {
7146                         assert(entry->in_transition);
7147                         entry->in_transition = FALSE;
7148                         if (entry->needs_wakeup) {
7149                                 entry->needs_wakeup = FALSE;
7150                                 need_wakeup = TRUE;
7151                         }
7152                         entry = entry->vme_next;
7153                 }
7154         }
7155
7156         /*
7157          * We might have fragmented the address space when we wired this
7158          * range of addresses.  Attempt to re-coalesce these VM map entries
7159          * with their neighbors now that they're no longer wired.
7160          * Under some circumstances, address space fragmentation can
7161          * prevent VM object shadow chain collapsing, which can cause
7162          * swap space leaks.
7163          */
7164         vm_map_simplify_range(map, start, end);
7165
7166         vm_map_unlock(map);
7167         /*
7168          * wake up anybody waiting on entries that we have unwired.
7169          */
7170         if (need_wakeup)
7171                 vm_map_entry_wakeup(map);
7172         return(KERN_SUCCESS);
7173
7174 }
7175
7176 kern_return_t
7177 vm_map_unwire(
7178         vm_map_t                map,
7179         vm_map_offset_t         start,
7180         vm_map_offset_t         end,
7181         boolean_t               user_wire)
7182 {
7183         return vm_map_unwire_nested(map, start, end,
7184                                     user_wire, (pmap_t)NULL, 0);
7185 }
7186
7187
7188 /*
7189  *      vm_map_entry_delete:    [ internal use only ]
7190  *
7191  *      Deallocate the given entry from the target map.
7192  */
7193 static void
7194 vm_map_entry_delete(
7195         vm_map_t        map,
7196         vm_map_entry_t  entry)
7197 {
7198         vm_map_offset_t s, e;
7199         vm_object_t     object;
7200         vm_map_t        submap;
7201
7202         s = entry->vme_start;
7203         e = entry->vme_end;
7204         assert(page_aligned(s));
7205         assert(page_aligned(e));
7206         if (entry->map_aligned == TRUE) {
7207                 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7208                 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7209         }
7210         assert(entry->wired_count == 0);
7211         assert(entry->user_wired_count == 0);
7212         assert(!entry->permanent);
7213
7214         if (entry->is_sub_map) {
7215                 object = NULL;
7216                 submap = VME_SUBMAP(entry);
7217         } else {
7218                 submap = NULL;
7219                 object = VME_OBJECT(entry);
7220         }
7221
7222         vm_map_store_entry_unlink(map, entry);
7223         map->size -= e - s;
7224
7225         vm_map_entry_dispose(map, entry);
7226
7227         vm_map_unlock(map);
7228         /*
7229          *      Deallocate the object only after removing all
7230          *      pmap entries pointing to its pages.
7231          */
7232         if (submap)
7233                 vm_map_deallocate(submap);
7234         else
7235                 vm_object_deallocate(object);
7236
7237 }
7238
7239 void
7240 vm_map_submap_pmap_clean(
7241         vm_map_t        map,
7242         vm_map_offset_t start,
7243         vm_map_offset_t end,
7244         vm_map_t        sub_map,
7245         vm_map_offset_t offset)
7246 {
7247         vm_map_offset_t submap_start;
7248         vm_map_offset_t submap_end;
7249         vm_map_size_t   remove_size;
7250         vm_map_entry_t  entry;
7251
7252         submap_end = offset + (end - start);
7253         submap_start = offset;
7254
7255         vm_map_lock_read(sub_map);
7256         if(vm_map_lookup_entry(sub_map, offset, &entry)) {
7257
7258                 remove_size = (entry->vme_end - entry->vme_start);
7259                 if(offset > entry->vme_start)
7260                         remove_size -= offset - entry->vme_start;
7261
7262
7263                 if(submap_end < entry->vme_end) {
7264                         remove_size -=
7265                                 entry->vme_end - submap_end;
7266                 }
7267                 if(entry->is_sub_map) {
7268                         vm_map_submap_pmap_clean(
7269                                 sub_map,
7270                                 start,
7271                                 start + remove_size,
7272                                 VME_SUBMAP(entry),
7273                                 VME_OFFSET(entry));
7274                 } else {
7275
7276                         if((map->mapped_in_other_pmaps) && (map->map_refcnt)
7277                            && (VME_OBJECT(entry) != NULL)) {
7278                                 vm_object_pmap_protect_options(
7279                                         VME_OBJECT(entry),
7280                                         (VME_OFFSET(entry) +
7281                                          offset -
7282                                          entry->vme_start),
7283                                         remove_size,
7284                                         PMAP_NULL,
7285                                         entry->vme_start,
7286                                         VM_PROT_NONE,
7287                                         PMAP_OPTIONS_REMOVE);
7288                         } else {
7289                                 pmap_remove(map->pmap,
7290                                             (addr64_t)start,
7291                                             (addr64_t)(start + remove_size));
7292                         }
7293                 }
7294         }
7295
7296         entry = entry->vme_next;
7297
7298         while((entry != vm_map_to_entry(sub_map))
7299               && (entry->vme_start < submap_end)) {
7300                 remove_size = (entry->vme_end - entry->vme_start);
7301                 if(submap_end < entry->vme_end) {
7302                         remove_size -= entry->vme_end - submap_end;
7303                 }
7304                 if(entry->is_sub_map) {
7305                         vm_map_submap_pmap_clean(
7306                                 sub_map,
7307                                 (start + entry->vme_start) - offset,
7308                                 ((start + entry->vme_start) - offset) + remove_size,
7309                                 VME_SUBMAP(entry),
7310                                 VME_OFFSET(entry));
7311                 } else {
7312                         if((map->mapped_in_other_pmaps) && (map->map_refcnt)
7313                            && (VME_OBJECT(entry) != NULL)) {
7314                                 vm_object_pmap_protect_options(
7315                                         VME_OBJECT(entry),
7316                                         VME_OFFSET(entry),
7317                                         remove_size,
7318                                         PMAP_NULL,
7319                                         entry->vme_start,
7320                                         VM_PROT_NONE,
7321                                         PMAP_OPTIONS_REMOVE);
7322                         } else {
7323                                 pmap_remove(map->pmap,
7324                                             (addr64_t)((start + entry->vme_start)
7325                                                        - offset),
7326                                             (addr64_t)(((start + entry->vme_start)
7327                                                         - offset) + remove_size));
7328                         }
7329                 }
7330                 entry = entry->vme_next;
7331         }
7332         vm_map_unlock_read(sub_map);
7333         return;
7334 }
7335
7336 /*
7337  *     virt_memory_guard_ast:
7338  *
7339  *     Handle the AST callout for a virtual memory guard.
7340  *         raise an EXC_GUARD exception and terminate the task
7341  *     if configured to do so.
7342  */
7343 void
7344 virt_memory_guard_ast(
7345         thread_t thread,
7346         mach_exception_data_type_t code,
7347         mach_exception_data_type_t subcode)
7348 {
7349         task_t task = thread->task;
7350         assert(task != kernel_task);
7351         assert(task == current_task());
7352         uint32_t behavior;
7353
7354         behavior = task->task_exc_guard;
7355
7356         /* Is delivery enabled */
7357         if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7358                 return;
7359         }
7360
7361         /* If only once, make sure we're that once */
7362         while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7363                 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7364
7365                 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7366                         break;
7367                 }
7368                 behavior = task->task_exc_guard;
7369                 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7370                         return;
7371                 }
7372         }
7373
7374         /* Raise exception via corpse fork or synchronously */
7375         if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7376             (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7377                 task_violated_guard(code, subcode, NULL);
7378         } else {
7379                 task_exception_notify(EXC_GUARD, code, subcode);
7380         }
7381
7382         /* Terminate the task if desired */
7383         if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7384                 task_bsdtask_kill(current_task());
7385         }
7386 }
7387
7388 /*
7389  *     vm_map_guard_exception:
7390  *
7391  *     Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7392  *
7393  *     Right now, we do this when we find nothing mapped, or a
7394  *     gap in the mapping when a user address space deallocate
7395  *     was requested. We report the address of the first gap found.
7396  */
7397 static void
7398 vm_map_guard_exception(
7399         vm_map_offset_t gap_start,
7400         unsigned reason)
7401 {
7402         mach_exception_code_t code = 0;
7403         unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7404         unsigned int target = 0; /* should we pass in pid associated with map? */
7405         mach_exception_data_type_t subcode = (uint64_t)gap_start;
7406
7407         /* Can't deliver exceptions to kernel task */
7408         if (current_task() == kernel_task)
7409                 return;
7410
7411         EXC_GUARD_ENCODE_TYPE(code, guard_type);
7412         EXC_GUARD_ENCODE_FLAVOR(code, reason);
7413         EXC_GUARD_ENCODE_TARGET(code, target);
7414         thread_guard_violation(current_thread(), code, subcode);
7415 }
7416
7417 /*
7418  *      vm_map_delete:  [ internal use only ]
7419  *
7420  *      Deallocates the given address range from the target map.
7421  *      Removes all user wirings. Unwires one kernel wiring if
7422  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
7423  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
7424  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7425  *
7426  *      This routine is called with map locked and leaves map locked.
7427  */
7428 static kern_return_t
7429 vm_map_delete(
7430         vm_map_t                map,
7431         vm_map_offset_t         start,
7432         vm_map_offset_t         end,
7433         int                     flags,
7434         vm_map_t                zap_map)
7435 {
7436         vm_map_entry_t          entry, next;
7437         struct   vm_map_entry   *first_entry, tmp_entry;
7438         vm_map_offset_t         s;
7439         vm_object_t             object;
7440         boolean_t               need_wakeup;
7441         unsigned int            last_timestamp = ~0; /* unlikely value */
7442         int                     interruptible;
7443         vm_map_offset_t         gap_start;
7444         vm_map_offset_t         save_start = start;
7445         vm_map_offset_t         save_end = end;
7446         const vm_map_offset_t   FIND_GAP = 1;   /* a not page aligned value */
7447         const vm_map_offset_t   GAPS_OK = 2;    /* a different not page aligned value */
7448
7449         if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK))
7450                 gap_start = FIND_GAP;
7451         else
7452                 gap_start = GAPS_OK;
7453
7454         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7455                 THREAD_ABORTSAFE : THREAD_UNINT;
7456
7457         /*
7458          * All our DMA I/O operations in IOKit are currently done by
7459          * wiring through the map entries of the task requesting the I/O.
7460          * Because of this, we must always wait for kernel wirings
7461          * to go away on the entries before deleting them.
7462          *
7463          * Any caller who wants to actually remove a kernel wiring
7464          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7465          * properly remove one wiring instead of blasting through
7466          * them all.
7467          */
7468         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7469
7470         while(1) {
7471                 /*
7472                  *      Find the start of the region, and clip it
7473                  */
7474                 if (vm_map_lookup_entry(map, start, &first_entry)) {
7475                         entry = first_entry;
7476                         if (map == kalloc_map &&
7477                             (entry->vme_start != start ||
7478                              entry->vme_end != end)) {
7479                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7480                                       "mismatched entry %p [0x%llx:0x%llx]\n",
7481                                       map,
7482                                       (uint64_t)start,
7483                                       (uint64_t)end,
7484                                       entry,
7485                                       (uint64_t)entry->vme_start,
7486                                       (uint64_t)entry->vme_end);
7487                         }
7488
7489                         /*
7490                          * If in a superpage, extend the range to include the start of the mapping.
7491                          */
7492                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7493                                 start = SUPERPAGE_ROUND_DOWN(start);
7494                                 continue;
7495                         }
7496
7497                         if (start == entry->vme_start) {
7498                                 /*
7499                                  * No need to clip.  We don't want to cause
7500                                  * any unnecessary unnesting in this case...
7501                                  */
7502                         } else {
7503                                 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7504                                     entry->map_aligned &&
7505                                     !VM_MAP_PAGE_ALIGNED(
7506                                             start,
7507                                             VM_MAP_PAGE_MASK(map))) {
7508                                         /*
7509                                          * The entry will no longer be
7510                                          * map-aligned after clipping
7511                                          * and the caller said it's OK.
7512                                          */
7513                                         entry->map_aligned = FALSE;
7514                                 }
7515                                 if (map == kalloc_map) {
7516                                         panic("vm_map_delete(%p,0x%llx,0x%llx):"
7517                                               " clipping %p at 0x%llx\n",
7518                                               map,
7519                                               (uint64_t)start,
7520                                               (uint64_t)end,
7521                                               entry,
7522                                               (uint64_t)start);
7523                                 }
7524                                 vm_map_clip_start(map, entry, start);
7525                         }
7526
7527                         /*
7528                          *      Fix the lookup hint now, rather than each
7529                          *      time through the loop.
7530                          */
7531                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7532
7533                 } else {
7534
7535                         if (map->pmap == kernel_pmap &&
7536                             map->map_refcnt != 0) {
7537                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7538                                       "no map entry at 0x%llx\n",
7539                                       map,
7540                                       (uint64_t)start,
7541                                       (uint64_t)end,
7542                                       (uint64_t)start);
7543                         }
7544                         entry = first_entry->vme_next;
7545                         if (gap_start == FIND_GAP)
7546                                 gap_start = start;
7547                 }
7548                 break;
7549         }
7550         if (entry->superpage_size)
7551                 end = SUPERPAGE_ROUND_UP(end);
7552
7553         need_wakeup = FALSE;
7554         /*
7555          *      Step through all entries in this region
7556          */
7557         s = entry->vme_start;
7558         while ((entry != vm_map_to_entry(map)) && (s < end)) {
7559                 /*
7560                  * At this point, we have deleted all the memory entries
7561                  * between "start" and "s".  We still need to delete
7562                  * all memory entries between "s" and "end".
7563                  * While we were blocked and the map was unlocked, some
7564                  * new memory entries could have been re-allocated between
7565                  * "start" and "s" and we don't want to mess with those.
7566                  * Some of those entries could even have been re-assembled
7567                  * with an entry after "s" (in vm_map_simplify_entry()), so
7568                  * we may have to vm_map_clip_start() again.
7569                  */
7570
7571                 if (entry->vme_start >= s) {
7572                         /*
7573                          * This entry starts on or after "s"
7574                          * so no need to clip its start.
7575                          */
7576                 } else {
7577                         /*
7578                          * This entry has been re-assembled by a
7579                          * vm_map_simplify_entry().  We need to
7580                          * re-clip its start.
7581                          */
7582                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7583                             entry->map_aligned &&
7584                             !VM_MAP_PAGE_ALIGNED(s,
7585                                                  VM_MAP_PAGE_MASK(map))) {
7586                                 /*
7587                                  * The entry will no longer be map-aligned
7588                                  * after clipping and the caller said it's OK.
7589                                  */
7590                                 entry->map_aligned = FALSE;
7591                         }
7592                         if (map == kalloc_map) {
7593                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7594                                       "clipping %p at 0x%llx\n",
7595                                       map,
7596                                       (uint64_t)start,
7597                                       (uint64_t)end,
7598                                       entry,
7599                                       (uint64_t)s);
7600                         }
7601                         vm_map_clip_start(map, entry, s);
7602                 }
7603                 if (entry->vme_end <= end) {
7604                         /*
7605                          * This entry is going away completely, so no need
7606                          * to clip and possibly cause an unnecessary unnesting.
7607                          */
7608                 } else {
7609                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7610                             entry->map_aligned &&
7611                             !VM_MAP_PAGE_ALIGNED(end,
7612                                                  VM_MAP_PAGE_MASK(map))) {
7613                                 /*
7614                                  * The entry will no longer be map-aligned
7615                                  * after clipping and the caller said it's OK.
7616                                  */
7617                                 entry->map_aligned = FALSE;
7618                         }
7619                         if (map == kalloc_map) {
7620                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7621                                       "clipping %p at 0x%llx\n",
7622                                       map,
7623                                       (uint64_t)start,
7624                                       (uint64_t)end,
7625                                       entry,
7626                                       (uint64_t)end);
7627                         }
7628                         vm_map_clip_end(map, entry, end);
7629                 }
7630
7631                 if (entry->permanent) {
7632                         if (map->pmap == kernel_pmap) {
7633                                 panic("%s(%p,0x%llx,0x%llx): "
7634                                       "attempt to remove permanent "
7635                                       "VM map entry "
7636                                       "%p [0x%llx:0x%llx]\n",
7637                                       __FUNCTION__,
7638                                       map,
7639                                       (uint64_t) start,
7640                                       (uint64_t) end,
7641                                       entry,
7642                                       (uint64_t) entry->vme_start,
7643                                       (uint64_t) entry->vme_end);
7644                         } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7645 //                              printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7646                                 entry->permanent = FALSE;
7647 #if PMAP_CS
7648                         } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7649                                 entry->permanent = FALSE;
7650
7651                                 printf("%d[%s] %s(0x%llx,0x%llx): "
7652                                            "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7653                                            "prot 0x%x/0x%x\n",
7654                                            proc_selfpid(),
7655                                            (current_task()->bsd_info
7656                                                 ? proc_name_address(current_task()->bsd_info)
7657                                                 : "?"),
7658                                            __FUNCTION__,
7659                                            (uint64_t) start,
7660                                            (uint64_t) end,
7661                                            (uint64_t)entry->vme_start,
7662                                            (uint64_t)entry->vme_end,
7663                                            entry->protection,
7664                                            entry->max_protection);
7665 #endif
7666                         } else {
7667                                 if (vm_map_executable_immutable_verbose) {
7668                                         printf("%d[%s] %s(0x%llx,0x%llx): "
7669                                                    "permanent entry [0x%llx:0x%llx] "
7670                                                    "prot 0x%x/0x%x\n",
7671                                                    proc_selfpid(),
7672                                                    (current_task()->bsd_info
7673                                                         ? proc_name_address(current_task()->bsd_info)
7674                                                         : "?"),
7675                                                    __FUNCTION__,
7676                                                    (uint64_t) start,
7677                                                    (uint64_t) end,
7678                                                    (uint64_t)entry->vme_start,
7679                                                    (uint64_t)entry->vme_end,
7680                                                    entry->protection,
7681                                                    entry->max_protection);
7682                                 }
7683                                 /*
7684                                  * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7685                                  */
7686                                 DTRACE_VM5(vm_map_delete_permanent,
7687                                            vm_map_offset_t, entry->vme_start,
7688                                            vm_map_offset_t, entry->vme_end,
7689                                            vm_prot_t, entry->protection,
7690                                            vm_prot_t, entry->max_protection,
7691                                            int, VME_ALIAS(entry));
7692                         }
7693                 }
7694
7695
7696                 if (entry->in_transition) {
7697                         wait_result_t wait_result;
7698
7699                         /*
7700                          * Another thread is wiring/unwiring this entry.
7701                          * Let the other thread know we are waiting.
7702                          */
7703                         assert(s == entry->vme_start);
7704                         entry->needs_wakeup = TRUE;
7705
7706                         /*
7707                          * wake up anybody waiting on entries that we have
7708                          * already unwired/deleted.
7709                          */
7710                         if (need_wakeup) {
7711                                 vm_map_entry_wakeup(map);
7712                                 need_wakeup = FALSE;
7713                         }
7714
7715                         wait_result = vm_map_entry_wait(map, interruptible);
7716
7717                         if (interruptible &&
7718                             wait_result == THREAD_INTERRUPTED) {
7719                                 /*
7720                                  * We do not clear the needs_wakeup flag,
7721                                  * since we cannot tell if we were the only one.
7722                                  */
7723                                 return KERN_ABORTED;
7724                         }
7725
7726                         /*
7727                          * The entry could have been clipped or it
7728                          * may not exist anymore.  Look it up again.
7729                          */
7730                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
7731                                 /*
7732                                  * User: use the next entry
7733                                  */
7734                                 if (gap_start == FIND_GAP)
7735                                         gap_start = s;
7736                                 entry = first_entry->vme_next;
7737                                 s = entry->vme_start;
7738                         } else {
7739                                 entry = first_entry;
7740                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7741                         }
7742                         last_timestamp = map->timestamp;
7743                         continue;
7744                 } /* end in_transition */
7745
7746                 if (entry->wired_count) {
7747                         boolean_t       user_wire;
7748
7749                         user_wire = entry->user_wired_count > 0;
7750
7751                         /*
7752                          *      Remove a kernel wiring if requested
7753                          */
7754                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
7755                                 entry->wired_count--;
7756                         }
7757
7758                         /*
7759                          *      Remove all user wirings for proper accounting
7760                          */
7761                         if (entry->user_wired_count > 0) {
7762                                 while (entry->user_wired_count)
7763                                         subtract_wire_counts(map, entry, user_wire);
7764                         }
7765
7766                         if (entry->wired_count != 0) {
7767                                 assert(map != kernel_map);
7768                                 /*
7769                                  * Cannot continue.  Typical case is when
7770                                  * a user thread has physical io pending on
7771                                  * on this page.  Either wait for the
7772                                  * kernel wiring to go away or return an
7773                                  * error.
7774                                  */
7775                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7776                                         wait_result_t wait_result;
7777
7778                                         assert(s == entry->vme_start);
7779                                         entry->needs_wakeup = TRUE;
7780                                         wait_result = vm_map_entry_wait(map,
7781                                                                         interruptible);
7782
7783                                         if (interruptible &&
7784                                             wait_result == THREAD_INTERRUPTED) {
7785                                                 /*
7786                                                  * We do not clear the
7787                                                  * needs_wakeup flag, since we
7788                                                  * cannot tell if we were the
7789                                                  * only one.
7790                                                  */
7791                                                 return KERN_ABORTED;
7792                                         }
7793
7794                                         /*
7795                                          * The entry could have been clipped or
7796                                          * it may not exist anymore.  Look it
7797                                          * up again.
7798                                          */
7799                                         if (!vm_map_lookup_entry(map, s,
7800                                                                  &first_entry)) {
7801                                                 assert(map != kernel_map);
7802                                                 /*
7803                                                  * User: use the next entry
7804                                                  */
7805                                                 if (gap_start == FIND_GAP)
7806                                                         gap_start = s;
7807                                                 entry = first_entry->vme_next;
7808                                                 s = entry->vme_start;
7809                                         } else {
7810                                                 entry = first_entry;
7811                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7812                                         }
7813                                         last_timestamp = map->timestamp;
7814                                         continue;
7815                                 }
7816                                 else {
7817                                         return KERN_FAILURE;
7818                                 }
7819                         }
7820
7821                         entry->in_transition = TRUE;
7822                         /*
7823                          * copy current entry.  see comment in vm_map_wire()
7824                          */
7825                         tmp_entry = *entry;
7826                         assert(s == entry->vme_start);
7827
7828                         /*
7829                          * We can unlock the map now. The in_transition
7830                          * state guarentees existance of the entry.
7831                          */
7832                         vm_map_unlock(map);
7833
7834                         if (tmp_entry.is_sub_map) {
7835                                 vm_map_t sub_map;
7836                                 vm_map_offset_t sub_start, sub_end;
7837                                 pmap_t pmap;
7838                                 vm_map_offset_t pmap_addr;
7839
7840
7841                                 sub_map = VME_SUBMAP(&tmp_entry);
7842                                 sub_start = VME_OFFSET(&tmp_entry);
7843                                 sub_end = sub_start + (tmp_entry.vme_end -
7844                                                        tmp_entry.vme_start);
7845                                 if (tmp_entry.use_pmap) {
7846                                         pmap = sub_map->pmap;
7847                                         pmap_addr = tmp_entry.vme_start;
7848                                 } else {
7849                                         pmap = map->pmap;
7850                                         pmap_addr = tmp_entry.vme_start;
7851                                 }
7852                                 (void) vm_map_unwire_nested(sub_map,
7853                                                             sub_start, sub_end,
7854                                                             user_wire,
7855                                                             pmap, pmap_addr);
7856                         } else {
7857
7858                                 if (VME_OBJECT(&tmp_entry) == kernel_object) {
7859                                         pmap_protect_options(
7860                                                 map->pmap,
7861                                                 tmp_entry.vme_start,
7862                                                 tmp_entry.vme_end,
7863                                                 VM_PROT_NONE,
7864                                                 PMAP_OPTIONS_REMOVE,
7865                                                 NULL);
7866                                 }
7867                                 vm_fault_unwire(map, &tmp_entry,
7868                                                 VME_OBJECT(&tmp_entry) == kernel_object,
7869                                                 map->pmap, tmp_entry.vme_start);
7870                         }
7871
7872                         vm_map_lock(map);
7873
7874                         if (last_timestamp+1 != map->timestamp) {
7875                                 /*
7876                                  * Find the entry again.  It could have
7877                                  * been clipped after we unlocked the map.
7878                                  */
7879                                 if (!vm_map_lookup_entry(map, s, &first_entry)){
7880                                         assert((map != kernel_map) &&
7881                                                (!entry->is_sub_map));
7882                                         if (gap_start == FIND_GAP)
7883                                                 gap_start = s;
7884                                         first_entry = first_entry->vme_next;
7885                                         s = first_entry->vme_start;
7886                                 } else {
7887                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7888                                 }
7889                         } else {
7890                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7891                                 first_entry = entry;
7892                         }
7893
7894                         last_timestamp = map->timestamp;
7895
7896                         entry = first_entry;
7897                         while ((entry != vm_map_to_entry(map)) &&
7898                                (entry->vme_start < tmp_entry.vme_end)) {
7899                                 assert(entry->in_transition);
7900                                 entry->in_transition = FALSE;
7901                                 if (entry->needs_wakeup) {
7902                                         entry->needs_wakeup = FALSE;
7903                                         need_wakeup = TRUE;
7904                                 }
7905                                 entry = entry->vme_next;
7906                         }
7907                         /*
7908                          * We have unwired the entry(s).  Go back and
7909                          * delete them.
7910                          */
7911                         entry = first_entry;
7912                         continue;
7913                 }
7914
7915                 /* entry is unwired */
7916                 assert(entry->wired_count == 0);
7917                 assert(entry->user_wired_count == 0);
7918
7919                 assert(s == entry->vme_start);
7920
7921                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
7922                         /*
7923                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
7924                          * vm_map_delete(), some map entries might have been
7925                          * transferred to a "zap_map", which doesn't have a
7926                          * pmap.  The original pmap has already been flushed
7927                          * in the vm_map_delete() call targeting the original
7928                          * map, but when we get to destroying the "zap_map",
7929                          * we don't have any pmap to flush, so let's just skip
7930                          * all this.
7931                          */
7932                 } else if (entry->is_sub_map) {
7933                         if (entry->use_pmap) {
7934 #ifndef NO_NESTED_PMAP
7935                                 int pmap_flags;
7936
7937                                 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
7938                                         /*
7939                                          * This is the final cleanup of the
7940                                          * address space being terminated.
7941                                          * No new mappings are expected and
7942                                          * we don't really need to unnest the
7943                                          * shared region (and lose the "global"
7944                                          * pmap mappings, if applicable).
7945                                          *
7946                                          * Tell the pmap layer that we're
7947                                          * "clean" wrt nesting.
7948                                          */
7949                                         pmap_flags = PMAP_UNNEST_CLEAN;
7950                                 } else {
7951                                         /*
7952                                          * We're unmapping part of the nested
7953                                          * shared region, so we can't keep the
7954                                          * nested pmap.
7955                                          */
7956                                         pmap_flags = 0;
7957                                 }
7958                                 pmap_unnest_options(
7959                                         map->pmap,
7960                                         (addr64_t)entry->vme_start,
7961                                         entry->vme_end - entry->vme_start,
7962                                         pmap_flags);
7963 #endif  /* NO_NESTED_PMAP */
7964                                 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
7965                                         /* clean up parent map/maps */
7966                                         vm_map_submap_pmap_clean(
7967                                                 map, entry->vme_start,
7968                                                 entry->vme_end,
7969                                                 VME_SUBMAP(entry),
7970                                                 VME_OFFSET(entry));
7971                                 }
7972                         } else {
7973                                 vm_map_submap_pmap_clean(
7974                                         map, entry->vme_start, entry->vme_end,
7975                                         VME_SUBMAP(entry),
7976                                         VME_OFFSET(entry));
7977                         }
7978                 } else if (VME_OBJECT(entry) != kernel_object &&
7979                            VME_OBJECT(entry) != compressor_object) {
7980                         object = VME_OBJECT(entry);
7981                         if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
7982                                 vm_object_pmap_protect_options(
7983                                         object, VME_OFFSET(entry),
7984                                         entry->vme_end - entry->vme_start,
7985                                         PMAP_NULL,
7986                                         entry->vme_start,
7987                                         VM_PROT_NONE,
7988                                         PMAP_OPTIONS_REMOVE);
7989                         } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
7990                                    (map->pmap == kernel_pmap)) {
7991                                 /* Remove translations associated
7992                                  * with this range unless the entry
7993                                  * does not have an object, or
7994                                  * it's the kernel map or a descendant
7995                                  * since the platform could potentially
7996                                  * create "backdoor" mappings invisible
7997                                  * to the VM. It is expected that
7998                                  * objectless, non-kernel ranges
7999                                  * do not have such VM invisible
8000                                  * translations.
8001                                  */
8002                                 pmap_remove_options(map->pmap,
8003                                                     (addr64_t)entry->vme_start,
8004                                                     (addr64_t)entry->vme_end,
8005                                                     PMAP_OPTIONS_REMOVE);
8006                         }
8007                 }
8008
8009                 if (entry->iokit_acct) {
8010                         /* alternate accounting */
8011                         DTRACE_VM4(vm_map_iokit_unmapped_region,
8012                                    vm_map_t, map,
8013                                    vm_map_offset_t, entry->vme_start,
8014                                    vm_map_offset_t, entry->vme_end,
8015                                    int, VME_ALIAS(entry));
8016                         vm_map_iokit_unmapped_region(map,
8017                                                      (entry->vme_end -
8018                                                       entry->vme_start));
8019                         entry->iokit_acct = FALSE;
8020                         entry->use_pmap = FALSE;
8021                 }
8022
8023                 /*
8024                  * All pmap mappings for this map entry must have been
8025                  * cleared by now.
8026                  */
8027 #if DEBUG
8028                 assert(vm_map_pmap_is_empty(map,
8029                                             entry->vme_start,
8030                                             entry->vme_end));
8031 #endif /* DEBUG */
8032
8033                 next = entry->vme_next;
8034
8035                 if (map->pmap == kernel_pmap &&
8036                     map->map_refcnt != 0 &&
8037                     entry->vme_end < end &&
8038                     (next == vm_map_to_entry(map) ||
8039                      next->vme_start != entry->vme_end)) {
8040                         panic("vm_map_delete(%p,0x%llx,0x%llx): "
8041                               "hole after %p at 0x%llx\n",
8042                               map,
8043                               (uint64_t)start,
8044                               (uint64_t)end,
8045                               entry,
8046                               (uint64_t)entry->vme_end);
8047                 }
8048
8049                 /*
8050                  * If the desired range didn't end with "entry", then there is a gap if
8051                  * we wrapped around to the start of the map or if "entry" and "next"
8052                  * aren't contiguous.
8053                  *
8054                  * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8055                  * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8056                  */
8057                 if (gap_start == FIND_GAP &&
8058                     vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8059                     (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8060                         gap_start = entry->vme_end;
8061                 }
8062                 s = next->vme_start;
8063                 last_timestamp = map->timestamp;
8064
8065                 if (entry->permanent) {
8066                         /*
8067                          * A permanent entry can not be removed, so leave it
8068                          * in place but remove all access permissions.
8069                          */
8070                         entry->protection = VM_PROT_NONE;
8071                         entry->max_protection = VM_PROT_NONE;
8072                 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8073                            zap_map != VM_MAP_NULL) {
8074                         vm_map_size_t entry_size;
8075                         /*
8076                          * The caller wants to save the affected VM map entries
8077                          * into the "zap_map".  The caller will take care of
8078                          * these entries.
8079                          */
8080                         /* unlink the entry from "map" ... */
8081                         vm_map_store_entry_unlink(map, entry);
8082                         /* ... and add it to the end of the "zap_map" */
8083                         vm_map_store_entry_link(zap_map,
8084                                                 vm_map_last_entry(zap_map),
8085                                                 entry,
8086                                                 VM_MAP_KERNEL_FLAGS_NONE);
8087                         entry_size = entry->vme_end - entry->vme_start;
8088                         map->size -= entry_size;
8089                         zap_map->size += entry_size;
8090                         /* we didn't unlock the map, so no timestamp increase */
8091                         last_timestamp--;
8092                 } else {
8093                         vm_map_entry_delete(map, entry);
8094                         /* vm_map_entry_delete unlocks the map */
8095                         vm_map_lock(map);
8096                 }
8097
8098                 entry = next;
8099
8100                 if(entry == vm_map_to_entry(map)) {
8101                         break;
8102                 }
8103                 if (last_timestamp + 1 != map->timestamp) {
8104                         /*
8105                          * We are responsible for deleting everything
8106                          * from the given space. If someone has interfered,
8107                          * we pick up where we left off. Back fills should
8108                          * be all right for anyone, except map_delete, and
8109                          * we have to assume that the task has been fully
8110                          * disabled before we get here
8111                          */
8112                         if (!vm_map_lookup_entry(map, s, &entry)){
8113                                 entry = entry->vme_next;
8114
8115                                 /*
8116                                  * Nothing found for s. If we weren't already done, then there is a gap.
8117                                  */
8118                                 if (gap_start == FIND_GAP && s < end)
8119                                         gap_start = s;
8120                                 s = entry->vme_start;
8121                         } else {
8122                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8123                         }
8124                         /*
8125                          * others can not only allocate behind us, we can
8126                          * also see coalesce while we don't have the map lock
8127                          */
8128                         if (entry == vm_map_to_entry(map)) {
8129                                 break;
8130                         }
8131                 }
8132                 last_timestamp = map->timestamp;
8133         }
8134
8135         if (map->wait_for_space)
8136                 thread_wakeup((event_t) map);
8137         /*
8138          * wake up anybody waiting on entries that we have already deleted.
8139          */
8140         if (need_wakeup)
8141                 vm_map_entry_wakeup(map);
8142
8143         if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8144                 DTRACE_VM3(kern_vm_deallocate_gap,
8145                     vm_map_offset_t, gap_start,
8146                     vm_map_offset_t, save_start,
8147                     vm_map_offset_t, save_end);
8148                 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8149 #if defined(DEVELOPMENT) || defined(DEBUG)
8150                         /* log just once if not checking, otherwise log each one */
8151                         if (!map->warned_delete_gap ||
8152                             (task_exc_guard_default & TASK_EXC_GUARD_VM_ALL) != 0) {
8153                                 printf("vm_map_delete: map %p [%p...%p] nothing at %p\n",
8154                                     (void *)map, (void *)save_start, (void *)save_end,
8155                                     (void *)gap_start);
8156                                 if (!map->warned_delete_gap) {
8157                                         map->warned_delete_gap = 1;
8158                                 }
8159                         }
8160 #endif
8161                         vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8162                 }
8163         }
8164
8165         return KERN_SUCCESS;
8166 }
8167
8168 /*
8169  *      vm_map_remove:
8170  *
8171  *      Remove the given address range from the target map.
8172  *      This is the exported form of vm_map_delete.
8173  */
8174 kern_return_t
8175 vm_map_remove(
8176         vm_map_t        map,
8177         vm_map_offset_t start,
8178         vm_map_offset_t end,
8179          boolean_t      flags)
8180 {
8181         kern_return_t   result;
8182
8183         vm_map_lock(map);
8184         VM_MAP_RANGE_CHECK(map, start, end);
8185         /*
8186          * For the zone_map, the kernel controls the allocation/freeing of memory.
8187          * Any free to the zone_map should be within the bounds of the map and
8188          * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8189          * free to the zone_map into a no-op, there is a problem and we should
8190          * panic.
8191          */
8192         if ((map == zone_map) && (start == end))
8193                 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8194         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8195         vm_map_unlock(map);
8196
8197         return(result);
8198 }
8199
8200 /*
8201  *      vm_map_remove_locked:
8202  *
8203  *      Remove the given address range from the target locked map.
8204  *      This is the exported form of vm_map_delete.
8205  */
8206 kern_return_t
8207 vm_map_remove_locked(
8208         vm_map_t        map,
8209         vm_map_offset_t start,
8210         vm_map_offset_t end,
8211         boolean_t       flags)
8212 {
8213         kern_return_t   result;
8214
8215         VM_MAP_RANGE_CHECK(map, start, end);
8216         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8217         return(result);
8218 }
8219
8220
8221 /*
8222  *      Routine:        vm_map_copy_allocate
8223  *
8224  *      Description:
8225  *              Allocates and initializes a map copy object.
8226  */
8227 static vm_map_copy_t
8228 vm_map_copy_allocate(void)
8229 {
8230         vm_map_copy_t new_copy;
8231
8232         new_copy = zalloc(vm_map_copy_zone);
8233         bzero(new_copy, sizeof (*new_copy));
8234         new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8235         vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8236         vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8237         return new_copy;
8238 }
8239
8240 /*
8241  *      Routine:        vm_map_copy_discard
8242  *
8243  *      Description:
8244  *              Dispose of a map copy object (returned by
8245  *              vm_map_copyin).
8246  */
8247 void
8248 vm_map_copy_discard(
8249         vm_map_copy_t   copy)
8250 {
8251         if (copy == VM_MAP_COPY_NULL)
8252                 return;
8253
8254         switch (copy->type) {
8255         case VM_MAP_COPY_ENTRY_LIST:
8256                 while (vm_map_copy_first_entry(copy) !=
8257                        vm_map_copy_to_entry(copy)) {
8258                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
8259
8260                         vm_map_copy_entry_unlink(copy, entry);
8261                         if (entry->is_sub_map) {
8262                                 vm_map_deallocate(VME_SUBMAP(entry));
8263                         } else {
8264                                 vm_object_deallocate(VME_OBJECT(entry));
8265                         }
8266                         vm_map_copy_entry_dispose(copy, entry);
8267                 }
8268                 break;
8269         case VM_MAP_COPY_OBJECT:
8270                 vm_object_deallocate(copy->cpy_object);
8271                 break;
8272         case VM_MAP_COPY_KERNEL_BUFFER:
8273
8274                 /*
8275                  * The vm_map_copy_t and possibly the data buffer were
8276                  * allocated by a single call to kalloc(), i.e. the
8277                  * vm_map_copy_t was not allocated out of the zone.
8278                  */
8279                 if (copy->size > msg_ool_size_small || copy->offset)
8280                         panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8281                               (long long)copy->size, (long long)copy->offset);
8282                 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8283                 return;
8284         }
8285         zfree(vm_map_copy_zone, copy);
8286 }
8287
8288 /*
8289  *      Routine:        vm_map_copy_copy
8290  *
8291  *      Description:
8292  *                      Move the information in a map copy object to
8293  *                      a new map copy object, leaving the old one
8294  *                      empty.
8295  *
8296  *                      This is used by kernel routines that need
8297  *                      to look at out-of-line data (in copyin form)
8298  *                      before deciding whether to return SUCCESS.
8299  *                      If the routine returns FAILURE, the original
8300  *                      copy object will be deallocated; therefore,
8301  *                      these routines must make a copy of the copy
8302  *                      object and leave the original empty so that
8303  *                      deallocation will not fail.
8304  */
8305 vm_map_copy_t
8306 vm_map_copy_copy(
8307         vm_map_copy_t   copy)
8308 {
8309         vm_map_copy_t   new_copy;
8310
8311         if (copy == VM_MAP_COPY_NULL)
8312                 return VM_MAP_COPY_NULL;
8313
8314         /*
8315          * Allocate a new copy object, and copy the information
8316          * from the old one into it.
8317          */
8318
8319         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8320         *new_copy = *copy;
8321
8322         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8323                 /*
8324                  * The links in the entry chain must be
8325                  * changed to point to the new copy object.
8326                  */
8327                 vm_map_copy_first_entry(copy)->vme_prev
8328                         = vm_map_copy_to_entry(new_copy);
8329                 vm_map_copy_last_entry(copy)->vme_next
8330                         = vm_map_copy_to_entry(new_copy);
8331         }
8332
8333         /*
8334          * Change the old copy object into one that contains
8335          * nothing to be deallocated.
8336          */
8337         copy->type = VM_MAP_COPY_OBJECT;
8338         copy->cpy_object = VM_OBJECT_NULL;
8339
8340         /*
8341          * Return the new object.
8342          */
8343         return new_copy;
8344 }
8345
8346 static kern_return_t
8347 vm_map_overwrite_submap_recurse(
8348         vm_map_t        dst_map,
8349         vm_map_offset_t dst_addr,
8350         vm_map_size_t   dst_size)
8351 {
8352         vm_map_offset_t dst_end;
8353         vm_map_entry_t  tmp_entry;
8354         vm_map_entry_t  entry;
8355         kern_return_t   result;
8356         boolean_t       encountered_sub_map = FALSE;
8357
8358
8359
8360         /*
8361          *      Verify that the destination is all writeable
8362          *      initially.  We have to trunc the destination
8363          *      address and round the copy size or we'll end up
8364          *      splitting entries in strange ways.
8365          */
8366
8367         dst_end = vm_map_round_page(dst_addr + dst_size,
8368                                     VM_MAP_PAGE_MASK(dst_map));
8369         vm_map_lock(dst_map);
8370
8371 start_pass_1:
8372         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8373                 vm_map_unlock(dst_map);
8374                 return(KERN_INVALID_ADDRESS);
8375         }
8376
8377         vm_map_clip_start(dst_map,
8378                           tmp_entry,
8379                           vm_map_trunc_page(dst_addr,
8380                                             VM_MAP_PAGE_MASK(dst_map)));
8381         if (tmp_entry->is_sub_map) {
8382                 /* clipping did unnest if needed */
8383                 assert(!tmp_entry->use_pmap);
8384         }
8385
8386         for (entry = tmp_entry;;) {
8387                 vm_map_entry_t  next;
8388
8389                 next = entry->vme_next;
8390                 while(entry->is_sub_map) {
8391                         vm_map_offset_t sub_start;
8392                         vm_map_offset_t sub_end;
8393                         vm_map_offset_t local_end;
8394
8395                         if (entry->in_transition) {
8396                                 /*
8397                                  * Say that we are waiting, and wait for entry.
8398                                  */
8399                                 entry->needs_wakeup = TRUE;
8400                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8401
8402                                 goto start_pass_1;
8403                         }
8404
8405                         encountered_sub_map = TRUE;
8406                         sub_start = VME_OFFSET(entry);
8407
8408                         if(entry->vme_end < dst_end)
8409                                 sub_end = entry->vme_end;
8410                         else
8411                                 sub_end = dst_end;
8412                         sub_end -= entry->vme_start;
8413                         sub_end += VME_OFFSET(entry);
8414                         local_end = entry->vme_end;
8415                         vm_map_unlock(dst_map);
8416
8417                         result = vm_map_overwrite_submap_recurse(
8418                                 VME_SUBMAP(entry),
8419                                 sub_start,
8420                                 sub_end - sub_start);
8421
8422                         if(result != KERN_SUCCESS)
8423                                 return result;
8424                         if (dst_end <= entry->vme_end)
8425                                 return KERN_SUCCESS;
8426                         vm_map_lock(dst_map);
8427                         if(!vm_map_lookup_entry(dst_map, local_end,
8428                                                 &tmp_entry)) {
8429                                 vm_map_unlock(dst_map);
8430                                 return(KERN_INVALID_ADDRESS);
8431                         }
8432                         entry = tmp_entry;
8433                         next = entry->vme_next;
8434                 }
8435
8436                 if ( ! (entry->protection & VM_PROT_WRITE)) {
8437                         vm_map_unlock(dst_map);
8438                         return(KERN_PROTECTION_FAILURE);
8439                 }
8440
8441                 /*
8442                  *      If the entry is in transition, we must wait
8443                  *      for it to exit that state.  Anything could happen
8444                  *      when we unlock the map, so start over.
8445                  */
8446                 if (entry->in_transition) {
8447
8448                         /*
8449                          * Say that we are waiting, and wait for entry.
8450                          */
8451                         entry->needs_wakeup = TRUE;
8452                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8453
8454                         goto start_pass_1;
8455                 }
8456
8457 /*
8458  *              our range is contained completely within this map entry
8459  */
8460                 if (dst_end <= entry->vme_end) {
8461                         vm_map_unlock(dst_map);
8462                         return KERN_SUCCESS;
8463                 }
8464 /*
8465  *              check that range specified is contiguous region
8466  */
8467                 if ((next == vm_map_to_entry(dst_map)) ||
8468                     (next->vme_start != entry->vme_end)) {
8469                         vm_map_unlock(dst_map);
8470                         return(KERN_INVALID_ADDRESS);
8471                 }
8472
8473                 /*
8474                  *      Check for permanent objects in the destination.
8475                  */
8476                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8477                     ((!VME_OBJECT(entry)->internal) ||
8478                      (VME_OBJECT(entry)->true_share))) {
8479                         if(encountered_sub_map) {
8480                                 vm_map_unlock(dst_map);
8481                                 return(KERN_FAILURE);
8482                         }
8483                 }
8484
8485
8486                 entry = next;
8487         }/* for */
8488         vm_map_unlock(dst_map);
8489         return(KERN_SUCCESS);
8490 }
8491
8492 /*
8493  *      Routine:        vm_map_copy_overwrite
8494  *
8495  *      Description:
8496  *              Copy the memory described by the map copy
8497  *              object (copy; returned by vm_map_copyin) onto
8498  *              the specified destination region (dst_map, dst_addr).
8499  *              The destination must be writeable.
8500  *
8501  *              Unlike vm_map_copyout, this routine actually
8502  *              writes over previously-mapped memory.  If the
8503  *              previous mapping was to a permanent (user-supplied)
8504  *              memory object, it is preserved.
8505  *
8506  *              The attributes (protection and inheritance) of the
8507  *              destination region are preserved.
8508  *
8509  *              If successful, consumes the copy object.
8510  *              Otherwise, the caller is responsible for it.
8511  *
8512  *      Implementation notes:
8513  *              To overwrite aligned temporary virtual memory, it is
8514  *              sufficient to remove the previous mapping and insert
8515  *              the new copy.  This replacement is done either on
8516  *              the whole region (if no permanent virtual memory
8517  *              objects are embedded in the destination region) or
8518  *              in individual map entries.
8519  *
8520  *              To overwrite permanent virtual memory , it is necessary
8521  *              to copy each page, as the external memory management
8522  *              interface currently does not provide any optimizations.
8523  *
8524  *              Unaligned memory also has to be copied.  It is possible
8525  *              to use 'vm_trickery' to copy the aligned data.  This is
8526  *              not done but not hard to implement.
8527  *
8528  *              Once a page of permanent memory has been overwritten,
8529  *              it is impossible to interrupt this function; otherwise,
8530  *              the call would be neither atomic nor location-independent.
8531  *              The kernel-state portion of a user thread must be
8532  *              interruptible.
8533  *
8534  *              It may be expensive to forward all requests that might
8535  *              overwrite permanent memory (vm_write, vm_copy) to
8536  *              uninterruptible kernel threads.  This routine may be
8537  *              called by interruptible threads; however, success is
8538  *              not guaranteed -- if the request cannot be performed
8539  *              atomically and interruptibly, an error indication is
8540  *              returned.
8541  */
8542
8543 static kern_return_t
8544 vm_map_copy_overwrite_nested(
8545         vm_map_t                dst_map,
8546         vm_map_address_t        dst_addr,
8547         vm_map_copy_t           copy,
8548         boolean_t               interruptible,
8549         pmap_t                  pmap,
8550         boolean_t               discard_on_success)
8551 {
8552         vm_map_offset_t         dst_end;
8553         vm_map_entry_t          tmp_entry;
8554         vm_map_entry_t          entry;
8555         kern_return_t           kr;
8556         boolean_t               aligned = TRUE;
8557         boolean_t               contains_permanent_objects = FALSE;
8558         boolean_t               encountered_sub_map = FALSE;
8559         vm_map_offset_t         base_addr;
8560         vm_map_size_t           copy_size;
8561         vm_map_size_t           total_size;
8562
8563
8564         /*
8565          *      Check for null copy object.
8566          */
8567
8568         if (copy == VM_MAP_COPY_NULL)
8569                 return(KERN_SUCCESS);
8570
8571         /*
8572          *      Check for special kernel buffer allocated
8573          *      by new_ipc_kmsg_copyin.
8574          */
8575
8576         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8577                 return(vm_map_copyout_kernel_buffer(
8578                                dst_map, &dst_addr,
8579                                copy, copy->size, TRUE, discard_on_success));
8580         }
8581
8582         /*
8583          *      Only works for entry lists at the moment.  Will
8584          *      support page lists later.
8585          */
8586
8587         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8588
8589         if (copy->size == 0) {
8590                 if (discard_on_success)
8591                         vm_map_copy_discard(copy);
8592                 return(KERN_SUCCESS);
8593         }
8594
8595         /*
8596          *      Verify that the destination is all writeable
8597          *      initially.  We have to trunc the destination
8598          *      address and round the copy size or we'll end up
8599          *      splitting entries in strange ways.
8600          */
8601
8602         if (!VM_MAP_PAGE_ALIGNED(copy->size,
8603                                  VM_MAP_PAGE_MASK(dst_map)) ||
8604             !VM_MAP_PAGE_ALIGNED(copy->offset,
8605                                  VM_MAP_PAGE_MASK(dst_map)) ||
8606             !VM_MAP_PAGE_ALIGNED(dst_addr,
8607                                  VM_MAP_PAGE_MASK(dst_map)))
8608         {
8609                 aligned = FALSE;
8610                 dst_end = vm_map_round_page(dst_addr + copy->size,
8611                                             VM_MAP_PAGE_MASK(dst_map));
8612         } else {
8613                 dst_end = dst_addr + copy->size;
8614         }
8615
8616         vm_map_lock(dst_map);
8617
8618         /* LP64todo - remove this check when vm_map_commpage64()
8619          * no longer has to stuff in a map_entry for the commpage
8620          * above the map's max_offset.
8621          */
8622         if (dst_addr >= dst_map->max_offset) {
8623                 vm_map_unlock(dst_map);
8624                 return(KERN_INVALID_ADDRESS);
8625         }
8626
8627 start_pass_1:
8628         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8629                 vm_map_unlock(dst_map);
8630                 return(KERN_INVALID_ADDRESS);
8631         }
8632         vm_map_clip_start(dst_map,
8633                           tmp_entry,
8634                           vm_map_trunc_page(dst_addr,
8635                                             VM_MAP_PAGE_MASK(dst_map)));
8636         for (entry = tmp_entry;;) {
8637                 vm_map_entry_t  next = entry->vme_next;
8638
8639                 while(entry->is_sub_map) {
8640                         vm_map_offset_t sub_start;
8641                         vm_map_offset_t sub_end;
8642                         vm_map_offset_t local_end;
8643
8644                         if (entry->in_transition) {
8645
8646                                 /*
8647                                  * Say that we are waiting, and wait for entry.
8648                                  */
8649                                 entry->needs_wakeup = TRUE;
8650                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8651
8652                                 goto start_pass_1;
8653                         }
8654
8655                         local_end = entry->vme_end;
8656                         if (!(entry->needs_copy)) {
8657                                 /* if needs_copy we are a COW submap */
8658                                 /* in such a case we just replace so */
8659                                 /* there is no need for the follow-  */
8660                                 /* ing check.                        */
8661                                 encountered_sub_map = TRUE;
8662                                 sub_start = VME_OFFSET(entry);
8663
8664                                 if(entry->vme_end < dst_end)
8665                                         sub_end = entry->vme_end;
8666                                 else
8667                                         sub_end = dst_end;
8668                                 sub_end -= entry->vme_start;
8669                                 sub_end += VME_OFFSET(entry);
8670                                 vm_map_unlock(dst_map);
8671
8672                                 kr = vm_map_overwrite_submap_recurse(
8673                                         VME_SUBMAP(entry),
8674                                         sub_start,
8675                                         sub_end - sub_start);
8676                                 if(kr != KERN_SUCCESS)
8677                                         return kr;
8678                                 vm_map_lock(dst_map);
8679                         }
8680
8681                         if (dst_end <= entry->vme_end)
8682                                 goto start_overwrite;
8683                         if(!vm_map_lookup_entry(dst_map, local_end,
8684                                                 &entry)) {
8685                                 vm_map_unlock(dst_map);
8686                                 return(KERN_INVALID_ADDRESS);
8687                         }
8688                         next = entry->vme_next;
8689                 }
8690
8691                 if ( ! (entry->protection & VM_PROT_WRITE)) {
8692                         vm_map_unlock(dst_map);
8693                         return(KERN_PROTECTION_FAILURE);
8694                 }
8695
8696                 /*
8697                  *      If the entry is in transition, we must wait
8698                  *      for it to exit that state.  Anything could happen
8699                  *      when we unlock the map, so start over.
8700                  */
8701                 if (entry->in_transition) {
8702
8703                         /*
8704                          * Say that we are waiting, and wait for entry.
8705                          */
8706                         entry->needs_wakeup = TRUE;
8707                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8708
8709                         goto start_pass_1;
8710                 }
8711
8712 /*
8713  *              our range is contained completely within this map entry
8714  */
8715                 if (dst_end <= entry->vme_end)
8716                         break;
8717 /*
8718  *              check that range specified is contiguous region
8719  */
8720                 if ((next == vm_map_to_entry(dst_map)) ||
8721                     (next->vme_start != entry->vme_end)) {
8722                         vm_map_unlock(dst_map);
8723                         return(KERN_INVALID_ADDRESS);
8724                 }
8725
8726
8727                 /*
8728                  *      Check for permanent objects in the destination.
8729                  */
8730                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8731                     ((!VME_OBJECT(entry)->internal) ||
8732                      (VME_OBJECT(entry)->true_share))) {
8733                         contains_permanent_objects = TRUE;
8734                 }
8735
8736                 entry = next;
8737         }/* for */
8738
8739 start_overwrite:
8740         /*
8741          *      If there are permanent objects in the destination, then
8742          *      the copy cannot be interrupted.
8743          */
8744
8745         if (interruptible && contains_permanent_objects) {
8746                 vm_map_unlock(dst_map);
8747                 return(KERN_FAILURE);   /* XXX */
8748         }
8749
8750         /*
8751          *
8752          *      Make a second pass, overwriting the data
8753          *      At the beginning of each loop iteration,
8754          *      the next entry to be overwritten is "tmp_entry"
8755          *      (initially, the value returned from the lookup above),
8756          *      and the starting address expected in that entry
8757          *      is "start".
8758          */
8759
8760         total_size = copy->size;
8761         if(encountered_sub_map) {
8762                 copy_size = 0;
8763                 /* re-calculate tmp_entry since we've had the map */
8764                 /* unlocked */
8765                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8766                         vm_map_unlock(dst_map);
8767                         return(KERN_INVALID_ADDRESS);
8768                 }
8769         } else {
8770                 copy_size = copy->size;
8771         }
8772
8773         base_addr = dst_addr;
8774         while(TRUE) {
8775                 /* deconstruct the copy object and do in parts */
8776                 /* only in sub_map, interruptable case */
8777                 vm_map_entry_t  copy_entry;
8778                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
8779                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
8780                 int             nentries;
8781                 int             remaining_entries = 0;
8782                 vm_map_offset_t new_offset = 0;
8783
8784                 for (entry = tmp_entry; copy_size == 0;) {
8785                         vm_map_entry_t  next;
8786
8787                         next = entry->vme_next;
8788
8789                         /* tmp_entry and base address are moved along */
8790                         /* each time we encounter a sub-map.  Otherwise */
8791                         /* entry can outpase tmp_entry, and the copy_size */
8792                         /* may reflect the distance between them */
8793                         /* if the current entry is found to be in transition */
8794                         /* we will start over at the beginning or the last */
8795                         /* encounter of a submap as dictated by base_addr */
8796                         /* we will zero copy_size accordingly. */
8797                         if (entry->in_transition) {
8798                                 /*
8799                                  * Say that we are waiting, and wait for entry.
8800                                  */
8801                                 entry->needs_wakeup = TRUE;
8802                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8803
8804                                 if(!vm_map_lookup_entry(dst_map, base_addr,
8805                                                         &tmp_entry)) {
8806                                         vm_map_unlock(dst_map);
8807                                         return(KERN_INVALID_ADDRESS);
8808                                 }
8809                                 copy_size = 0;
8810                                 entry = tmp_entry;
8811                                 continue;
8812                         }
8813                         if (entry->is_sub_map) {
8814                                 vm_map_offset_t sub_start;
8815                                 vm_map_offset_t sub_end;
8816                                 vm_map_offset_t local_end;
8817
8818                                 if (entry->needs_copy) {
8819                                         /* if this is a COW submap */
8820                                         /* just back the range with a */
8821                                         /* anonymous entry */
8822                                         if(entry->vme_end < dst_end)
8823                                                 sub_end = entry->vme_end;
8824                                         else
8825                                                 sub_end = dst_end;
8826                                         if(entry->vme_start < base_addr)
8827                                                 sub_start = base_addr;
8828                                         else
8829                                                 sub_start = entry->vme_start;
8830                                         vm_map_clip_end(
8831                                                 dst_map, entry, sub_end);
8832                                         vm_map_clip_start(
8833                                                 dst_map, entry, sub_start);
8834                                         assert(!entry->use_pmap);
8835                                         assert(!entry->iokit_acct);
8836                                         entry->use_pmap = TRUE;
8837                                         entry->is_sub_map = FALSE;
8838                                         vm_map_deallocate(
8839                                                 VME_SUBMAP(entry));
8840                                         VME_OBJECT_SET(entry, NULL);
8841                                         VME_OFFSET_SET(entry, 0);
8842                                         entry->is_shared = FALSE;
8843                                         entry->needs_copy = FALSE;
8844                                         entry->protection = VM_PROT_DEFAULT;
8845                                         entry->max_protection = VM_PROT_ALL;
8846                                         entry->wired_count = 0;
8847                                         entry->user_wired_count = 0;
8848                                         if(entry->inheritance
8849                                            == VM_INHERIT_SHARE)
8850                                                 entry->inheritance = VM_INHERIT_COPY;
8851                                         continue;
8852                                 }
8853                                 /* first take care of any non-sub_map */
8854                                 /* entries to send */
8855                                 if(base_addr < entry->vme_start) {
8856                                         /* stuff to send */
8857                                         copy_size =
8858                                                 entry->vme_start - base_addr;
8859                                         break;
8860                                 }
8861                                 sub_start = VME_OFFSET(entry);
8862
8863                                 if(entry->vme_end < dst_end)
8864                                         sub_end = entry->vme_end;
8865                                 else
8866                                         sub_end = dst_end;
8867                                 sub_end -= entry->vme_start;
8868                                 sub_end += VME_OFFSET(entry);
8869                                 local_end = entry->vme_end;
8870                                 vm_map_unlock(dst_map);
8871                                 copy_size = sub_end - sub_start;
8872
8873                                 /* adjust the copy object */
8874                                 if (total_size > copy_size) {
8875                                         vm_map_size_t   local_size = 0;
8876                                         vm_map_size_t   entry_size;
8877
8878                                         nentries = 1;
8879                                         new_offset = copy->offset;
8880                                         copy_entry = vm_map_copy_first_entry(copy);
8881                                         while(copy_entry !=
8882                                               vm_map_copy_to_entry(copy)){
8883                                                 entry_size = copy_entry->vme_end -
8884                                                         copy_entry->vme_start;
8885                                                 if((local_size < copy_size) &&
8886                                                    ((local_size + entry_size)
8887                                                     >= copy_size)) {
8888                                                         vm_map_copy_clip_end(copy,
8889                                                                              copy_entry,
8890                                                                              copy_entry->vme_start +
8891                                                                              (copy_size - local_size));
8892                                                         entry_size = copy_entry->vme_end -
8893                                                                 copy_entry->vme_start;
8894                                                         local_size += entry_size;
8895                                                         new_offset += entry_size;
8896                                                 }
8897                                                 if(local_size >= copy_size) {
8898                                                         next_copy = copy_entry->vme_next;
8899                                                         copy_entry->vme_next =
8900                                                                 vm_map_copy_to_entry(copy);
8901                                                         previous_prev =
8902                                                                 copy->cpy_hdr.links.prev;
8903                                                         copy->cpy_hdr.links.prev = copy_entry;
8904                                                         copy->size = copy_size;
8905                                                         remaining_entries =
8906                                                                 copy->cpy_hdr.nentries;
8907                                                         remaining_entries -= nentries;
8908                                                         copy->cpy_hdr.nentries = nentries;
8909                                                         break;
8910                                                 } else {
8911                                                         local_size += entry_size;
8912                                                         new_offset += entry_size;
8913                                                         nentries++;
8914                                                 }
8915                                                 copy_entry = copy_entry->vme_next;
8916                                         }
8917                                 }
8918
8919                                 if((entry->use_pmap) && (pmap == NULL)) {
8920                                         kr = vm_map_copy_overwrite_nested(
8921                                                 VME_SUBMAP(entry),
8922                                                 sub_start,
8923                                                 copy,
8924                                                 interruptible,
8925                                                 VME_SUBMAP(entry)->pmap,
8926                                                 TRUE);
8927                                 } else if (pmap != NULL) {
8928                                         kr = vm_map_copy_overwrite_nested(
8929                                                 VME_SUBMAP(entry),
8930                                                 sub_start,
8931                                                 copy,
8932                                                 interruptible, pmap,
8933                                                 TRUE);
8934                                 } else {
8935                                         kr = vm_map_copy_overwrite_nested(
8936                                                 VME_SUBMAP(entry),
8937                                                 sub_start,
8938                                                 copy,
8939                                                 interruptible,
8940                                                 dst_map->pmap,
8941                                                 TRUE);
8942                                 }
8943                                 if(kr != KERN_SUCCESS) {
8944                                         if(next_copy != NULL) {
8945                                                 copy->cpy_hdr.nentries +=
8946                                                         remaining_entries;
8947                                                 copy->cpy_hdr.links.prev->vme_next =
8948                                                         next_copy;
8949                                                 copy->cpy_hdr.links.prev
8950                                                         = previous_prev;
8951                                                 copy->size = total_size;
8952                                         }
8953                                         return kr;
8954                                 }
8955                                 if (dst_end <= local_end) {
8956                                         return(KERN_SUCCESS);
8957                                 }
8958                                 /* otherwise copy no longer exists, it was */
8959                                 /* destroyed after successful copy_overwrite */
8960                                 copy = vm_map_copy_allocate();
8961                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
8962                                 copy->offset = new_offset;
8963
8964                                 /*
8965                                  * XXX FBDP
8966                                  * this does not seem to deal with
8967                                  * the VM map store (R&B tree)
8968                                  */
8969
8970                                 total_size -= copy_size;
8971                                 copy_size = 0;
8972                                 /* put back remainder of copy in container */
8973                                 if(next_copy != NULL) {
8974                                         copy->cpy_hdr.nentries = remaining_entries;
8975                                         copy->cpy_hdr.links.next = next_copy;
8976                                         copy->cpy_hdr.links.prev = previous_prev;
8977                                         copy->size = total_size;
8978                                         next_copy->vme_prev =
8979                                                 vm_map_copy_to_entry(copy);
8980                                         next_copy = NULL;
8981                                 }
8982                                 base_addr = local_end;
8983                                 vm_map_lock(dst_map);
8984                                 if(!vm_map_lookup_entry(dst_map,
8985                                                         local_end, &tmp_entry)) {
8986                                         vm_map_unlock(dst_map);
8987                                         return(KERN_INVALID_ADDRESS);
8988                                 }
8989                                 entry = tmp_entry;
8990                                 continue;
8991                         }
8992                         if (dst_end <= entry->vme_end) {
8993                                 copy_size = dst_end - base_addr;
8994                                 break;
8995                         }
8996
8997                         if ((next == vm_map_to_entry(dst_map)) ||
8998                             (next->vme_start != entry->vme_end)) {
8999                                 vm_map_unlock(dst_map);
9000                                 return(KERN_INVALID_ADDRESS);
9001                         }
9002
9003                         entry = next;
9004                 }/* for */
9005
9006                 next_copy = NULL;
9007                 nentries = 1;
9008
9009                 /* adjust the copy object */
9010                 if (total_size > copy_size) {
9011                         vm_map_size_t   local_size = 0;
9012                         vm_map_size_t   entry_size;
9013
9014                         new_offset = copy->offset;
9015                         copy_entry = vm_map_copy_first_entry(copy);
9016                         while(copy_entry != vm_map_copy_to_entry(copy)) {
9017                                 entry_size = copy_entry->vme_end -
9018                                         copy_entry->vme_start;
9019                                 if((local_size < copy_size) &&
9020                                    ((local_size + entry_size)
9021                                     >= copy_size)) {
9022                                         vm_map_copy_clip_end(copy, copy_entry,
9023                                                              copy_entry->vme_start +
9024                                                              (copy_size - local_size));
9025                                         entry_size = copy_entry->vme_end -
9026                                                 copy_entry->vme_start;
9027                                         local_size += entry_size;
9028                                         new_offset += entry_size;
9029                                 }
9030                                 if(local_size >= copy_size) {
9031                                         next_copy = copy_entry->vme_next;
9032                                         copy_entry->vme_next =
9033                                                 vm_map_copy_to_entry(copy);
9034                                         previous_prev =
9035                                                 copy->cpy_hdr.links.prev;
9036                                         copy->cpy_hdr.links.prev = copy_entry;
9037                                         copy->size = copy_size;
9038                                         remaining_entries =
9039                                                 copy->cpy_hdr.nentries;
9040                                         remaining_entries -= nentries;
9041                                         copy->cpy_hdr.nentries = nentries;
9042                                         break;
9043                                 } else {
9044                                         local_size += entry_size;
9045                                         new_offset += entry_size;
9046                                         nentries++;
9047                                 }
9048                                 copy_entry = copy_entry->vme_next;
9049                         }
9050                 }
9051
9052                 if (aligned) {
9053                         pmap_t  local_pmap;
9054
9055                         if(pmap)
9056                                 local_pmap = pmap;
9057                         else
9058                                 local_pmap = dst_map->pmap;
9059
9060                         if ((kr =  vm_map_copy_overwrite_aligned(
9061                                      dst_map, tmp_entry, copy,
9062                                      base_addr, local_pmap)) != KERN_SUCCESS) {
9063                                 if(next_copy != NULL) {
9064                                         copy->cpy_hdr.nentries +=
9065                                                 remaining_entries;
9066                                         copy->cpy_hdr.links.prev->vme_next =
9067                                                 next_copy;
9068                                         copy->cpy_hdr.links.prev =
9069                                                 previous_prev;
9070                                         copy->size += copy_size;
9071                                 }
9072                                 return kr;
9073                         }
9074                         vm_map_unlock(dst_map);
9075                 } else {
9076                         /*
9077                          * Performance gain:
9078                          *
9079                          * if the copy and dst address are misaligned but the same
9080                          * offset within the page we can copy_not_aligned the
9081                          * misaligned parts and copy aligned the rest.  If they are
9082                          * aligned but len is unaligned we simply need to copy
9083                          * the end bit unaligned.  We'll need to split the misaligned
9084                          * bits of the region in this case !
9085                          */
9086                         /* ALWAYS UNLOCKS THE dst_map MAP */
9087                         kr = vm_map_copy_overwrite_unaligned(
9088                                 dst_map,
9089                                 tmp_entry,
9090                                 copy,
9091                                 base_addr,
9092                                 discard_on_success);
9093                         if (kr != KERN_SUCCESS) {
9094                                 if(next_copy != NULL) {
9095                                         copy->cpy_hdr.nentries +=
9096                                                 remaining_entries;
9097                                         copy->cpy_hdr.links.prev->vme_next =
9098                                                 next_copy;
9099                                         copy->cpy_hdr.links.prev =
9100                                                 previous_prev;
9101                                         copy->size += copy_size;
9102                                 }
9103                                 return kr;
9104                         }
9105                 }
9106                 total_size -= copy_size;
9107                 if(total_size == 0)
9108                         break;
9109                 base_addr += copy_size;
9110                 copy_size = 0;
9111                 copy->offset = new_offset;
9112                 if(next_copy != NULL) {
9113                         copy->cpy_hdr.nentries = remaining_entries;
9114                         copy->cpy_hdr.links.next = next_copy;
9115                         copy->cpy_hdr.links.prev = previous_prev;
9116                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
9117                         copy->size = total_size;
9118                 }
9119                 vm_map_lock(dst_map);
9120                 while(TRUE) {
9121                         if (!vm_map_lookup_entry(dst_map,
9122                                                  base_addr, &tmp_entry)) {
9123                                 vm_map_unlock(dst_map);
9124                                 return(KERN_INVALID_ADDRESS);
9125                         }
9126                         if (tmp_entry->in_transition) {
9127                                 entry->needs_wakeup = TRUE;
9128                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
9129                         } else {
9130                                 break;
9131                         }
9132                 }
9133                 vm_map_clip_start(dst_map,
9134                                   tmp_entry,
9135                                   vm_map_trunc_page(base_addr,
9136                                                     VM_MAP_PAGE_MASK(dst_map)));
9137
9138                 entry = tmp_entry;
9139         } /* while */
9140
9141         /*
9142          *      Throw away the vm_map_copy object
9143          */
9144         if (discard_on_success)
9145                 vm_map_copy_discard(copy);
9146
9147         return(KERN_SUCCESS);
9148 }/* vm_map_copy_overwrite */
9149
9150 kern_return_t
9151 vm_map_copy_overwrite(
9152         vm_map_t        dst_map,
9153         vm_map_offset_t dst_addr,
9154         vm_map_copy_t   copy,
9155         boolean_t       interruptible)
9156 {
9157         vm_map_size_t   head_size, tail_size;
9158         vm_map_copy_t   head_copy, tail_copy;
9159         vm_map_offset_t head_addr, tail_addr;
9160         vm_map_entry_t  entry;
9161         kern_return_t   kr;
9162         vm_map_offset_t effective_page_mask, effective_page_size;
9163
9164         head_size = 0;
9165         tail_size = 0;
9166         head_copy = NULL;
9167         tail_copy = NULL;
9168         head_addr = 0;
9169         tail_addr = 0;
9170
9171         if (interruptible ||
9172             copy == VM_MAP_COPY_NULL ||
9173             copy->type != VM_MAP_COPY_ENTRY_LIST) {
9174                 /*
9175                  * We can't split the "copy" map if we're interruptible
9176                  * or if we don't have a "copy" map...
9177                  */
9178         blunt_copy:
9179                 return vm_map_copy_overwrite_nested(dst_map,
9180                                                     dst_addr,
9181                                                     copy,
9182                                                     interruptible,
9183                                                     (pmap_t) NULL,
9184                                                     TRUE);
9185         }
9186
9187         effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9188         effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9189                                   effective_page_mask);
9190         effective_page_size = effective_page_mask + 1;
9191
9192         if (copy->size < 3 * effective_page_size) {
9193                 /*
9194                  * Too small to bother with optimizing...
9195                  */
9196                 goto blunt_copy;
9197         }
9198
9199         if ((dst_addr & effective_page_mask) !=
9200             (copy->offset & effective_page_mask)) {
9201                 /*
9202                  * Incompatible mis-alignment of source and destination...
9203                  */
9204                 goto blunt_copy;
9205         }
9206
9207         /*
9208          * Proper alignment or identical mis-alignment at the beginning.
9209          * Let's try and do a small unaligned copy first (if needed)
9210          * and then an aligned copy for the rest.
9211          */
9212         if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9213                 head_addr = dst_addr;
9214                 head_size = (effective_page_size -
9215                              (copy->offset & effective_page_mask));
9216                 head_size = MIN(head_size, copy->size);
9217         }
9218         if (!vm_map_page_aligned(copy->offset + copy->size,
9219                                   effective_page_mask)) {
9220                 /*
9221                  * Mis-alignment at the end.
9222                  * Do an aligned copy up to the last page and
9223                  * then an unaligned copy for the remaining bytes.
9224                  */
9225                 tail_size = ((copy->offset + copy->size) &
9226                              effective_page_mask);
9227                 tail_size = MIN(tail_size, copy->size);
9228                 tail_addr = dst_addr + copy->size - tail_size;
9229                 assert(tail_addr >= head_addr + head_size);
9230         }
9231         assert(head_size + tail_size <= copy->size);
9232
9233         if (head_size + tail_size == copy->size) {
9234                 /*
9235                  * It's all unaligned, no optimization possible...
9236                  */
9237                 goto blunt_copy;
9238         }
9239
9240         /*
9241          * Can't optimize if there are any submaps in the
9242          * destination due to the way we free the "copy" map
9243          * progressively in vm_map_copy_overwrite_nested()
9244          * in that case.
9245          */
9246         vm_map_lock_read(dst_map);
9247         if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9248                 vm_map_unlock_read(dst_map);
9249                 goto blunt_copy;
9250         }
9251         for (;
9252              (entry != vm_map_copy_to_entry(copy) &&
9253               entry->vme_start < dst_addr + copy->size);
9254              entry = entry->vme_next) {
9255                 if (entry->is_sub_map) {
9256                         vm_map_unlock_read(dst_map);
9257                         goto blunt_copy;
9258                 }
9259         }
9260         vm_map_unlock_read(dst_map);
9261
9262         if (head_size) {
9263                 /*
9264                  * Unaligned copy of the first "head_size" bytes, to reach
9265                  * a page boundary.
9266                  */
9267
9268                 /*
9269                  * Extract "head_copy" out of "copy".
9270                  */
9271                 head_copy = vm_map_copy_allocate();
9272                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9273                 head_copy->cpy_hdr.entries_pageable =
9274                         copy->cpy_hdr.entries_pageable;
9275                 vm_map_store_init(&head_copy->cpy_hdr);
9276
9277                 entry = vm_map_copy_first_entry(copy);
9278                 if (entry->vme_end < copy->offset + head_size) {
9279                         head_size = entry->vme_end - copy->offset;
9280                 }
9281
9282                 head_copy->offset = copy->offset;
9283                 head_copy->size = head_size;
9284                 copy->offset += head_size;
9285                 copy->size -= head_size;
9286
9287                 vm_map_copy_clip_end(copy, entry, copy->offset);
9288                 vm_map_copy_entry_unlink(copy, entry);
9289                 vm_map_copy_entry_link(head_copy,
9290                                        vm_map_copy_to_entry(head_copy),
9291                                        entry);
9292
9293                 /*
9294                  * Do the unaligned copy.
9295                  */
9296                 kr = vm_map_copy_overwrite_nested(dst_map,
9297                                                   head_addr,
9298                                                   head_copy,
9299                                                   interruptible,
9300                                                   (pmap_t) NULL,
9301                                                   FALSE);
9302                 if (kr != KERN_SUCCESS)
9303                         goto done;
9304         }
9305
9306         if (tail_size) {
9307                 /*
9308                  * Extract "tail_copy" out of "copy".
9309                  */
9310                 tail_copy = vm_map_copy_allocate();
9311                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9312                 tail_copy->cpy_hdr.entries_pageable =
9313                         copy->cpy_hdr.entries_pageable;
9314                 vm_map_store_init(&tail_copy->cpy_hdr);
9315
9316                 tail_copy->offset = copy->offset + copy->size - tail_size;
9317                 tail_copy->size = tail_size;
9318
9319                 copy->size -= tail_size;
9320
9321                 entry = vm_map_copy_last_entry(copy);
9322                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9323                 entry = vm_map_copy_last_entry(copy);
9324                 vm_map_copy_entry_unlink(copy, entry);
9325                 vm_map_copy_entry_link(tail_copy,
9326                                        vm_map_copy_last_entry(tail_copy),
9327                                        entry);
9328         }
9329
9330         /*
9331          * Copy most (or possibly all) of the data.
9332          */
9333         kr = vm_map_copy_overwrite_nested(dst_map,
9334                                           dst_addr + head_size,
9335                                           copy,
9336                                           interruptible,
9337                                           (pmap_t) NULL,
9338                                           FALSE);
9339         if (kr != KERN_SUCCESS) {
9340                 goto done;
9341         }
9342
9343         if (tail_size) {
9344                 kr = vm_map_copy_overwrite_nested(dst_map,
9345                                                   tail_addr,
9346                                                   tail_copy,
9347                                                   interruptible,
9348                                                   (pmap_t) NULL,
9349                                                   FALSE);
9350         }
9351
9352 done:
9353         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9354         if (kr == KERN_SUCCESS) {
9355                 /*
9356                  * Discard all the copy maps.
9357                  */
9358                 if (head_copy) {
9359                         vm_map_copy_discard(head_copy);
9360                         head_copy = NULL;
9361                 }
9362                 vm_map_copy_discard(copy);
9363                 if (tail_copy) {
9364                         vm_map_copy_discard(tail_copy);
9365                         tail_copy = NULL;
9366                 }
9367         } else {
9368                 /*
9369                  * Re-assemble the original copy map.
9370                  */
9371                 if (head_copy) {
9372                         entry = vm_map_copy_first_entry(head_copy);
9373                         vm_map_copy_entry_unlink(head_copy, entry);
9374                         vm_map_copy_entry_link(copy,
9375                                                vm_map_copy_to_entry(copy),
9376                                                entry);
9377                         copy->offset -= head_size;
9378                         copy->size += head_size;
9379                         vm_map_copy_discard(head_copy);
9380                         head_copy = NULL;
9381                 }
9382                 if (tail_copy) {
9383                         entry = vm_map_copy_last_entry(tail_copy);
9384                         vm_map_copy_entry_unlink(tail_copy, entry);
9385                         vm_map_copy_entry_link(copy,
9386                                                vm_map_copy_last_entry(copy),
9387                                                entry);
9388                         copy->size += tail_size;
9389                         vm_map_copy_discard(tail_copy);
9390                         tail_copy = NULL;
9391                 }
9392         }
9393         return kr;
9394 }
9395
9396
9397 /*
9398  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
9399  *
9400  *      Decription:
9401  *      Physically copy unaligned data
9402  *
9403  *      Implementation:
9404  *      Unaligned parts of pages have to be physically copied.  We use
9405  *      a modified form of vm_fault_copy (which understands none-aligned
9406  *      page offsets and sizes) to do the copy.  We attempt to copy as
9407  *      much memory in one go as possibly, however vm_fault_copy copies
9408  *      within 1 memory object so we have to find the smaller of "amount left"
9409  *      "source object data size" and "target object data size".  With
9410  *      unaligned data we don't need to split regions, therefore the source
9411  *      (copy) object should be one map entry, the target range may be split
9412  *      over multiple map entries however.  In any event we are pessimistic
9413  *      about these assumptions.
9414  *
9415  *      Assumptions:
9416  *      dst_map is locked on entry and is return locked on success,
9417  *      unlocked on error.
9418  */
9419
9420 static kern_return_t
9421 vm_map_copy_overwrite_unaligned(
9422         vm_map_t        dst_map,
9423         vm_map_entry_t  entry,
9424         vm_map_copy_t   copy,
9425         vm_map_offset_t start,
9426         boolean_t       discard_on_success)
9427 {
9428         vm_map_entry_t          copy_entry;
9429         vm_map_entry_t          copy_entry_next;
9430         vm_map_version_t        version;
9431         vm_object_t             dst_object;
9432         vm_object_offset_t      dst_offset;
9433         vm_object_offset_t      src_offset;
9434         vm_object_offset_t      entry_offset;
9435         vm_map_offset_t         entry_end;
9436         vm_map_size_t           src_size,
9437                                 dst_size,
9438                                 copy_size,
9439                                 amount_left;
9440         kern_return_t           kr = KERN_SUCCESS;
9441
9442
9443         copy_entry = vm_map_copy_first_entry(copy);
9444
9445         vm_map_lock_write_to_read(dst_map);
9446
9447         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9448         amount_left = copy->size;
9449 /*
9450  *      unaligned so we never clipped this entry, we need the offset into
9451  *      the vm_object not just the data.
9452  */
9453         while (amount_left > 0) {
9454
9455                 if (entry == vm_map_to_entry(dst_map)) {
9456                         vm_map_unlock_read(dst_map);
9457                         return KERN_INVALID_ADDRESS;
9458                 }
9459
9460                 /* "start" must be within the current map entry */
9461                 assert ((start>=entry->vme_start) && (start<entry->vme_end));
9462
9463                 dst_offset = start - entry->vme_start;
9464
9465                 dst_size = entry->vme_end - start;
9466
9467                 src_size = copy_entry->vme_end -
9468                         (copy_entry->vme_start + src_offset);
9469
9470                 if (dst_size < src_size) {
9471 /*
9472  *                      we can only copy dst_size bytes before
9473  *                      we have to get the next destination entry
9474  */
9475                         copy_size = dst_size;
9476                 } else {
9477 /*
9478  *                      we can only copy src_size bytes before
9479  *                      we have to get the next source copy entry
9480  */
9481                         copy_size = src_size;
9482                 }
9483
9484                 if (copy_size > amount_left) {
9485                         copy_size = amount_left;
9486                 }
9487 /*
9488  *              Entry needs copy, create a shadow shadow object for
9489  *              Copy on write region.
9490  */
9491                 if (entry->needs_copy &&
9492                     ((entry->protection & VM_PROT_WRITE) != 0))
9493                 {
9494                         if (vm_map_lock_read_to_write(dst_map)) {
9495                                 vm_map_lock_read(dst_map);
9496                                 goto RetryLookup;
9497                         }
9498                         VME_OBJECT_SHADOW(entry,
9499                                           (vm_map_size_t)(entry->vme_end
9500                                                           - entry->vme_start));
9501                         entry->needs_copy = FALSE;
9502                         vm_map_lock_write_to_read(dst_map);
9503                 }
9504                 dst_object = VME_OBJECT(entry);
9505 /*
9506  *              unlike with the virtual (aligned) copy we're going
9507  *              to fault on it therefore we need a target object.
9508  */
9509                 if (dst_object == VM_OBJECT_NULL) {
9510                         if (vm_map_lock_read_to_write(dst_map)) {
9511                                 vm_map_lock_read(dst_map);
9512                                 goto RetryLookup;
9513                         }
9514                         dst_object = vm_object_allocate((vm_map_size_t)
9515                                                         entry->vme_end - entry->vme_start);
9516                         VME_OBJECT(entry) = dst_object;
9517                         VME_OFFSET_SET(entry, 0);
9518                         assert(entry->use_pmap);
9519                         vm_map_lock_write_to_read(dst_map);
9520                 }
9521 /*
9522  *              Take an object reference and unlock map. The "entry" may
9523  *              disappear or change when the map is unlocked.
9524  */
9525                 vm_object_reference(dst_object);
9526                 version.main_timestamp = dst_map->timestamp;
9527                 entry_offset = VME_OFFSET(entry);
9528                 entry_end = entry->vme_end;
9529                 vm_map_unlock_read(dst_map);
9530 /*
9531  *              Copy as much as possible in one pass
9532  */
9533                 kr = vm_fault_copy(
9534                         VME_OBJECT(copy_entry),
9535                         VME_OFFSET(copy_entry) + src_offset,
9536                         &copy_size,
9537                         dst_object,
9538                         entry_offset + dst_offset,
9539                         dst_map,
9540                         &version,
9541                         THREAD_UNINT );
9542
9543                 start += copy_size;
9544                 src_offset += copy_size;
9545                 amount_left -= copy_size;
9546 /*
9547  *              Release the object reference
9548  */
9549                 vm_object_deallocate(dst_object);
9550 /*
9551  *              If a hard error occurred, return it now
9552  */
9553                 if (kr != KERN_SUCCESS)
9554                         return kr;
9555
9556                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9557                     || amount_left == 0)
9558                 {
9559 /*
9560  *                      all done with this copy entry, dispose.
9561  */
9562                         copy_entry_next = copy_entry->vme_next;
9563
9564                         if (discard_on_success) {
9565                                 vm_map_copy_entry_unlink(copy, copy_entry);
9566                                 assert(!copy_entry->is_sub_map);
9567                                 vm_object_deallocate(VME_OBJECT(copy_entry));
9568                                 vm_map_copy_entry_dispose(copy, copy_entry);
9569                         }
9570
9571                         if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9572                             amount_left) {
9573 /*
9574  *                              not finished copying but run out of source
9575  */
9576                                 return KERN_INVALID_ADDRESS;
9577                         }
9578
9579                         copy_entry = copy_entry_next;
9580
9581                         src_offset = 0;
9582                 }
9583
9584                 if (amount_left == 0)
9585                         return KERN_SUCCESS;
9586
9587                 vm_map_lock_read(dst_map);
9588                 if (version.main_timestamp == dst_map->timestamp) {
9589                         if (start == entry_end) {
9590 /*
9591  *                              destination region is split.  Use the version
9592  *                              information to avoid a lookup in the normal
9593  *                              case.
9594  */
9595                                 entry = entry->vme_next;
9596 /*
9597  *                              should be contiguous. Fail if we encounter
9598  *                              a hole in the destination.
9599  */
9600                                 if (start != entry->vme_start) {
9601                                         vm_map_unlock_read(dst_map);
9602                                         return KERN_INVALID_ADDRESS ;
9603                                 }
9604                         }
9605                 } else {
9606 /*
9607  *                      Map version check failed.
9608  *                      we must lookup the entry because somebody
9609  *                      might have changed the map behind our backs.
9610  */
9611                 RetryLookup:
9612                         if (!vm_map_lookup_entry(dst_map, start, &entry))
9613                         {
9614                                 vm_map_unlock_read(dst_map);
9615                                 return KERN_INVALID_ADDRESS ;
9616                         }
9617                 }
9618         }/* while */
9619
9620         return KERN_SUCCESS;
9621 }/* vm_map_copy_overwrite_unaligned */
9622
9623 /*
9624  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
9625  *
9626  *      Description:
9627  *      Does all the vm_trickery possible for whole pages.
9628  *
9629  *      Implementation:
9630  *
9631  *      If there are no permanent objects in the destination,
9632  *      and the source and destination map entry zones match,
9633  *      and the destination map entry is not shared,
9634  *      then the map entries can be deleted and replaced
9635  *      with those from the copy.  The following code is the
9636  *      basic idea of what to do, but there are lots of annoying
9637  *      little details about getting protection and inheritance
9638  *      right.  Should add protection, inheritance, and sharing checks
9639  *      to the above pass and make sure that no wiring is involved.
9640  */
9641
9642 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9643 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9644 int vm_map_copy_overwrite_aligned_src_large = 0;
9645
9646 static kern_return_t
9647 vm_map_copy_overwrite_aligned(
9648         vm_map_t        dst_map,
9649         vm_map_entry_t  tmp_entry,
9650         vm_map_copy_t   copy,
9651         vm_map_offset_t start,
9652         __unused pmap_t pmap)
9653 {
9654         vm_object_t     object;
9655         vm_map_entry_t  copy_entry;
9656         vm_map_size_t   copy_size;
9657         vm_map_size_t   size;
9658         vm_map_entry_t  entry;
9659
9660         while ((copy_entry = vm_map_copy_first_entry(copy))
9661                != vm_map_copy_to_entry(copy))
9662         {
9663                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9664
9665                 entry = tmp_entry;
9666                 if (entry->is_sub_map) {
9667                         /* unnested when clipped earlier */
9668                         assert(!entry->use_pmap);
9669                 }
9670                 if (entry == vm_map_to_entry(dst_map)) {
9671                         vm_map_unlock(dst_map);
9672                         return KERN_INVALID_ADDRESS;
9673                 }
9674                 size = (entry->vme_end - entry->vme_start);
9675                 /*
9676                  *      Make sure that no holes popped up in the
9677                  *      address map, and that the protection is
9678                  *      still valid, in case the map was unlocked
9679                  *      earlier.
9680                  */
9681
9682                 if ((entry->vme_start != start) || ((entry->is_sub_map)
9683                                                     && !entry->needs_copy)) {
9684                         vm_map_unlock(dst_map);
9685                         return(KERN_INVALID_ADDRESS);
9686                 }
9687                 assert(entry != vm_map_to_entry(dst_map));
9688
9689                 /*
9690                  *      Check protection again
9691                  */
9692
9693                 if ( ! (entry->protection & VM_PROT_WRITE)) {
9694                         vm_map_unlock(dst_map);
9695                         return(KERN_PROTECTION_FAILURE);
9696                 }
9697
9698                 /*
9699                  *      Adjust to source size first
9700                  */
9701
9702                 if (copy_size < size) {
9703                         if (entry->map_aligned &&
9704                             !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9705                                                  VM_MAP_PAGE_MASK(dst_map))) {
9706                                 /* no longer map-aligned */
9707                                 entry->map_aligned = FALSE;
9708                         }
9709                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9710                         size = copy_size;
9711                 }
9712
9713                 /*
9714                  *      Adjust to destination size
9715                  */
9716
9717                 if (size < copy_size) {
9718                         vm_map_copy_clip_end(copy, copy_entry,
9719                                              copy_entry->vme_start + size);
9720                         copy_size = size;
9721                 }
9722
9723                 assert((entry->vme_end - entry->vme_start) == size);
9724                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9725                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9726
9727                 /*
9728                  *      If the destination contains temporary unshared memory,
9729                  *      we can perform the copy by throwing it away and
9730                  *      installing the source data.
9731                  */
9732
9733                 object = VME_OBJECT(entry);
9734                 if ((!entry->is_shared &&
9735                      ((object == VM_OBJECT_NULL) ||
9736                       (object->internal && !object->true_share))) ||
9737                     entry->needs_copy) {
9738                         vm_object_t     old_object = VME_OBJECT(entry);
9739                         vm_object_offset_t      old_offset = VME_OFFSET(entry);
9740                         vm_object_offset_t      offset;
9741
9742                         /*
9743                          * Ensure that the source and destination aren't
9744                          * identical
9745                          */
9746                         if (old_object == VME_OBJECT(copy_entry) &&
9747                             old_offset == VME_OFFSET(copy_entry)) {
9748                                 vm_map_copy_entry_unlink(copy, copy_entry);
9749                                 vm_map_copy_entry_dispose(copy, copy_entry);
9750
9751                                 if (old_object != VM_OBJECT_NULL)
9752                                         vm_object_deallocate(old_object);
9753
9754                                 start = tmp_entry->vme_end;
9755                                 tmp_entry = tmp_entry->vme_next;
9756                                 continue;
9757                         }
9758
9759 #if !CONFIG_EMBEDDED
9760 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9761 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
9762                         if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9763                             VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9764                             copy_size <= __TRADEOFF1_COPY_SIZE) {
9765                                 /*
9766                                  * Virtual vs. Physical copy tradeoff #1.
9767                                  *
9768                                  * Copying only a few pages out of a large
9769                                  * object:  do a physical copy instead of
9770                                  * a virtual copy, to avoid possibly keeping
9771                                  * the entire large object alive because of
9772                                  * those few copy-on-write pages.
9773                                  */
9774                                 vm_map_copy_overwrite_aligned_src_large++;
9775                                 goto slow_copy;
9776                         }
9777 #endif /* !CONFIG_EMBEDDED */
9778
9779                         if ((dst_map->pmap != kernel_pmap) &&
9780                             (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9781                             (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
9782                                 vm_object_t new_object, new_shadow;
9783
9784                                 /*
9785                                  * We're about to map something over a mapping
9786                                  * established by malloc()...
9787                                  */
9788                                 new_object = VME_OBJECT(copy_entry);
9789                                 if (new_object != VM_OBJECT_NULL) {
9790                                         vm_object_lock_shared(new_object);
9791                                 }
9792                                 while (new_object != VM_OBJECT_NULL &&
9793 #if !CONFIG_EMBEDDED
9794                                        !new_object->true_share &&
9795                                        new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9796 #endif /* !CONFIG_EMBEDDED */
9797                                        new_object->internal) {
9798                                         new_shadow = new_object->shadow;
9799                                         if (new_shadow == VM_OBJECT_NULL) {
9800                                                 break;
9801                                         }
9802                                         vm_object_lock_shared(new_shadow);
9803                                         vm_object_unlock(new_object);
9804                                         new_object = new_shadow;
9805                                 }
9806                                 if (new_object != VM_OBJECT_NULL) {
9807                                         if (!new_object->internal) {
9808                                                 /*
9809                                                  * The new mapping is backed
9810                                                  * by an external object.  We
9811                                                  * don't want malloc'ed memory
9812                                                  * to be replaced with such a
9813                                                  * non-anonymous mapping, so
9814                                                  * let's go off the optimized
9815                                                  * path...
9816                                                  */
9817                                                 vm_map_copy_overwrite_aligned_src_not_internal++;
9818                                                 vm_object_unlock(new_object);
9819                                                 goto slow_copy;
9820                                         }
9821 #if !CONFIG_EMBEDDED
9822                                         if (new_object->true_share ||
9823                                             new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9824                                                 /*
9825                                                  * Same if there's a "true_share"
9826                                                  * object in the shadow chain, or
9827                                                  * an object with a non-default
9828                                                  * (SYMMETRIC) copy strategy.
9829                                                  */
9830                                                 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9831                                                 vm_object_unlock(new_object);
9832                                                 goto slow_copy;
9833                                         }
9834 #endif /* !CONFIG_EMBEDDED */
9835                                         vm_object_unlock(new_object);
9836                                 }
9837                                 /*
9838                                  * The new mapping is still backed by
9839                                  * anonymous (internal) memory, so it's
9840                                  * OK to substitute it for the original
9841                                  * malloc() mapping.
9842                                  */
9843                         }
9844
9845                         if (old_object != VM_OBJECT_NULL) {
9846                                 if(entry->is_sub_map) {
9847                                         if(entry->use_pmap) {
9848 #ifndef NO_NESTED_PMAP
9849                                                 pmap_unnest(dst_map->pmap,
9850                                                             (addr64_t)entry->vme_start,
9851                                                             entry->vme_end - entry->vme_start);
9852 #endif  /* NO_NESTED_PMAP */
9853                                                 if(dst_map->mapped_in_other_pmaps) {
9854                                                         /* clean up parent */
9855                                                         /* map/maps */
9856                                                         vm_map_submap_pmap_clean(
9857                                                                 dst_map, entry->vme_start,
9858                                                                 entry->vme_end,
9859                                                                 VME_SUBMAP(entry),
9860                                                                 VME_OFFSET(entry));
9861                                                 }
9862                                         } else {
9863                                                 vm_map_submap_pmap_clean(
9864                                                         dst_map, entry->vme_start,
9865                                                         entry->vme_end,
9866                                                         VME_SUBMAP(entry),
9867                                                         VME_OFFSET(entry));
9868                                         }
9869                                         vm_map_deallocate(VME_SUBMAP(entry));
9870                                 } else {
9871                                         if(dst_map->mapped_in_other_pmaps) {
9872                                                 vm_object_pmap_protect_options(
9873                                                         VME_OBJECT(entry),
9874                                                         VME_OFFSET(entry),
9875                                                         entry->vme_end
9876                                                         - entry->vme_start,
9877                                                         PMAP_NULL,
9878                                                         entry->vme_start,
9879                                                         VM_PROT_NONE,
9880                                                         PMAP_OPTIONS_REMOVE);
9881                                         } else {
9882                                                 pmap_remove_options(
9883                                                         dst_map->pmap,
9884                                                         (addr64_t)(entry->vme_start),
9885                                                         (addr64_t)(entry->vme_end),
9886                                                         PMAP_OPTIONS_REMOVE);
9887                                         }
9888                                         vm_object_deallocate(old_object);
9889                                 }
9890                         }
9891
9892                         if (entry->iokit_acct) {
9893                                 /* keep using iokit accounting */
9894                                 entry->use_pmap = FALSE;
9895                         } else {
9896                                 /* use pmap accounting */
9897                                 entry->use_pmap = TRUE;
9898                         }
9899                         entry->is_sub_map = FALSE;
9900                         VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9901                         object = VME_OBJECT(entry);
9902                         entry->needs_copy = copy_entry->needs_copy;
9903                         entry->wired_count = 0;
9904                         entry->user_wired_count = 0;
9905                         offset = VME_OFFSET(copy_entry);
9906                         VME_OFFSET_SET(entry, offset);
9907
9908                         vm_map_copy_entry_unlink(copy, copy_entry);
9909                         vm_map_copy_entry_dispose(copy, copy_entry);
9910
9911                         /*
9912                          * we could try to push pages into the pmap at this point, BUT
9913                          * this optimization only saved on average 2 us per page if ALL
9914                          * the pages in the source were currently mapped
9915                          * and ALL the pages in the dest were touched, if there were fewer
9916                          * than 2/3 of the pages touched, this optimization actually cost more cycles
9917                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
9918                          */
9919
9920                         /*
9921                          *      Set up for the next iteration.  The map
9922                          *      has not been unlocked, so the next
9923                          *      address should be at the end of this
9924                          *      entry, and the next map entry should be
9925                          *      the one following it.
9926                          */
9927
9928                         start = tmp_entry->vme_end;
9929                         tmp_entry = tmp_entry->vme_next;
9930                 } else {
9931                         vm_map_version_t        version;
9932                         vm_object_t             dst_object;
9933                         vm_object_offset_t      dst_offset;
9934                         kern_return_t           r;
9935
9936                 slow_copy:
9937                         if (entry->needs_copy) {
9938                                 VME_OBJECT_SHADOW(entry,
9939                                                   (entry->vme_end -
9940                                                    entry->vme_start));
9941                                 entry->needs_copy = FALSE;
9942                         }
9943
9944                         dst_object = VME_OBJECT(entry);
9945                         dst_offset = VME_OFFSET(entry);
9946
9947                         /*
9948                          *      Take an object reference, and record
9949                          *      the map version information so that the
9950                          *      map can be safely unlocked.
9951                          */
9952
9953                         if (dst_object == VM_OBJECT_NULL) {
9954                                 /*
9955                                  * We would usually have just taken the
9956                                  * optimized path above if the destination
9957                                  * object has not been allocated yet.  But we
9958                                  * now disable that optimization if the copy
9959                                  * entry's object is not backed by anonymous
9960                                  * memory to avoid replacing malloc'ed
9961                                  * (i.e. re-usable) anonymous memory with a
9962                                  * not-so-anonymous mapping.
9963                                  * So we have to handle this case here and
9964                                  * allocate a new VM object for this map entry.
9965                                  */
9966                                 dst_object = vm_object_allocate(
9967                                         entry->vme_end - entry->vme_start);
9968                                 dst_offset = 0;
9969                                 VME_OBJECT_SET(entry, dst_object);
9970                                 VME_OFFSET_SET(entry, dst_offset);
9971                                 assert(entry->use_pmap);
9972
9973                         }
9974
9975                         vm_object_reference(dst_object);
9976
9977                         /* account for unlock bumping up timestamp */
9978                         version.main_timestamp = dst_map->timestamp + 1;
9979
9980                         vm_map_unlock(dst_map);
9981
9982                         /*
9983                          *      Copy as much as possible in one pass
9984                          */
9985
9986                         copy_size = size;
9987                         r = vm_fault_copy(
9988                                 VME_OBJECT(copy_entry),
9989                                 VME_OFFSET(copy_entry),
9990                                 &copy_size,
9991                                 dst_object,
9992                                 dst_offset,
9993                                 dst_map,
9994                                 &version,
9995                                 THREAD_UNINT );
9996
9997                         /*
9998                          *      Release the object reference
9999                          */
10000
10001                         vm_object_deallocate(dst_object);
10002
10003                         /*
10004                          *      If a hard error occurred, return it now
10005                          */
10006
10007                         if (r != KERN_SUCCESS)
10008                                 return(r);
10009
10010                         if (copy_size != 0) {
10011                                 /*
10012                                  *      Dispose of the copied region
10013                                  */
10014
10015                                 vm_map_copy_clip_end(copy, copy_entry,
10016                                                      copy_entry->vme_start + copy_size);
10017                                 vm_map_copy_entry_unlink(copy, copy_entry);
10018                                 vm_object_deallocate(VME_OBJECT(copy_entry));
10019                                 vm_map_copy_entry_dispose(copy, copy_entry);
10020                         }
10021
10022                         /*
10023                          *      Pick up in the destination map where we left off.
10024                          *
10025                          *      Use the version information to avoid a lookup
10026                          *      in the normal case.
10027                          */
10028
10029                         start += copy_size;
10030                         vm_map_lock(dst_map);
10031                         if (version.main_timestamp == dst_map->timestamp &&
10032                             copy_size != 0) {
10033                                 /* We can safely use saved tmp_entry value */
10034
10035                                 if (tmp_entry->map_aligned &&
10036                                     !VM_MAP_PAGE_ALIGNED(
10037                                             start,
10038                                             VM_MAP_PAGE_MASK(dst_map))) {
10039                                         /* no longer map-aligned */
10040                                         tmp_entry->map_aligned = FALSE;
10041                                 }
10042                                 vm_map_clip_end(dst_map, tmp_entry, start);
10043                                 tmp_entry = tmp_entry->vme_next;
10044                         } else {
10045                                 /* Must do lookup of tmp_entry */
10046
10047                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10048                                         vm_map_unlock(dst_map);
10049                                         return(KERN_INVALID_ADDRESS);
10050                                 }
10051                                 if (tmp_entry->map_aligned &&
10052                                     !VM_MAP_PAGE_ALIGNED(
10053                                             start,
10054                                             VM_MAP_PAGE_MASK(dst_map))) {
10055                                         /* no longer map-aligned */
10056                                         tmp_entry->map_aligned = FALSE;
10057                                 }
10058                                 vm_map_clip_start(dst_map, tmp_entry, start);
10059                         }
10060                 }
10061         }/* while */
10062
10063         return(KERN_SUCCESS);
10064 }/* vm_map_copy_overwrite_aligned */
10065
10066 /*
10067  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
10068  *
10069  *      Description:
10070  *              Copy in data to a kernel buffer from space in the
10071  *              source map. The original space may be optionally
10072  *              deallocated.
10073  *
10074  *              If successful, returns a new copy object.
10075  */
10076 static kern_return_t
10077 vm_map_copyin_kernel_buffer(
10078         vm_map_t        src_map,
10079         vm_map_offset_t src_addr,
10080         vm_map_size_t   len,
10081         boolean_t       src_destroy,
10082         vm_map_copy_t   *copy_result)
10083 {
10084         kern_return_t kr;
10085         vm_map_copy_t copy;
10086         vm_size_t kalloc_size;
10087
10088         if (len > msg_ool_size_small)
10089                 return KERN_INVALID_ARGUMENT;
10090
10091         kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10092
10093         copy = (vm_map_copy_t)kalloc(kalloc_size);
10094         if (copy == VM_MAP_COPY_NULL)
10095                 return KERN_RESOURCE_SHORTAGE;
10096         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10097         copy->size = len;
10098         copy->offset = 0;
10099
10100         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10101         if (kr != KERN_SUCCESS) {
10102                 kfree(copy, kalloc_size);
10103                 return kr;
10104         }
10105         if (src_destroy) {
10106                 (void) vm_map_remove(
10107                         src_map,
10108                         vm_map_trunc_page(src_addr,
10109                                           VM_MAP_PAGE_MASK(src_map)),
10110                         vm_map_round_page(src_addr + len,
10111                                           VM_MAP_PAGE_MASK(src_map)),
10112                         (VM_MAP_REMOVE_INTERRUPTIBLE |
10113                          VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10114                          ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10115         }
10116         *copy_result = copy;
10117         return KERN_SUCCESS;
10118 }
10119
10120 /*
10121  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
10122  *
10123  *      Description:
10124  *              Copy out data from a kernel buffer into space in the
10125  *              destination map. The space may be otpionally dynamically
10126  *              allocated.
10127  *
10128  *              If successful, consumes the copy object.
10129  *              Otherwise, the caller is responsible for it.
10130  */
10131 static int vm_map_copyout_kernel_buffer_failures = 0;
10132 static kern_return_t
10133 vm_map_copyout_kernel_buffer(
10134         vm_map_t                map,
10135         vm_map_address_t        *addr,  /* IN/OUT */
10136         vm_map_copy_t           copy,
10137         vm_map_size_t           copy_size,
10138         boolean_t               overwrite,
10139         boolean_t               consume_on_success)
10140 {
10141         kern_return_t kr = KERN_SUCCESS;
10142         thread_t thread = current_thread();
10143
10144         assert(copy->size == copy_size);
10145
10146         /*
10147          * check for corrupted vm_map_copy structure
10148          */
10149         if (copy_size > msg_ool_size_small || copy->offset)
10150                 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10151                       (long long)copy->size, (long long)copy->offset);
10152
10153         if (!overwrite) {
10154
10155                 /*
10156                  * Allocate space in the target map for the data
10157                  */
10158                 *addr = 0;
10159                 kr = vm_map_enter(map,
10160                                   addr,
10161                                   vm_map_round_page(copy_size,
10162                                                     VM_MAP_PAGE_MASK(map)),
10163                                   (vm_map_offset_t) 0,
10164                                   VM_FLAGS_ANYWHERE,
10165                                   VM_MAP_KERNEL_FLAGS_NONE,
10166                                   VM_KERN_MEMORY_NONE,
10167                                   VM_OBJECT_NULL,
10168                                   (vm_object_offset_t) 0,
10169                                   FALSE,
10170                                   VM_PROT_DEFAULT,
10171                                   VM_PROT_ALL,
10172                                   VM_INHERIT_DEFAULT);
10173                 if (kr != KERN_SUCCESS)
10174                         return kr;
10175 #if KASAN
10176                 if (map->pmap == kernel_pmap) {
10177                         kasan_notify_address(*addr, copy->size);
10178                 }
10179 #endif
10180         }
10181
10182         /*
10183          * Copyout the data from the kernel buffer to the target map.
10184          */
10185         if (thread->map == map) {
10186
10187                 /*
10188                  * If the target map is the current map, just do
10189                  * the copy.
10190                  */
10191                 assert((vm_size_t)copy_size == copy_size);
10192                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10193                         kr = KERN_INVALID_ADDRESS;
10194                 }
10195         }
10196         else {
10197                 vm_map_t oldmap;
10198
10199                 /*
10200                  * If the target map is another map, assume the
10201                  * target's address space identity for the duration
10202                  * of the copy.
10203                  */
10204                 vm_map_reference(map);
10205                 oldmap = vm_map_switch(map);
10206
10207                 assert((vm_size_t)copy_size == copy_size);
10208                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10209                         vm_map_copyout_kernel_buffer_failures++;
10210                         kr = KERN_INVALID_ADDRESS;
10211                 }
10212
10213                 (void) vm_map_switch(oldmap);
10214                 vm_map_deallocate(map);
10215         }
10216
10217         if (kr != KERN_SUCCESS) {
10218                 /* the copy failed, clean up */
10219                 if (!overwrite) {
10220                         /*
10221                          * Deallocate the space we allocated in the target map.
10222                          */
10223                         (void) vm_map_remove(
10224                                 map,
10225                                 vm_map_trunc_page(*addr,
10226                                                   VM_MAP_PAGE_MASK(map)),
10227                                 vm_map_round_page((*addr +
10228                                                    vm_map_round_page(copy_size,
10229                                                                      VM_MAP_PAGE_MASK(map))),
10230                                                   VM_MAP_PAGE_MASK(map)),
10231                                 VM_MAP_REMOVE_NO_FLAGS);
10232                         *addr = 0;
10233                 }
10234         } else {
10235                 /* copy was successful, dicard the copy structure */
10236                 if (consume_on_success) {
10237                         kfree(copy, copy_size + cpy_kdata_hdr_sz);
10238                 }
10239         }
10240
10241         return kr;
10242 }
10243
10244 /*
10245  *      Routine:        vm_map_copy_insert      [internal use only]
10246  *
10247  *      Description:
10248  *              Link a copy chain ("copy") into a map at the
10249  *              specified location (after "where").
10250  *      Side effects:
10251  *              The copy chain is destroyed.
10252  */
10253 static void
10254 vm_map_copy_insert(
10255         vm_map_t        map,
10256         vm_map_entry_t  after_where,
10257         vm_map_copy_t   copy)
10258 {
10259         vm_map_entry_t  entry;
10260
10261         while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10262                 entry = vm_map_copy_first_entry(copy);
10263                 vm_map_copy_entry_unlink(copy, entry);
10264                 vm_map_store_entry_link(map, after_where, entry,
10265                                         VM_MAP_KERNEL_FLAGS_NONE);
10266                 after_where = entry;
10267         }
10268         zfree(vm_map_copy_zone, copy);
10269 }
10270
10271 void
10272 vm_map_copy_remap(
10273         vm_map_t        map,
10274         vm_map_entry_t  where,
10275         vm_map_copy_t   copy,
10276         vm_map_offset_t adjustment,
10277         vm_prot_t       cur_prot,
10278         vm_prot_t       max_prot,
10279         vm_inherit_t    inheritance)
10280 {
10281         vm_map_entry_t  copy_entry, new_entry;
10282
10283         for (copy_entry = vm_map_copy_first_entry(copy);
10284              copy_entry != vm_map_copy_to_entry(copy);
10285              copy_entry = copy_entry->vme_next) {
10286                 /* get a new VM map entry for the map */
10287                 new_entry = vm_map_entry_create(map,
10288                                                 !map->hdr.entries_pageable);
10289                 /* copy the "copy entry" to the new entry */
10290                 vm_map_entry_copy(new_entry, copy_entry);
10291                 /* adjust "start" and "end" */
10292                 new_entry->vme_start += adjustment;
10293                 new_entry->vme_end += adjustment;
10294                 /* clear some attributes */
10295                 new_entry->inheritance = inheritance;
10296                 new_entry->protection = cur_prot;
10297                 new_entry->max_protection = max_prot;
10298                 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10299                 /* take an extra reference on the entry's "object" */
10300                 if (new_entry->is_sub_map) {
10301                         assert(!new_entry->use_pmap); /* not nested */
10302                         vm_map_lock(VME_SUBMAP(new_entry));
10303                         vm_map_reference(VME_SUBMAP(new_entry));
10304                         vm_map_unlock(VME_SUBMAP(new_entry));
10305                 } else {
10306                         vm_object_reference(VME_OBJECT(new_entry));
10307                 }
10308                 /* insert the new entry in the map */
10309                 vm_map_store_entry_link(map, where, new_entry,
10310                                         VM_MAP_KERNEL_FLAGS_NONE);
10311                 /* continue inserting the "copy entries" after the new entry */
10312                 where = new_entry;
10313         }
10314 }
10315
10316
10317 /*
10318  * Returns true if *size matches (or is in the range of) copy->size.
10319  * Upon returning true, the *size field is updated with the actual size of the
10320  * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10321  */
10322 boolean_t
10323 vm_map_copy_validate_size(
10324         vm_map_t                dst_map,
10325         vm_map_copy_t           copy,
10326         vm_map_size_t           *size)
10327 {
10328         if (copy == VM_MAP_COPY_NULL)
10329                 return FALSE;
10330         vm_map_size_t copy_sz = copy->size;
10331         vm_map_size_t sz = *size;
10332         switch (copy->type) {
10333         case VM_MAP_COPY_OBJECT:
10334         case VM_MAP_COPY_KERNEL_BUFFER:
10335                 if (sz == copy_sz)
10336                         return TRUE;
10337                 break;
10338         case VM_MAP_COPY_ENTRY_LIST:
10339                 /*
10340                  * potential page-size rounding prevents us from exactly
10341                  * validating this flavor of vm_map_copy, but we can at least
10342                  * assert that it's within a range.
10343                  */
10344                 if (copy_sz >= sz &&
10345                     copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10346                         *size = copy_sz;
10347                         return TRUE;
10348                 }
10349                 break;
10350         default:
10351                 break;
10352         }
10353         return FALSE;
10354 }
10355
10356 /*
10357  *      Routine:        vm_map_copyout_size
10358  *
10359  *      Description:
10360  *              Copy out a copy chain ("copy") into newly-allocated
10361  *              space in the destination map. Uses a prevalidated
10362  *              size for the copy object (vm_map_copy_validate_size).
10363  *
10364  *              If successful, consumes the copy object.
10365  *              Otherwise, the caller is responsible for it.
10366  */
10367 kern_return_t
10368 vm_map_copyout_size(
10369         vm_map_t                dst_map,
10370         vm_map_address_t        *dst_addr,      /* OUT */
10371         vm_map_copy_t           copy,
10372         vm_map_size_t           copy_size)
10373 {
10374         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10375                                        TRUE, /* consume_on_success */
10376                                        VM_PROT_DEFAULT,
10377                                        VM_PROT_ALL,
10378                                        VM_INHERIT_DEFAULT);
10379 }
10380
10381 /*
10382  *      Routine:        vm_map_copyout
10383  *
10384  *      Description:
10385  *              Copy out a copy chain ("copy") into newly-allocated
10386  *              space in the destination map.
10387  *
10388  *              If successful, consumes the copy object.
10389  *              Otherwise, the caller is responsible for it.
10390  */
10391 kern_return_t
10392 vm_map_copyout(
10393         vm_map_t                dst_map,
10394         vm_map_address_t        *dst_addr,      /* OUT */
10395         vm_map_copy_t           copy)
10396 {
10397         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10398                                        TRUE, /* consume_on_success */
10399                                        VM_PROT_DEFAULT,
10400                                        VM_PROT_ALL,
10401                                        VM_INHERIT_DEFAULT);
10402 }
10403
10404 kern_return_t
10405 vm_map_copyout_internal(
10406         vm_map_t                dst_map,
10407         vm_map_address_t        *dst_addr,      /* OUT */
10408         vm_map_copy_t           copy,
10409         vm_map_size_t           copy_size,
10410         boolean_t               consume_on_success,
10411         vm_prot_t               cur_protection,
10412         vm_prot_t               max_protection,
10413         vm_inherit_t            inheritance)
10414 {
10415         vm_map_size_t           size;
10416         vm_map_size_t           adjustment;
10417         vm_map_offset_t         start;
10418         vm_object_offset_t      vm_copy_start;
10419         vm_map_entry_t          last;
10420         vm_map_entry_t          entry;
10421         vm_map_entry_t          hole_entry;
10422
10423         /*
10424          *      Check for null copy object.
10425          */
10426
10427         if (copy == VM_MAP_COPY_NULL) {
10428                 *dst_addr = 0;
10429                 return(KERN_SUCCESS);
10430         }
10431
10432         if (copy->size != copy_size) {
10433                 *dst_addr = 0;
10434                 return KERN_FAILURE;
10435         }
10436
10437         /*
10438          *      Check for special copy object, created
10439          *      by vm_map_copyin_object.
10440          */
10441
10442         if (copy->type == VM_MAP_COPY_OBJECT) {
10443                 vm_object_t             object = copy->cpy_object;
10444                 kern_return_t           kr;
10445                 vm_object_offset_t      offset;
10446
10447                 offset = vm_object_trunc_page(copy->offset);
10448                 size = vm_map_round_page((copy_size +
10449                                           (vm_map_size_t)(copy->offset -
10450                                                           offset)),
10451                                          VM_MAP_PAGE_MASK(dst_map));
10452                 *dst_addr = 0;
10453                 kr = vm_map_enter(dst_map, dst_addr, size,
10454                                   (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10455                                   VM_MAP_KERNEL_FLAGS_NONE,
10456                                   VM_KERN_MEMORY_NONE,
10457                                   object, offset, FALSE,
10458                                   VM_PROT_DEFAULT, VM_PROT_ALL,
10459                                   VM_INHERIT_DEFAULT);
10460                 if (kr != KERN_SUCCESS)
10461                         return(kr);
10462                 /* Account for non-pagealigned copy object */
10463                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10464                 if (consume_on_success)
10465                         zfree(vm_map_copy_zone, copy);
10466                 return(KERN_SUCCESS);
10467         }
10468
10469         /*
10470          *      Check for special kernel buffer allocated
10471          *      by new_ipc_kmsg_copyin.
10472          */
10473
10474         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10475                 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10476                                                     copy, copy_size, FALSE,
10477                                                     consume_on_success);
10478         }
10479
10480
10481         /*
10482          *      Find space for the data
10483          */
10484
10485         vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10486                                           VM_MAP_COPY_PAGE_MASK(copy));
10487         size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10488                                  VM_MAP_COPY_PAGE_MASK(copy))
10489                 - vm_copy_start;
10490
10491
10492 StartAgain: ;
10493
10494         vm_map_lock(dst_map);
10495         if( dst_map->disable_vmentry_reuse == TRUE) {
10496                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10497                 last = entry;
10498         } else {
10499                 if (dst_map->holelistenabled) {
10500                         hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10501
10502                         if (hole_entry == NULL) {
10503                                 /*
10504                                  * No more space in the map?
10505                                  */
10506                                 vm_map_unlock(dst_map);
10507                                 return(KERN_NO_SPACE);
10508                         }
10509
10510                         last = hole_entry;
10511                         start = last->vme_start;
10512                 } else {
10513                         assert(first_free_is_valid(dst_map));
10514                         start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10515                         vm_map_min(dst_map) : last->vme_end;
10516                 }
10517                 start = vm_map_round_page(start,
10518                                           VM_MAP_PAGE_MASK(dst_map));
10519         }
10520
10521         while (TRUE) {
10522                 vm_map_entry_t  next = last->vme_next;
10523                 vm_map_offset_t end = start + size;
10524
10525                 if ((end > dst_map->max_offset) || (end < start)) {
10526                         if (dst_map->wait_for_space) {
10527                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10528                                         assert_wait((event_t) dst_map,
10529                                                     THREAD_INTERRUPTIBLE);
10530                                         vm_map_unlock(dst_map);
10531                                         thread_block(THREAD_CONTINUE_NULL);
10532                                         goto StartAgain;
10533                                 }
10534                         }
10535                         vm_map_unlock(dst_map);
10536                         return(KERN_NO_SPACE);
10537                 }
10538
10539                 if (dst_map->holelistenabled) {
10540                         if (last->vme_end >= end)
10541                                 break;
10542                 } else {
10543                         /*
10544                          *      If there are no more entries, we must win.
10545                          *
10546                          *      OR
10547                          *
10548                          *      If there is another entry, it must be
10549                          *      after the end of the potential new region.
10550                          */
10551
10552                         if (next == vm_map_to_entry(dst_map))
10553                                 break;
10554
10555                         if (next->vme_start >= end)
10556                                 break;
10557                 }
10558
10559                 last = next;
10560
10561                 if (dst_map->holelistenabled) {
10562                         if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10563                                 /*
10564                                  * Wrapped around
10565                                  */
10566                                 vm_map_unlock(dst_map);
10567                                 return(KERN_NO_SPACE);
10568                         }
10569                         start = last->vme_start;
10570                 } else {
10571                         start = last->vme_end;
10572                 }
10573                 start = vm_map_round_page(start,
10574                                           VM_MAP_PAGE_MASK(dst_map));
10575         }
10576
10577         if (dst_map->holelistenabled) {
10578                 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10579                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10580                 }
10581         }
10582
10583
10584         adjustment = start - vm_copy_start;
10585         if (! consume_on_success) {
10586                 /*
10587                  * We're not allowed to consume "copy", so we'll have to
10588                  * copy its map entries into the destination map below.
10589                  * No need to re-allocate map entries from the correct
10590                  * (pageable or not) zone, since we'll get new map entries
10591                  * during the transfer.
10592                  * We'll also adjust the map entries's "start" and "end"
10593                  * during the transfer, to keep "copy"'s entries consistent
10594                  * with its "offset".
10595                  */
10596                 goto after_adjustments;
10597         }
10598
10599         /*
10600          *      Since we're going to just drop the map
10601          *      entries from the copy into the destination
10602          *      map, they must come from the same pool.
10603          */
10604
10605         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10606                 /*
10607                  * Mismatches occur when dealing with the default
10608                  * pager.
10609                  */
10610                 zone_t          old_zone;
10611                 vm_map_entry_t  next, new;
10612
10613                 /*
10614                  * Find the zone that the copies were allocated from
10615                  */
10616
10617                 entry = vm_map_copy_first_entry(copy);
10618
10619                 /*
10620                  * Reinitialize the copy so that vm_map_copy_entry_link
10621                  * will work.
10622                  */
10623                 vm_map_store_copy_reset(copy, entry);
10624                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10625
10626                 /*
10627                  * Copy each entry.
10628                  */
10629                 while (entry != vm_map_copy_to_entry(copy)) {
10630                         new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10631                         vm_map_entry_copy_full(new, entry);
10632                         assert(!new->iokit_acct);
10633                         if (new->is_sub_map) {
10634                                 /* clr address space specifics */
10635                                 new->use_pmap = FALSE;
10636                         }
10637                         vm_map_copy_entry_link(copy,
10638                                                vm_map_copy_last_entry(copy),
10639                                                new);
10640                         next = entry->vme_next;
10641                         old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10642                         zfree(old_zone, entry);
10643                         entry = next;
10644                 }
10645         }
10646
10647         /*
10648          *      Adjust the addresses in the copy chain, and
10649          *      reset the region attributes.
10650          */
10651
10652         for (entry = vm_map_copy_first_entry(copy);
10653              entry != vm_map_copy_to_entry(copy);
10654              entry = entry->vme_next) {
10655                 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10656                         /*
10657                          * We're injecting this copy entry into a map that
10658                          * has the standard page alignment, so clear
10659                          * "map_aligned" (which might have been inherited
10660                          * from the original map entry).
10661                          */
10662                         entry->map_aligned = FALSE;
10663                 }
10664
10665                 entry->vme_start += adjustment;
10666                 entry->vme_end += adjustment;
10667
10668                 if (entry->map_aligned) {
10669                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10670                                                    VM_MAP_PAGE_MASK(dst_map)));
10671                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10672                                                    VM_MAP_PAGE_MASK(dst_map)));
10673                 }
10674
10675                 entry->inheritance = VM_INHERIT_DEFAULT;
10676                 entry->protection = VM_PROT_DEFAULT;
10677                 entry->max_protection = VM_PROT_ALL;
10678                 entry->behavior = VM_BEHAVIOR_DEFAULT;
10679
10680                 /*
10681                  * If the entry is now wired,
10682                  * map the pages into the destination map.
10683                  */
10684                 if (entry->wired_count != 0) {
10685                         vm_map_offset_t va;
10686                         vm_object_offset_t       offset;
10687                         vm_object_t object;
10688                         vm_prot_t prot;
10689                         int     type_of_fault;
10690
10691                         object = VME_OBJECT(entry);
10692                         offset = VME_OFFSET(entry);
10693                         va = entry->vme_start;
10694
10695                         pmap_pageable(dst_map->pmap,
10696                                       entry->vme_start,
10697                                       entry->vme_end,
10698                                       TRUE);
10699
10700                         while (va < entry->vme_end) {
10701                                 vm_page_t       m;
10702                                 struct vm_object_fault_info fault_info = {};
10703
10704                                 /*
10705                                  * Look up the page in the object.
10706                                  * Assert that the page will be found in the
10707                                  * top object:
10708                                  * either
10709                                  *      the object was newly created by
10710                                  *      vm_object_copy_slowly, and has
10711                                  *      copies of all of the pages from
10712                                  *      the source object
10713                                  * or
10714                                  *      the object was moved from the old
10715                                  *      map entry; because the old map
10716                                  *      entry was wired, all of the pages
10717                                  *      were in the top-level object.
10718                                  *      (XXX not true if we wire pages for
10719                                  *       reading)
10720                                  */
10721                                 vm_object_lock(object);
10722
10723                                 m = vm_page_lookup(object, offset);
10724                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10725                                     m->vmp_absent)
10726                                         panic("vm_map_copyout: wiring %p", m);
10727
10728                                 prot = entry->protection;
10729
10730                                 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10731                                     prot)
10732                                         prot |= VM_PROT_EXECUTE;
10733
10734                                 type_of_fault = DBG_CACHE_HIT_FAULT;
10735
10736                                 fault_info.user_tag = VME_ALIAS(entry);
10737                                 fault_info.pmap_options = 0;
10738                                 if (entry->iokit_acct ||
10739                                     (!entry->is_sub_map && !entry->use_pmap)) {
10740                                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10741                                 }
10742
10743                                 vm_fault_enter(m,
10744                                                dst_map->pmap,
10745                                                va,
10746                                                prot,
10747                                                prot,
10748                                                VM_PAGE_WIRED(m),
10749                                                FALSE, /* change_wiring */
10750                                                VM_KERN_MEMORY_NONE, /* tag - not wiring */
10751                                                &fault_info,
10752                                                NULL,  /* need_retry */
10753                                                &type_of_fault);
10754
10755                                 vm_object_unlock(object);
10756
10757                                 offset += PAGE_SIZE_64;
10758                                 va += PAGE_SIZE;
10759                         }
10760                 }
10761         }
10762
10763 after_adjustments:
10764
10765         /*
10766          *      Correct the page alignment for the result
10767          */
10768
10769         *dst_addr = start + (copy->offset - vm_copy_start);
10770
10771 #if KASAN
10772         kasan_notify_address(*dst_addr, size);
10773 #endif
10774
10775         /*
10776          *      Update the hints and the map size
10777          */
10778
10779         if (consume_on_success) {
10780                 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10781         } else {
10782                 SAVE_HINT_MAP_WRITE(dst_map, last);
10783         }
10784
10785         dst_map->size += size;
10786
10787         /*
10788          *      Link in the copy
10789          */
10790
10791         if (consume_on_success) {
10792                 vm_map_copy_insert(dst_map, last, copy);
10793         } else {
10794                 vm_map_copy_remap(dst_map, last, copy, adjustment,
10795                                   cur_protection, max_protection,
10796                                   inheritance);
10797         }
10798
10799         vm_map_unlock(dst_map);
10800
10801         /*
10802          * XXX  If wiring_required, call vm_map_pageable
10803          */
10804
10805         return(KERN_SUCCESS);
10806 }
10807
10808 /*
10809  *      Routine:        vm_map_copyin
10810  *
10811  *      Description:
10812  *              see vm_map_copyin_common.  Exported via Unsupported.exports.
10813  *
10814  */
10815
10816 #undef vm_map_copyin
10817
10818 kern_return_t
10819 vm_map_copyin(
10820         vm_map_t                        src_map,
10821         vm_map_address_t        src_addr,
10822         vm_map_size_t           len,
10823         boolean_t                       src_destroy,
10824         vm_map_copy_t           *copy_result)   /* OUT */
10825 {
10826         return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10827                                         FALSE, copy_result, FALSE));
10828 }
10829
10830 /*
10831  *      Routine:        vm_map_copyin_common
10832  *
10833  *      Description:
10834  *              Copy the specified region (src_addr, len) from the
10835  *              source address space (src_map), possibly removing
10836  *              the region from the source address space (src_destroy).
10837  *
10838  *      Returns:
10839  *              A vm_map_copy_t object (copy_result), suitable for
10840  *              insertion into another address space (using vm_map_copyout),
10841  *              copying over another address space region (using
10842  *              vm_map_copy_overwrite).  If the copy is unused, it
10843  *              should be destroyed (using vm_map_copy_discard).
10844  *
10845  *      In/out conditions:
10846  *              The source map should not be locked on entry.
10847  */
10848
10849 typedef struct submap_map {
10850         vm_map_t        parent_map;
10851         vm_map_offset_t base_start;
10852         vm_map_offset_t base_end;
10853         vm_map_size_t   base_len;
10854         struct submap_map *next;
10855 } submap_map_t;
10856
10857 kern_return_t
10858 vm_map_copyin_common(
10859         vm_map_t        src_map,
10860         vm_map_address_t src_addr,
10861         vm_map_size_t   len,
10862         boolean_t       src_destroy,
10863         __unused boolean_t      src_volatile,
10864         vm_map_copy_t   *copy_result,   /* OUT */
10865         boolean_t       use_maxprot)
10866 {
10867         int flags;
10868
10869         flags = 0;
10870         if (src_destroy) {
10871                 flags |= VM_MAP_COPYIN_SRC_DESTROY;
10872         }
10873         if (use_maxprot) {
10874                 flags |= VM_MAP_COPYIN_USE_MAXPROT;
10875         }
10876         return vm_map_copyin_internal(src_map,
10877                                       src_addr,
10878                                       len,
10879                                       flags,
10880                                       copy_result);
10881 }
10882 kern_return_t
10883 vm_map_copyin_internal(
10884         vm_map_t        src_map,
10885         vm_map_address_t src_addr,
10886         vm_map_size_t   len,
10887         int             flags,
10888         vm_map_copy_t   *copy_result)   /* OUT */
10889 {
10890         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
10891                                          * in multi-level lookup, this
10892                                          * entry contains the actual
10893                                          * vm_object/offset.
10894                                          */
10895         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
10896
10897         vm_map_offset_t src_start;      /* Start of current entry --
10898                                          * where copy is taking place now
10899                                          */
10900         vm_map_offset_t src_end;        /* End of entire region to be
10901                                          * copied */
10902         vm_map_offset_t src_base;
10903         vm_map_t        base_map = src_map;
10904         boolean_t       map_share=FALSE;
10905         submap_map_t    *parent_maps = NULL;
10906
10907         vm_map_copy_t   copy;           /* Resulting copy */
10908         vm_map_address_t copy_addr;
10909         vm_map_size_t   copy_size;
10910         boolean_t       src_destroy;
10911         boolean_t       use_maxprot;
10912         boolean_t       preserve_purgeable;
10913         boolean_t       entry_was_shared;
10914         vm_map_entry_t  saved_src_entry;
10915
10916         if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
10917                 return KERN_INVALID_ARGUMENT;
10918         }
10919
10920         src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
10921         use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
10922         preserve_purgeable =
10923                 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
10924
10925         /*
10926          *      Check for copies of zero bytes.
10927          */
10928
10929         if (len == 0) {
10930                 *copy_result = VM_MAP_COPY_NULL;
10931                 return(KERN_SUCCESS);
10932         }
10933
10934         /*
10935          *      Check that the end address doesn't overflow
10936          */
10937         src_end = src_addr + len;
10938         if (src_end < src_addr)
10939                 return KERN_INVALID_ADDRESS;
10940
10941         /*
10942          *      Compute (page aligned) start and end of region
10943          */
10944         src_start = vm_map_trunc_page(src_addr,
10945                                       VM_MAP_PAGE_MASK(src_map));
10946         src_end = vm_map_round_page(src_end,
10947                                     VM_MAP_PAGE_MASK(src_map));
10948
10949         /*
10950          * If the copy is sufficiently small, use a kernel buffer instead
10951          * of making a virtual copy.  The theory being that the cost of
10952          * setting up VM (and taking C-O-W faults) dominates the copy costs
10953          * for small regions.
10954          */
10955         if ((len < msg_ool_size_small) &&
10956             !use_maxprot &&
10957             !preserve_purgeable &&
10958             !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
10959             /*
10960              * Since the "msg_ool_size_small" threshold was increased and
10961              * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10962              * address space limits, we revert to doing a virtual copy if the
10963              * copied range goes beyond those limits.  Otherwise, mach_vm_read()
10964              * of the commpage would now fail when it used to work.
10965              */
10966             (src_start >= vm_map_min(src_map) &&
10967              src_start < vm_map_max(src_map) &&
10968              src_end >= vm_map_min(src_map) &&
10969              src_end < vm_map_max(src_map)))
10970                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
10971                                                    src_destroy, copy_result);
10972
10973         XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
10974
10975         /*
10976          *      Allocate a header element for the list.
10977          *
10978          *      Use the start and end in the header to
10979          *      remember the endpoints prior to rounding.
10980          */
10981
10982         copy = vm_map_copy_allocate();
10983         copy->type = VM_MAP_COPY_ENTRY_LIST;
10984         copy->cpy_hdr.entries_pageable = TRUE;
10985 #if 00
10986         copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
10987 #else
10988         /*
10989          * The copy entries can be broken down for a variety of reasons,
10990          * so we can't guarantee that they will remain map-aligned...
10991          * Will need to adjust the first copy_entry's "vme_start" and
10992          * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10993          * rather than the original map's alignment.
10994          */
10995         copy->cpy_hdr.page_shift = PAGE_SHIFT;
10996 #endif
10997
10998         vm_map_store_init( &(copy->cpy_hdr) );
10999
11000         copy->offset = src_addr;
11001         copy->size = len;
11002
11003         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11004
11005 #define RETURN(x)                                               \
11006         MACRO_BEGIN                                             \
11007         vm_map_unlock(src_map);                                 \
11008         if(src_map != base_map)                                 \
11009                 vm_map_deallocate(src_map);                     \
11010         if (new_entry != VM_MAP_ENTRY_NULL)                     \
11011                 vm_map_copy_entry_dispose(copy,new_entry);      \
11012         vm_map_copy_discard(copy);                              \
11013         {                                                       \
11014                 submap_map_t    *_ptr;                          \
11015                                                                 \
11016                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11017                         parent_maps=parent_maps->next;          \
11018                         if (_ptr->parent_map != base_map)       \
11019                                 vm_map_deallocate(_ptr->parent_map);    \
11020                         kfree(_ptr, sizeof(submap_map_t));      \
11021                 }                                               \
11022         }                                                       \
11023         MACRO_RETURN(x);                                        \
11024         MACRO_END
11025
11026         /*
11027          *      Find the beginning of the region.
11028          */
11029
11030         vm_map_lock(src_map);
11031
11032         /*
11033          * Lookup the original "src_addr" rather than the truncated
11034          * "src_start", in case "src_start" falls in a non-map-aligned
11035          * map entry *before* the map entry that contains "src_addr"...
11036          */
11037         if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
11038                 RETURN(KERN_INVALID_ADDRESS);
11039         if(!tmp_entry->is_sub_map) {
11040                 /*
11041                  * ... but clip to the map-rounded "src_start" rather than
11042                  * "src_addr" to preserve map-alignment.  We'll adjust the
11043                  * first copy entry at the end, if needed.
11044                  */
11045                 vm_map_clip_start(src_map, tmp_entry, src_start);
11046         }
11047         if (src_start < tmp_entry->vme_start) {
11048                 /*
11049                  * Move "src_start" up to the start of the
11050                  * first map entry to copy.
11051                  */
11052                 src_start = tmp_entry->vme_start;
11053         }
11054         /* set for later submap fix-up */
11055         copy_addr = src_start;
11056
11057         /*
11058          *      Go through entries until we get to the end.
11059          */
11060
11061         while (TRUE) {
11062                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
11063                 vm_map_size_t   src_size;               /* Size of source
11064                                                          * map entry (in both
11065                                                          * maps)
11066                                                          */
11067
11068                 vm_object_t             src_object;     /* Object to copy */
11069                 vm_object_offset_t      src_offset;
11070
11071                 boolean_t       src_needs_copy;         /* Should source map
11072                                                          * be made read-only
11073                                                          * for copy-on-write?
11074                                                          */
11075
11076                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
11077
11078                 boolean_t       was_wired;              /* Was source wired? */
11079                 vm_map_version_t version;               /* Version before locks
11080                                                          * dropped to make copy
11081                                                          */
11082                 kern_return_t   result;                 /* Return value from
11083                                                          * copy_strategically.
11084                                                          */
11085                 while(tmp_entry->is_sub_map) {
11086                         vm_map_size_t submap_len;
11087                         submap_map_t *ptr;
11088
11089                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11090                         ptr->next = parent_maps;
11091                         parent_maps = ptr;
11092                         ptr->parent_map = src_map;
11093                         ptr->base_start = src_start;
11094                         ptr->base_end = src_end;
11095                         submap_len = tmp_entry->vme_end - src_start;
11096                         if(submap_len > (src_end-src_start))
11097                                 submap_len = src_end-src_start;
11098                         ptr->base_len = submap_len;
11099
11100                         src_start -= tmp_entry->vme_start;
11101                         src_start += VME_OFFSET(tmp_entry);
11102                         src_end = src_start + submap_len;
11103                         src_map = VME_SUBMAP(tmp_entry);
11104                         vm_map_lock(src_map);
11105                         /* keep an outstanding reference for all maps in */
11106                         /* the parents tree except the base map */
11107                         vm_map_reference(src_map);
11108                         vm_map_unlock(ptr->parent_map);
11109                         if (!vm_map_lookup_entry(
11110                                     src_map, src_start, &tmp_entry))
11111                                 RETURN(KERN_INVALID_ADDRESS);
11112                         map_share = TRUE;
11113                         if(!tmp_entry->is_sub_map)
11114                                 vm_map_clip_start(src_map, tmp_entry, src_start);
11115                         src_entry = tmp_entry;
11116                 }
11117                 /* we are now in the lowest level submap... */
11118
11119                 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11120                     (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11121                         /* This is not, supported for now.In future */
11122                         /* we will need to detect the phys_contig   */
11123                         /* condition and then upgrade copy_slowly   */
11124                         /* to do physical copy from the device mem  */
11125                         /* based object. We can piggy-back off of   */
11126                         /* the was wired boolean to set-up the      */
11127                         /* proper handling */
11128                         RETURN(KERN_PROTECTION_FAILURE);
11129                 }
11130                 /*
11131                  *      Create a new address map entry to hold the result.
11132                  *      Fill in the fields from the appropriate source entries.
11133                  *      We must unlock the source map to do this if we need
11134                  *      to allocate a map entry.
11135                  */
11136                 if (new_entry == VM_MAP_ENTRY_NULL) {
11137                         version.main_timestamp = src_map->timestamp;
11138                         vm_map_unlock(src_map);
11139
11140                         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11141
11142                         vm_map_lock(src_map);
11143                         if ((version.main_timestamp + 1) != src_map->timestamp) {
11144                                 if (!vm_map_lookup_entry(src_map, src_start,
11145                                                          &tmp_entry)) {
11146                                         RETURN(KERN_INVALID_ADDRESS);
11147                                 }
11148                                 if (!tmp_entry->is_sub_map)
11149                                         vm_map_clip_start(src_map, tmp_entry, src_start);
11150                                 continue; /* restart w/ new tmp_entry */
11151                         }
11152                 }
11153
11154                 /*
11155                  *      Verify that the region can be read.
11156                  */
11157                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11158                      !use_maxprot) ||
11159                     (src_entry->max_protection & VM_PROT_READ) == 0)
11160                         RETURN(KERN_PROTECTION_FAILURE);
11161
11162                 /*
11163                  *      Clip against the endpoints of the entire region.
11164                  */
11165
11166                 vm_map_clip_end(src_map, src_entry, src_end);
11167
11168                 src_size = src_entry->vme_end - src_start;
11169                 src_object = VME_OBJECT(src_entry);
11170                 src_offset = VME_OFFSET(src_entry);
11171                 was_wired = (src_entry->wired_count != 0);
11172
11173                 vm_map_entry_copy(new_entry, src_entry);
11174                 if (new_entry->is_sub_map) {
11175                         /* clr address space specifics */
11176                         new_entry->use_pmap = FALSE;
11177                 } else {
11178                         /*
11179                          * We're dealing with a copy-on-write operation,
11180                          * so the resulting mapping should not inherit the
11181                          * original mapping's accounting settings.
11182                          * "iokit_acct" should have been cleared in
11183                          * vm_map_entry_copy().
11184                          * "use_pmap" should be reset to its default (TRUE)
11185                          * so that the new mapping gets accounted for in
11186                          * the task's memory footprint.
11187                          */
11188                         assert(!new_entry->iokit_acct);
11189                         new_entry->use_pmap = TRUE;
11190                 }
11191
11192                 /*
11193                  *      Attempt non-blocking copy-on-write optimizations.
11194                  */
11195
11196                 if (src_destroy &&
11197                     (src_object == VM_OBJECT_NULL ||
11198                      (src_object->internal &&
11199                       src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11200                       !map_share))) {
11201                         /*
11202                          * If we are destroying the source, and the object
11203                          * is internal, we can move the object reference
11204                          * from the source to the copy.  The copy is
11205                          * copy-on-write only if the source is.
11206                          * We make another reference to the object, because
11207                          * destroying the source entry will deallocate it.
11208                          */
11209                         vm_object_reference(src_object);
11210
11211                         /*
11212                          * Copy is always unwired.  vm_map_copy_entry
11213                          * set its wired count to zero.
11214                          */
11215
11216                         goto CopySuccessful;
11217                 }
11218
11219
11220         RestartCopy:
11221                 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
11222                     src_object, new_entry, VME_OBJECT(new_entry),
11223                     was_wired, 0);
11224                 if ((src_object == VM_OBJECT_NULL ||
11225                      (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11226                     vm_object_copy_quickly(
11227                             &VME_OBJECT(new_entry),
11228                             src_offset,
11229                             src_size,
11230                             &src_needs_copy,
11231                             &new_entry_needs_copy)) {
11232
11233                         new_entry->needs_copy = new_entry_needs_copy;
11234
11235                         /*
11236                          *      Handle copy-on-write obligations
11237                          */
11238
11239                         if (src_needs_copy && !tmp_entry->needs_copy) {
11240                                 vm_prot_t prot;
11241
11242                                 prot = src_entry->protection & ~VM_PROT_WRITE;
11243
11244                                 if (override_nx(src_map, VME_ALIAS(src_entry))
11245                                     && prot)
11246                                         prot |= VM_PROT_EXECUTE;
11247
11248                                 vm_object_pmap_protect(
11249                                         src_object,
11250                                         src_offset,
11251                                         src_size,
11252                                         (src_entry->is_shared ?
11253                                          PMAP_NULL
11254                                          : src_map->pmap),
11255                                         src_entry->vme_start,
11256                                         prot);
11257
11258                                 assert(tmp_entry->wired_count == 0);
11259                                 tmp_entry->needs_copy = TRUE;
11260                         }
11261
11262                         /*
11263                          *      The map has never been unlocked, so it's safe
11264                          *      to move to the next entry rather than doing
11265                          *      another lookup.
11266                          */
11267
11268                         goto CopySuccessful;
11269                 }
11270
11271                 entry_was_shared = tmp_entry->is_shared;
11272
11273                 /*
11274                  *      Take an object reference, so that we may
11275                  *      release the map lock(s).
11276                  */
11277
11278                 assert(src_object != VM_OBJECT_NULL);
11279                 vm_object_reference(src_object);
11280
11281                 /*
11282                  *      Record the timestamp for later verification.
11283                  *      Unlock the map.
11284                  */
11285
11286                 version.main_timestamp = src_map->timestamp;
11287                 vm_map_unlock(src_map); /* Increments timestamp once! */
11288                 saved_src_entry = src_entry;
11289                 tmp_entry = VM_MAP_ENTRY_NULL;
11290                 src_entry = VM_MAP_ENTRY_NULL;
11291
11292                 /*
11293                  *      Perform the copy
11294                  */
11295
11296                 if (was_wired) {
11297                 CopySlowly:
11298                         vm_object_lock(src_object);
11299                         result = vm_object_copy_slowly(
11300                                 src_object,
11301                                 src_offset,
11302                                 src_size,
11303                                 THREAD_UNINT,
11304                                 &VME_OBJECT(new_entry));
11305                         VME_OFFSET_SET(new_entry, 0);
11306                         new_entry->needs_copy = FALSE;
11307                 }
11308                 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11309                          (entry_was_shared  || map_share)) {
11310                         vm_object_t new_object;
11311
11312                         vm_object_lock_shared(src_object);
11313                         new_object = vm_object_copy_delayed(
11314                                 src_object,
11315                                 src_offset,
11316                                 src_size,
11317                                 TRUE);
11318                         if (new_object == VM_OBJECT_NULL)
11319                                 goto CopySlowly;
11320
11321                         VME_OBJECT_SET(new_entry, new_object);
11322                         assert(new_entry->wired_count == 0);
11323                         new_entry->needs_copy = TRUE;
11324                         assert(!new_entry->iokit_acct);
11325                         assert(new_object->purgable == VM_PURGABLE_DENY);
11326                         assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11327                         result = KERN_SUCCESS;
11328
11329                 } else {
11330                         vm_object_offset_t new_offset;
11331                         new_offset = VME_OFFSET(new_entry);
11332                         result = vm_object_copy_strategically(src_object,
11333                                                               src_offset,
11334                                                               src_size,
11335                                                               &VME_OBJECT(new_entry),
11336                                                               &new_offset,
11337                                                               &new_entry_needs_copy);
11338                         if (new_offset != VME_OFFSET(new_entry)) {
11339                                 VME_OFFSET_SET(new_entry, new_offset);
11340                         }
11341
11342                         new_entry->needs_copy = new_entry_needs_copy;
11343                 }
11344
11345                 if (result == KERN_SUCCESS &&
11346                     preserve_purgeable &&
11347                     src_object->purgable != VM_PURGABLE_DENY) {
11348                         vm_object_t     new_object;
11349
11350                         new_object = VME_OBJECT(new_entry);
11351                         assert(new_object != src_object);
11352                         vm_object_lock(new_object);
11353                         assert(new_object->ref_count == 1);
11354                         assert(new_object->shadow == VM_OBJECT_NULL);
11355                         assert(new_object->copy == VM_OBJECT_NULL);
11356                         assert(new_object->vo_owner == NULL);
11357
11358                         new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11359                         new_object->true_share = TRUE;
11360                         /* start as non-volatile with no owner... */
11361                         new_object->purgable = VM_PURGABLE_NONVOLATILE;
11362                         vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11363                         /* ... and move to src_object's purgeable state */
11364                         if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11365                                 int state;
11366                                 state = src_object->purgable;
11367                                 vm_object_purgable_control(
11368                                         new_object,
11369                                         VM_PURGABLE_SET_STATE_FROM_KERNEL,
11370                                         &state);
11371                         }
11372                         vm_object_unlock(new_object);
11373                         new_object = VM_OBJECT_NULL;
11374                         /* no pmap accounting for purgeable objects */
11375                         new_entry->use_pmap = FALSE;
11376                 }
11377
11378                 if (result != KERN_SUCCESS &&
11379                     result != KERN_MEMORY_RESTART_COPY) {
11380                         vm_map_lock(src_map);
11381                         RETURN(result);
11382                 }
11383
11384                 /*
11385                  *      Throw away the extra reference
11386                  */
11387
11388                 vm_object_deallocate(src_object);
11389
11390                 /*
11391                  *      Verify that the map has not substantially
11392                  *      changed while the copy was being made.
11393                  */
11394
11395                 vm_map_lock(src_map);
11396
11397                 if ((version.main_timestamp + 1) == src_map->timestamp) {
11398                         /* src_map hasn't changed: src_entry is still valid */
11399                         src_entry = saved_src_entry;
11400                         goto VerificationSuccessful;
11401                 }
11402
11403                 /*
11404                  *      Simple version comparison failed.
11405                  *
11406                  *      Retry the lookup and verify that the
11407                  *      same object/offset are still present.
11408                  *
11409                  *      [Note: a memory manager that colludes with
11410                  *      the calling task can detect that we have
11411                  *      cheated.  While the map was unlocked, the
11412                  *      mapping could have been changed and restored.]
11413                  */
11414
11415                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11416                         if (result != KERN_MEMORY_RESTART_COPY) {
11417                                 vm_object_deallocate(VME_OBJECT(new_entry));
11418                                 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11419                                 /* reset accounting state */
11420                                 new_entry->iokit_acct = FALSE;
11421                                 new_entry->use_pmap = TRUE;
11422                         }
11423                         RETURN(KERN_INVALID_ADDRESS);
11424                 }
11425
11426                 src_entry = tmp_entry;
11427                 vm_map_clip_start(src_map, src_entry, src_start);
11428
11429                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11430                      !use_maxprot) ||
11431                     ((src_entry->max_protection & VM_PROT_READ) == 0))
11432                         goto VerificationFailed;
11433
11434                 if (src_entry->vme_end < new_entry->vme_end) {
11435                         /*
11436                          * This entry might have been shortened
11437                          * (vm_map_clip_end) or been replaced with
11438                          * an entry that ends closer to "src_start"
11439                          * than before.
11440                          * Adjust "new_entry" accordingly; copying
11441                          * less memory would be correct but we also
11442                          * redo the copy (see below) if the new entry
11443                          * no longer points at the same object/offset.
11444                          */
11445                         assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11446                                                    VM_MAP_COPY_PAGE_MASK(copy)));
11447                         new_entry->vme_end = src_entry->vme_end;
11448                         src_size = new_entry->vme_end - src_start;
11449                 } else if (src_entry->vme_end > new_entry->vme_end) {
11450                         /*
11451                          * This entry might have been extended
11452                          * (vm_map_entry_simplify() or coalesce)
11453                          * or been replaced with an entry that ends farther
11454                          * from "src_start" than before.
11455                          *
11456                          * We've called vm_object_copy_*() only on
11457                          * the previous <start:end> range, so we can't
11458                          * just extend new_entry.  We have to re-do
11459                          * the copy based on the new entry as if it was
11460                          * pointing at a different object/offset (see
11461                          * "Verification failed" below).
11462                          */
11463                 }
11464
11465                 if ((VME_OBJECT(src_entry) != src_object) ||
11466                     (VME_OFFSET(src_entry) != src_offset) ||
11467                     (src_entry->vme_end > new_entry->vme_end)) {
11468
11469                         /*
11470                          *      Verification failed.
11471                          *
11472                          *      Start over with this top-level entry.
11473                          */
11474
11475                 VerificationFailed: ;
11476
11477                         vm_object_deallocate(VME_OBJECT(new_entry));
11478                         tmp_entry = src_entry;
11479                         continue;
11480                 }
11481
11482                 /*
11483                  *      Verification succeeded.
11484                  */
11485
11486         VerificationSuccessful: ;
11487
11488                 if (result == KERN_MEMORY_RESTART_COPY)
11489                         goto RestartCopy;
11490
11491                 /*
11492                  *      Copy succeeded.
11493                  */
11494
11495         CopySuccessful: ;
11496
11497                 /*
11498                  *      Link in the new copy entry.
11499                  */
11500
11501                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11502                                        new_entry);
11503
11504                 /*
11505                  *      Determine whether the entire region
11506                  *      has been copied.
11507                  */
11508                 src_base = src_start;
11509                 src_start = new_entry->vme_end;
11510                 new_entry = VM_MAP_ENTRY_NULL;
11511                 while ((src_start >= src_end) && (src_end != 0)) {
11512                         submap_map_t    *ptr;
11513
11514                         if (src_map == base_map) {
11515                                 /* back to the top */
11516                                 break;
11517                         }
11518
11519                         ptr = parent_maps;
11520                         assert(ptr != NULL);
11521                         parent_maps = parent_maps->next;
11522
11523                         /* fix up the damage we did in that submap */
11524                         vm_map_simplify_range(src_map,
11525                                               src_base,
11526                                               src_end);
11527
11528                         vm_map_unlock(src_map);
11529                         vm_map_deallocate(src_map);
11530                         vm_map_lock(ptr->parent_map);
11531                         src_map = ptr->parent_map;
11532                         src_base = ptr->base_start;
11533                         src_start = ptr->base_start + ptr->base_len;
11534                         src_end = ptr->base_end;
11535                         if (!vm_map_lookup_entry(src_map,
11536                                                  src_start,
11537                                                  &tmp_entry) &&
11538                             (src_end > src_start)) {
11539                                 RETURN(KERN_INVALID_ADDRESS);
11540                         }
11541                         kfree(ptr, sizeof(submap_map_t));
11542                         if (parent_maps == NULL)
11543                                 map_share = FALSE;
11544                         src_entry = tmp_entry->vme_prev;
11545                 }
11546
11547                 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11548                     (src_start >= src_addr + len) &&
11549                     (src_addr + len != 0)) {
11550                         /*
11551                          * Stop copying now, even though we haven't reached
11552                          * "src_end".  We'll adjust the end of the last copy
11553                          * entry at the end, if needed.
11554                          *
11555                          * If src_map's aligment is different from the
11556                          * system's page-alignment, there could be
11557                          * extra non-map-aligned map entries between
11558                          * the original (non-rounded) "src_addr + len"
11559                          * and the rounded "src_end".
11560                          * We do not want to copy those map entries since
11561                          * they're not part of the copied range.
11562                          */
11563                         break;
11564                 }
11565
11566                 if ((src_start >= src_end) && (src_end != 0))
11567                         break;
11568
11569                 /*
11570                  *      Verify that there are no gaps in the region
11571                  */
11572
11573                 tmp_entry = src_entry->vme_next;
11574                 if ((tmp_entry->vme_start != src_start) ||
11575                     (tmp_entry == vm_map_to_entry(src_map))) {
11576                         RETURN(KERN_INVALID_ADDRESS);
11577                 }
11578         }
11579
11580         /*
11581          * If the source should be destroyed, do it now, since the
11582          * copy was successful.
11583          */
11584         if (src_destroy) {
11585                 (void) vm_map_delete(
11586                         src_map,
11587                         vm_map_trunc_page(src_addr,
11588                                           VM_MAP_PAGE_MASK(src_map)),
11589                         src_end,
11590                         ((src_map == kernel_map) ?
11591                          VM_MAP_REMOVE_KUNWIRE :
11592                          VM_MAP_REMOVE_NO_FLAGS),
11593                         VM_MAP_NULL);
11594         } else {
11595                 /* fix up the damage we did in the base map */
11596                 vm_map_simplify_range(
11597                         src_map,
11598                         vm_map_trunc_page(src_addr,
11599                                           VM_MAP_PAGE_MASK(src_map)),
11600                         vm_map_round_page(src_end,
11601                                           VM_MAP_PAGE_MASK(src_map)));
11602         }
11603
11604         vm_map_unlock(src_map);
11605         tmp_entry = VM_MAP_ENTRY_NULL;
11606
11607         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11608                 vm_map_offset_t original_start, original_offset, original_end;
11609
11610                 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11611
11612                 /* adjust alignment of first copy_entry's "vme_start" */
11613                 tmp_entry = vm_map_copy_first_entry(copy);
11614                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11615                         vm_map_offset_t adjustment;
11616
11617                         original_start = tmp_entry->vme_start;
11618                         original_offset = VME_OFFSET(tmp_entry);
11619
11620                         /* map-align the start of the first copy entry... */
11621                         adjustment = (tmp_entry->vme_start -
11622                                       vm_map_trunc_page(
11623                                               tmp_entry->vme_start,
11624                                               VM_MAP_PAGE_MASK(src_map)));
11625                         tmp_entry->vme_start -= adjustment;
11626                         VME_OFFSET_SET(tmp_entry,
11627                                        VME_OFFSET(tmp_entry) - adjustment);
11628                         copy_addr -= adjustment;
11629                         assert(tmp_entry->vme_start < tmp_entry->vme_end);
11630                         /* ... adjust for mis-aligned start of copy range */
11631                         adjustment =
11632                                 (vm_map_trunc_page(copy->offset,
11633                                                    PAGE_MASK) -
11634                                  vm_map_trunc_page(copy->offset,
11635                                                    VM_MAP_PAGE_MASK(src_map)));
11636                         if (adjustment) {
11637                                 assert(page_aligned(adjustment));
11638                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11639                                 tmp_entry->vme_start += adjustment;
11640                                 VME_OFFSET_SET(tmp_entry,
11641                                                (VME_OFFSET(tmp_entry) +
11642                                                 adjustment));
11643                                 copy_addr += adjustment;
11644                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11645                         }
11646
11647                         /*
11648                          * Assert that the adjustments haven't exposed
11649                          * more than was originally copied...
11650                          */
11651                         assert(tmp_entry->vme_start >= original_start);
11652                         assert(VME_OFFSET(tmp_entry) >= original_offset);
11653                         /*
11654                          * ... and that it did not adjust outside of a
11655                          * a single 16K page.
11656                          */
11657                         assert(vm_map_trunc_page(tmp_entry->vme_start,
11658                                                  VM_MAP_PAGE_MASK(src_map)) ==
11659                                vm_map_trunc_page(original_start,
11660                                                  VM_MAP_PAGE_MASK(src_map)));
11661                 }
11662
11663                 /* adjust alignment of last copy_entry's "vme_end" */
11664                 tmp_entry = vm_map_copy_last_entry(copy);
11665                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11666                         vm_map_offset_t adjustment;
11667
11668                         original_end = tmp_entry->vme_end;
11669
11670                         /* map-align the end of the last copy entry... */
11671                         tmp_entry->vme_end =
11672                                 vm_map_round_page(tmp_entry->vme_end,
11673                                                   VM_MAP_PAGE_MASK(src_map));
11674                         /* ... adjust for mis-aligned end of copy range */
11675                         adjustment =
11676                                 (vm_map_round_page((copy->offset +
11677                                                     copy->size),
11678                                                    VM_MAP_PAGE_MASK(src_map)) -
11679                                  vm_map_round_page((copy->offset +
11680                                                     copy->size),
11681                                                    PAGE_MASK));
11682                         if (adjustment) {
11683                                 assert(page_aligned(adjustment));
11684                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11685                                 tmp_entry->vme_end -= adjustment;
11686                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11687                         }
11688
11689                         /*
11690                          * Assert that the adjustments haven't exposed
11691                          * more than was originally copied...
11692                          */
11693                         assert(tmp_entry->vme_end <= original_end);
11694                         /*
11695                          * ... and that it did not adjust outside of a
11696                          * a single 16K page.
11697                          */
11698                         assert(vm_map_round_page(tmp_entry->vme_end,
11699                                                  VM_MAP_PAGE_MASK(src_map)) ==
11700                                vm_map_round_page(original_end,
11701                                                  VM_MAP_PAGE_MASK(src_map)));
11702                 }
11703         }
11704
11705         /* Fix-up start and end points in copy.  This is necessary */
11706         /* when the various entries in the copy object were picked */
11707         /* up from different sub-maps */
11708
11709         tmp_entry = vm_map_copy_first_entry(copy);
11710         copy_size = 0; /* compute actual size */
11711         while (tmp_entry != vm_map_copy_to_entry(copy)) {
11712                 assert(VM_MAP_PAGE_ALIGNED(
11713                                copy_addr + (tmp_entry->vme_end -
11714                                             tmp_entry->vme_start),
11715                                VM_MAP_COPY_PAGE_MASK(copy)));
11716                 assert(VM_MAP_PAGE_ALIGNED(
11717                                copy_addr,
11718                                VM_MAP_COPY_PAGE_MASK(copy)));
11719
11720                 /*
11721                  * The copy_entries will be injected directly into the
11722                  * destination map and might not be "map aligned" there...
11723                  */
11724                 tmp_entry->map_aligned = FALSE;
11725
11726                 tmp_entry->vme_end = copy_addr +
11727                         (tmp_entry->vme_end - tmp_entry->vme_start);
11728                 tmp_entry->vme_start = copy_addr;
11729                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11730                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11731                 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11732                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11733         }
11734
11735         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11736             copy_size < copy->size) {
11737                 /*
11738                  * The actual size of the VM map copy is smaller than what
11739                  * was requested by the caller.  This must be because some
11740                  * PAGE_SIZE-sized pages are missing at the end of the last
11741                  * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11742                  * The caller might not have been aware of those missing
11743                  * pages and might not want to be aware of it, which is
11744                  * fine as long as they don't try to access (and crash on)
11745                  * those missing pages.
11746                  * Let's adjust the size of the "copy", to avoid failing
11747                  * in vm_map_copyout() or vm_map_copy_overwrite().
11748                  */
11749                 assert(vm_map_round_page(copy_size,
11750                                          VM_MAP_PAGE_MASK(src_map)) ==
11751                        vm_map_round_page(copy->size,
11752                                          VM_MAP_PAGE_MASK(src_map)));
11753                 copy->size = copy_size;
11754         }
11755
11756         *copy_result = copy;
11757         return(KERN_SUCCESS);
11758
11759 #undef  RETURN
11760 }
11761
11762 kern_return_t
11763 vm_map_copy_extract(
11764         vm_map_t                src_map,
11765         vm_map_address_t        src_addr,
11766         vm_map_size_t           len,
11767         vm_map_copy_t           *copy_result,   /* OUT */
11768         vm_prot_t               *cur_prot,      /* OUT */
11769         vm_prot_t               *max_prot)
11770 {
11771         vm_map_offset_t src_start, src_end;
11772         vm_map_copy_t   copy;
11773         kern_return_t   kr;
11774
11775         /*
11776          *      Check for copies of zero bytes.
11777          */
11778
11779         if (len == 0) {
11780                 *copy_result = VM_MAP_COPY_NULL;
11781                 return(KERN_SUCCESS);
11782         }
11783
11784         /*
11785          *      Check that the end address doesn't overflow
11786          */
11787         src_end = src_addr + len;
11788         if (src_end < src_addr)
11789                 return KERN_INVALID_ADDRESS;
11790
11791         /*
11792          *      Compute (page aligned) start and end of region
11793          */
11794         src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11795         src_end = vm_map_round_page(src_end, PAGE_MASK);
11796
11797         /*
11798          *      Allocate a header element for the list.
11799          *
11800          *      Use the start and end in the header to
11801          *      remember the endpoints prior to rounding.
11802          */
11803
11804         copy = vm_map_copy_allocate();
11805         copy->type = VM_MAP_COPY_ENTRY_LIST;
11806         copy->cpy_hdr.entries_pageable = TRUE;
11807
11808         vm_map_store_init(&copy->cpy_hdr);
11809
11810         copy->offset = 0;
11811         copy->size = len;
11812
11813         kr = vm_map_remap_extract(src_map,
11814                                   src_addr,
11815                                   len,
11816                                   FALSE, /* copy */
11817                                   &copy->cpy_hdr,
11818                                   cur_prot,
11819                                   max_prot,
11820                                   VM_INHERIT_SHARE,
11821                                   TRUE, /* pageable */
11822                                   FALSE, /* same_map */
11823                                   VM_MAP_KERNEL_FLAGS_NONE);
11824         if (kr != KERN_SUCCESS) {
11825                 vm_map_copy_discard(copy);
11826                 return kr;
11827         }
11828
11829         *copy_result = copy;
11830         return KERN_SUCCESS;
11831 }
11832
11833 /*
11834  *      vm_map_copyin_object:
11835  *
11836  *      Create a copy object from an object.
11837  *      Our caller donates an object reference.
11838  */
11839
11840 kern_return_t
11841 vm_map_copyin_object(
11842         vm_object_t             object,
11843         vm_object_offset_t      offset, /* offset of region in object */
11844         vm_object_size_t        size,   /* size of region in object */
11845         vm_map_copy_t   *copy_result)   /* OUT */
11846 {
11847         vm_map_copy_t   copy;           /* Resulting copy */
11848
11849         /*
11850          *      We drop the object into a special copy object
11851          *      that contains the object directly.
11852          */
11853
11854         copy = vm_map_copy_allocate();
11855         copy->type = VM_MAP_COPY_OBJECT;
11856         copy->cpy_object = object;
11857         copy->offset = offset;
11858         copy->size = size;
11859
11860         *copy_result = copy;
11861         return(KERN_SUCCESS);
11862 }
11863
11864 static void
11865 vm_map_fork_share(
11866         vm_map_t        old_map,
11867         vm_map_entry_t  old_entry,
11868         vm_map_t        new_map)
11869 {
11870         vm_object_t     object;
11871         vm_map_entry_t  new_entry;
11872
11873         /*
11874          *      New sharing code.  New map entry
11875          *      references original object.  Internal
11876          *      objects use asynchronous copy algorithm for
11877          *      future copies.  First make sure we have
11878          *      the right object.  If we need a shadow,
11879          *      or someone else already has one, then
11880          *      make a new shadow and share it.
11881          */
11882
11883         object = VME_OBJECT(old_entry);
11884         if (old_entry->is_sub_map) {
11885                 assert(old_entry->wired_count == 0);
11886 #ifndef NO_NESTED_PMAP
11887                 if(old_entry->use_pmap) {
11888                         kern_return_t   result;
11889
11890                         result = pmap_nest(new_map->pmap,
11891                                            (VME_SUBMAP(old_entry))->pmap,
11892                                            (addr64_t)old_entry->vme_start,
11893                                            (addr64_t)old_entry->vme_start,
11894                                            (uint64_t)(old_entry->vme_end - old_entry->vme_start));
11895                         if(result)
11896                                 panic("vm_map_fork_share: pmap_nest failed!");
11897                 }
11898 #endif  /* NO_NESTED_PMAP */
11899         } else if (object == VM_OBJECT_NULL) {
11900                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
11901                                                             old_entry->vme_start));
11902                 VME_OFFSET_SET(old_entry, 0);
11903                 VME_OBJECT_SET(old_entry, object);
11904                 old_entry->use_pmap = TRUE;
11905 //              assert(!old_entry->needs_copy);
11906         } else if (object->copy_strategy !=
11907                    MEMORY_OBJECT_COPY_SYMMETRIC) {
11908
11909                 /*
11910                  *      We are already using an asymmetric
11911                  *      copy, and therefore we already have
11912                  *      the right object.
11913                  */
11914
11915                 assert(! old_entry->needs_copy);
11916         }
11917         else if (old_entry->needs_copy ||       /* case 1 */
11918                  object->shadowed ||            /* case 2 */
11919                  (!object->true_share &&        /* case 3 */
11920                   !old_entry->is_shared &&
11921                   (object->vo_size >
11922                    (vm_map_size_t)(old_entry->vme_end -
11923                                    old_entry->vme_start)))) {
11924
11925                 /*
11926                  *      We need to create a shadow.
11927                  *      There are three cases here.
11928                  *      In the first case, we need to
11929                  *      complete a deferred symmetrical
11930                  *      copy that we participated in.
11931                  *      In the second and third cases,
11932                  *      we need to create the shadow so
11933                  *      that changes that we make to the
11934                  *      object do not interfere with
11935                  *      any symmetrical copies which
11936                  *      have occured (case 2) or which
11937                  *      might occur (case 3).
11938                  *
11939                  *      The first case is when we had
11940                  *      deferred shadow object creation
11941                  *      via the entry->needs_copy mechanism.
11942                  *      This mechanism only works when
11943                  *      only one entry points to the source
11944                  *      object, and we are about to create
11945                  *      a second entry pointing to the
11946                  *      same object. The problem is that
11947                  *      there is no way of mapping from
11948                  *      an object to the entries pointing
11949                  *      to it. (Deferred shadow creation
11950                  *      works with one entry because occurs
11951                  *      at fault time, and we walk from the
11952                  *      entry to the object when handling
11953                  *      the fault.)
11954                  *
11955                  *      The second case is when the object
11956                  *      to be shared has already been copied
11957                  *      with a symmetric copy, but we point
11958                  *      directly to the object without
11959                  *      needs_copy set in our entry. (This
11960                  *      can happen because different ranges
11961                  *      of an object can be pointed to by
11962                  *      different entries. In particular,
11963                  *      a single entry pointing to an object
11964                  *      can be split by a call to vm_inherit,
11965                  *      which, combined with task_create, can
11966                  *      result in the different entries
11967                  *      having different needs_copy values.)
11968                  *      The shadowed flag in the object allows
11969                  *      us to detect this case. The problem
11970                  *      with this case is that if this object
11971                  *      has or will have shadows, then we
11972                  *      must not perform an asymmetric copy
11973                  *      of this object, since such a copy
11974                  *      allows the object to be changed, which
11975                  *      will break the previous symmetrical
11976                  *      copies (which rely upon the object
11977                  *      not changing). In a sense, the shadowed
11978                  *      flag says "don't change this object".
11979                  *      We fix this by creating a shadow
11980                  *      object for this object, and sharing
11981                  *      that. This works because we are free
11982                  *      to change the shadow object (and thus
11983                  *      to use an asymmetric copy strategy);
11984                  *      this is also semantically correct,
11985                  *      since this object is temporary, and
11986                  *      therefore a copy of the object is
11987                  *      as good as the object itself. (This
11988                  *      is not true for permanent objects,
11989                  *      since the pager needs to see changes,
11990                  *      which won't happen if the changes
11991                  *      are made to a copy.)
11992                  *
11993                  *      The third case is when the object
11994                  *      to be shared has parts sticking
11995                  *      outside of the entry we're working
11996                  *      with, and thus may in the future
11997                  *      be subject to a symmetrical copy.
11998                  *      (This is a preemptive version of
11999                  *      case 2.)
12000                  */
12001                 VME_OBJECT_SHADOW(old_entry,
12002                                   (vm_map_size_t) (old_entry->vme_end -
12003                                                    old_entry->vme_start));
12004
12005                 /*
12006                  *      If we're making a shadow for other than
12007                  *      copy on write reasons, then we have
12008                  *      to remove write permission.
12009                  */
12010
12011                 if (!old_entry->needs_copy &&
12012                     (old_entry->protection & VM_PROT_WRITE)) {
12013                         vm_prot_t prot;
12014
12015                         assert(!pmap_has_prot_policy(old_entry->protection));
12016
12017                         prot = old_entry->protection & ~VM_PROT_WRITE;
12018
12019                         assert(!pmap_has_prot_policy(prot));
12020
12021                         if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
12022                                 prot |= VM_PROT_EXECUTE;
12023
12024
12025                         if (old_map->mapped_in_other_pmaps) {
12026                                 vm_object_pmap_protect(
12027                                         VME_OBJECT(old_entry),
12028                                         VME_OFFSET(old_entry),
12029                                         (old_entry->vme_end -
12030                                          old_entry->vme_start),
12031                                         PMAP_NULL,
12032                                         old_entry->vme_start,
12033                                         prot);
12034                         } else {
12035                                 pmap_protect(old_map->pmap,
12036                                              old_entry->vme_start,
12037                                              old_entry->vme_end,
12038                                              prot);
12039                         }
12040                 }
12041
12042                 old_entry->needs_copy = FALSE;
12043                 object = VME_OBJECT(old_entry);
12044         }
12045
12046
12047         /*
12048          *      If object was using a symmetric copy strategy,
12049          *      change its copy strategy to the default
12050          *      asymmetric copy strategy, which is copy_delay
12051          *      in the non-norma case and copy_call in the
12052          *      norma case. Bump the reference count for the
12053          *      new entry.
12054          */
12055
12056         if(old_entry->is_sub_map) {
12057                 vm_map_lock(VME_SUBMAP(old_entry));
12058                 vm_map_reference(VME_SUBMAP(old_entry));
12059                 vm_map_unlock(VME_SUBMAP(old_entry));
12060         } else {
12061                 vm_object_lock(object);
12062                 vm_object_reference_locked(object);
12063                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12064                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12065                 }
12066                 vm_object_unlock(object);
12067         }
12068
12069         /*
12070          *      Clone the entry, using object ref from above.
12071          *      Mark both entries as shared.
12072          */
12073
12074         new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12075                                                           * map or descendants */
12076         vm_map_entry_copy(new_entry, old_entry);
12077         old_entry->is_shared = TRUE;
12078         new_entry->is_shared = TRUE;
12079
12080         /*
12081          * We're dealing with a shared mapping, so the resulting mapping
12082          * should inherit some of the original mapping's accounting settings.
12083          * "iokit_acct" should have been cleared in vm_map_entry_copy().
12084          * "use_pmap" should stay the same as before (if it hasn't been reset
12085          * to TRUE when we cleared "iokit_acct").
12086          */
12087         assert(!new_entry->iokit_acct);
12088
12089         /*
12090          *      If old entry's inheritence is VM_INHERIT_NONE,
12091          *      the new entry is for corpse fork, remove the
12092          *      write permission from the new entry.
12093          */
12094         if (old_entry->inheritance == VM_INHERIT_NONE) {
12095
12096                 new_entry->protection &= ~VM_PROT_WRITE;
12097                 new_entry->max_protection &= ~VM_PROT_WRITE;
12098         }
12099
12100         /*
12101          *      Insert the entry into the new map -- we
12102          *      know we're inserting at the end of the new
12103          *      map.
12104          */
12105
12106         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12107                                 VM_MAP_KERNEL_FLAGS_NONE);
12108
12109         /*
12110          *      Update the physical map
12111          */
12112
12113         if (old_entry->is_sub_map) {
12114                 /* Bill Angell pmap support goes here */
12115         } else {
12116                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12117                           old_entry->vme_end - old_entry->vme_start,
12118                           old_entry->vme_start);
12119         }
12120 }
12121
12122 static boolean_t
12123 vm_map_fork_copy(
12124         vm_map_t        old_map,
12125         vm_map_entry_t  *old_entry_p,
12126         vm_map_t        new_map,
12127         int             vm_map_copyin_flags)
12128 {
12129         vm_map_entry_t old_entry = *old_entry_p;
12130         vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12131         vm_map_offset_t start = old_entry->vme_start;
12132         vm_map_copy_t copy;
12133         vm_map_entry_t last = vm_map_last_entry(new_map);
12134
12135         vm_map_unlock(old_map);
12136         /*
12137          *      Use maxprot version of copyin because we
12138          *      care about whether this memory can ever
12139          *      be accessed, not just whether it's accessible
12140          *      right now.
12141          */
12142         vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12143         if (vm_map_copyin_internal(old_map, start, entry_size,
12144                                    vm_map_copyin_flags, &copy)
12145             != KERN_SUCCESS) {
12146                 /*
12147                  *      The map might have changed while it
12148                  *      was unlocked, check it again.  Skip
12149                  *      any blank space or permanently
12150                  *      unreadable region.
12151                  */
12152                 vm_map_lock(old_map);
12153                 if (!vm_map_lookup_entry(old_map, start, &last) ||
12154                     (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12155                         last = last->vme_next;
12156                 }
12157                 *old_entry_p = last;
12158
12159                 /*
12160                  * XXX  For some error returns, want to
12161                  * XXX  skip to the next element.  Note
12162                  *      that INVALID_ADDRESS and
12163                  *      PROTECTION_FAILURE are handled above.
12164                  */
12165
12166                 return FALSE;
12167         }
12168
12169         /*
12170          *      Insert the copy into the new map
12171          */
12172
12173         vm_map_copy_insert(new_map, last, copy);
12174
12175         /*
12176          *      Pick up the traversal at the end of
12177          *      the copied region.
12178          */
12179
12180         vm_map_lock(old_map);
12181         start += entry_size;
12182         if (! vm_map_lookup_entry(old_map, start, &last)) {
12183                 last = last->vme_next;
12184         } else {
12185                 if (last->vme_start == start) {
12186                         /*
12187                          * No need to clip here and we don't
12188                          * want to cause any unnecessary
12189                          * unnesting...
12190                          */
12191                 } else {
12192                         vm_map_clip_start(old_map, last, start);
12193                 }
12194         }
12195         *old_entry_p = last;
12196
12197         return TRUE;
12198 }
12199
12200 /*
12201  *      vm_map_fork:
12202  *
12203  *      Create and return a new map based on the old
12204  *      map, according to the inheritance values on the
12205  *      regions in that map and the options.
12206  *
12207  *      The source map must not be locked.
12208  */
12209 vm_map_t
12210 vm_map_fork(
12211         ledger_t        ledger,
12212         vm_map_t        old_map,
12213         int             options)
12214 {
12215         pmap_t          new_pmap;
12216         vm_map_t        new_map;
12217         vm_map_entry_t  old_entry;
12218         vm_map_size_t   new_size = 0, entry_size;
12219         vm_map_entry_t  new_entry;
12220         boolean_t       src_needs_copy;
12221         boolean_t       new_entry_needs_copy;
12222         boolean_t       pmap_is64bit;
12223         int             vm_map_copyin_flags;
12224         vm_inherit_t    old_entry_inheritance;
12225         int             map_create_options;
12226         kern_return_t   footprint_collect_kr;
12227
12228         if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12229                         VM_MAP_FORK_PRESERVE_PURGEABLE |
12230                         VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12231                 /* unsupported option */
12232                 return VM_MAP_NULL;
12233         }
12234
12235         pmap_is64bit =
12236 #if defined(__i386__) || defined(__x86_64__)
12237                                old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12238 #elif defined(__arm64__)
12239                                old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12240 #elif defined(__arm__)
12241                                FALSE;
12242 #else
12243 #error Unknown architecture.
12244 #endif
12245
12246         new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
12247
12248         vm_map_reference_swap(old_map);
12249         vm_map_lock(old_map);
12250
12251         map_create_options = 0;
12252         if (old_map->hdr.entries_pageable) {
12253                 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12254         }
12255         if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12256                 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12257                 footprint_collect_kr = KERN_SUCCESS;
12258         }
12259         new_map = vm_map_create_options(new_pmap,
12260                                         old_map->min_offset,
12261                                         old_map->max_offset,
12262                                         map_create_options);
12263         vm_map_lock(new_map);
12264         vm_commit_pagezero_status(new_map);
12265         /* inherit the parent map's page size */
12266         vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12267         for (
12268                 old_entry = vm_map_first_entry(old_map);
12269                 old_entry != vm_map_to_entry(old_map);
12270                 ) {
12271
12272                 entry_size = old_entry->vme_end - old_entry->vme_start;
12273
12274                 old_entry_inheritance = old_entry->inheritance;
12275                 /*
12276                  * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12277                  * share VM_INHERIT_NONE entries that are not backed by a
12278                  * device pager.
12279                  */
12280                 if (old_entry_inheritance == VM_INHERIT_NONE &&
12281                     (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12282                     !(!old_entry->is_sub_map &&
12283                       VME_OBJECT(old_entry) != NULL &&
12284                       VME_OBJECT(old_entry)->pager != NULL &&
12285                       is_device_pager_ops(
12286                               VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12287                         old_entry_inheritance = VM_INHERIT_SHARE;
12288                 }
12289
12290                 if (old_entry_inheritance != VM_INHERIT_NONE &&
12291                     (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12292                     footprint_collect_kr == KERN_SUCCESS) {
12293                         /*
12294                          * The corpse won't have old_map->pmap to query
12295                          * footprint information, so collect that data now
12296                          * and store it in new_map->vmmap_corpse_footprint
12297                          * for later autopsy.
12298                          */
12299                         footprint_collect_kr =
12300                                 vm_map_corpse_footprint_collect(old_map,
12301                                                                 old_entry,
12302                                                                 new_map);
12303                 }
12304
12305                 switch (old_entry_inheritance) {
12306                 case VM_INHERIT_NONE:
12307                         break;
12308
12309                 case VM_INHERIT_SHARE:
12310                         vm_map_fork_share(old_map, old_entry, new_map);
12311                         new_size += entry_size;
12312                         break;
12313
12314                 case VM_INHERIT_COPY:
12315
12316                         /*
12317                          *      Inline the copy_quickly case;
12318                          *      upon failure, fall back on call
12319                          *      to vm_map_fork_copy.
12320                          */
12321
12322                         if(old_entry->is_sub_map)
12323                                 break;
12324                         if ((old_entry->wired_count != 0) ||
12325                             ((VME_OBJECT(old_entry) != NULL) &&
12326                              (VME_OBJECT(old_entry)->true_share))) {
12327                                 goto slow_vm_map_fork_copy;
12328                         }
12329
12330                         new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12331                         vm_map_entry_copy(new_entry, old_entry);
12332                         if (new_entry->is_sub_map) {
12333                                 /* clear address space specifics */
12334                                 new_entry->use_pmap = FALSE;
12335                         } else {
12336                                 /*
12337                                  * We're dealing with a copy-on-write operation,
12338                                  * so the resulting mapping should not inherit
12339                                  * the original mapping's accounting settings.
12340                                  * "iokit_acct" should have been cleared in
12341                                  * vm_map_entry_copy().
12342                                  * "use_pmap" should be reset to its default
12343                                  * (TRUE) so that the new mapping gets
12344                                  * accounted for in the task's memory footprint.
12345                                  */
12346                                 assert(!new_entry->iokit_acct);
12347                                 new_entry->use_pmap = TRUE;
12348                         }
12349
12350                         if (! vm_object_copy_quickly(
12351                                     &VME_OBJECT(new_entry),
12352                                     VME_OFFSET(old_entry),
12353                                     (old_entry->vme_end -
12354                                      old_entry->vme_start),
12355                                     &src_needs_copy,
12356                                     &new_entry_needs_copy)) {
12357                                 vm_map_entry_dispose(new_map, new_entry);
12358                                 goto slow_vm_map_fork_copy;
12359                         }
12360
12361                         /*
12362                          *      Handle copy-on-write obligations
12363                          */
12364
12365                         if (src_needs_copy && !old_entry->needs_copy) {
12366                                 vm_prot_t prot;
12367
12368                                 assert(!pmap_has_prot_policy(old_entry->protection));
12369
12370                                 prot = old_entry->protection & ~VM_PROT_WRITE;
12371
12372                                 if (override_nx(old_map, VME_ALIAS(old_entry))
12373                                     && prot)
12374                                         prot |= VM_PROT_EXECUTE;
12375
12376                                 assert(!pmap_has_prot_policy(prot));
12377
12378                                 vm_object_pmap_protect(
12379                                         VME_OBJECT(old_entry),
12380                                         VME_OFFSET(old_entry),
12381                                         (old_entry->vme_end -
12382                                          old_entry->vme_start),
12383                                         ((old_entry->is_shared
12384                                           || old_map->mapped_in_other_pmaps)
12385                                          ? PMAP_NULL :
12386                                          old_map->pmap),
12387                                         old_entry->vme_start,
12388                                         prot);
12389
12390                                 assert(old_entry->wired_count == 0);
12391                                 old_entry->needs_copy = TRUE;
12392                         }
12393                         new_entry->needs_copy = new_entry_needs_copy;
12394
12395                         /*
12396                          *      Insert the entry at the end
12397                          *      of the map.
12398                          */
12399
12400                         vm_map_store_entry_link(new_map,
12401                                                 vm_map_last_entry(new_map),
12402                                                 new_entry,
12403                                                 VM_MAP_KERNEL_FLAGS_NONE);
12404                         new_size += entry_size;
12405                         break;
12406
12407                 slow_vm_map_fork_copy:
12408                         vm_map_copyin_flags = 0;
12409                         if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12410                                 vm_map_copyin_flags |=
12411                                         VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12412                         }
12413                         if (vm_map_fork_copy(old_map,
12414                                              &old_entry,
12415                                              new_map,
12416                                              vm_map_copyin_flags)) {
12417                                 new_size += entry_size;
12418                         }
12419                         continue;
12420                 }
12421                 old_entry = old_entry->vme_next;
12422         }
12423
12424 #if defined(__arm64__)
12425         pmap_insert_sharedpage(new_map->pmap);
12426 #endif
12427
12428         new_map->size = new_size;
12429
12430         if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12431                 vm_map_corpse_footprint_collect_done(new_map);
12432         }
12433
12434         vm_map_unlock(new_map);
12435         vm_map_unlock(old_map);
12436         vm_map_deallocate(old_map);
12437
12438         return(new_map);
12439 }
12440
12441 /*
12442  * vm_map_exec:
12443  *
12444  *      Setup the "new_map" with the proper execution environment according
12445  *      to the type of executable (platform, 64bit, chroot environment).
12446  *      Map the comm page and shared region, etc...
12447  */
12448 kern_return_t
12449 vm_map_exec(
12450         vm_map_t        new_map,
12451         task_t          task,
12452         boolean_t       is64bit,
12453         void            *fsroot,
12454         cpu_type_t      cpu,
12455         cpu_subtype_t   cpu_subtype)
12456 {
12457         SHARED_REGION_TRACE_DEBUG(
12458                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12459                  (void *)VM_KERNEL_ADDRPERM(current_task()),
12460                  (void *)VM_KERNEL_ADDRPERM(new_map),
12461                  (void *)VM_KERNEL_ADDRPERM(task),
12462                  (void *)VM_KERNEL_ADDRPERM(fsroot),
12463                  cpu,
12464                  cpu_subtype));
12465         (void) vm_commpage_enter(new_map, task, is64bit);
12466         (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12467         SHARED_REGION_TRACE_DEBUG(
12468                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12469                  (void *)VM_KERNEL_ADDRPERM(current_task()),
12470                  (void *)VM_KERNEL_ADDRPERM(new_map),
12471                  (void *)VM_KERNEL_ADDRPERM(task),
12472                  (void *)VM_KERNEL_ADDRPERM(fsroot),
12473                  cpu,
12474                  cpu_subtype));
12475         return KERN_SUCCESS;
12476 }
12477
12478 /*
12479  *      vm_map_lookup_locked:
12480  *
12481  *      Finds the VM object, offset, and
12482  *      protection for a given virtual address in the
12483  *      specified map, assuming a page fault of the
12484  *      type specified.
12485  *
12486  *      Returns the (object, offset, protection) for
12487  *      this address, whether it is wired down, and whether
12488  *      this map has the only reference to the data in question.
12489  *      In order to later verify this lookup, a "version"
12490  *      is returned.
12491  *
12492  *      The map MUST be locked by the caller and WILL be
12493  *      locked on exit.  In order to guarantee the
12494  *      existence of the returned object, it is returned
12495  *      locked.
12496  *
12497  *      If a lookup is requested with "write protection"
12498  *      specified, the map may be changed to perform virtual
12499  *      copying operations, although the data referenced will
12500  *      remain the same.
12501  */
12502 kern_return_t
12503 vm_map_lookup_locked(
12504         vm_map_t                *var_map,       /* IN/OUT */
12505         vm_map_offset_t         vaddr,
12506         vm_prot_t               fault_type,
12507         int                     object_lock_type,
12508         vm_map_version_t        *out_version,   /* OUT */
12509         vm_object_t             *object,        /* OUT */
12510         vm_object_offset_t      *offset,        /* OUT */
12511         vm_prot_t               *out_prot,      /* OUT */
12512         boolean_t               *wired,         /* OUT */
12513         vm_object_fault_info_t  fault_info,     /* OUT */
12514         vm_map_t                *real_map)
12515 {
12516         vm_map_entry_t                  entry;
12517         vm_map_t                        map = *var_map;
12518         vm_map_t                        old_map = *var_map;
12519         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
12520         vm_map_offset_t                 cow_parent_vaddr = 0;
12521         vm_map_offset_t                 old_start = 0;
12522         vm_map_offset_t                 old_end = 0;
12523         vm_prot_t                       prot;
12524         boolean_t                       mask_protections;
12525         boolean_t                       force_copy;
12526         vm_prot_t                       original_fault_type;
12527
12528         /*
12529          * VM_PROT_MASK means that the caller wants us to use "fault_type"
12530          * as a mask against the mapping's actual protections, not as an
12531          * absolute value.
12532          */
12533         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12534         force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12535         fault_type &= VM_PROT_ALL;
12536         original_fault_type = fault_type;
12537
12538         *real_map = map;
12539
12540 RetryLookup:
12541         fault_type = original_fault_type;
12542
12543         /*
12544          *      If the map has an interesting hint, try it before calling
12545          *      full blown lookup routine.
12546          */
12547         entry = map->hint;
12548
12549         if ((entry == vm_map_to_entry(map)) ||
12550             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12551                 vm_map_entry_t  tmp_entry;
12552
12553                 /*
12554                  *      Entry was either not a valid hint, or the vaddr
12555                  *      was not contained in the entry, so do a full lookup.
12556                  */
12557                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12558                         if((cow_sub_map_parent) && (cow_sub_map_parent != map))
12559                                 vm_map_unlock(cow_sub_map_parent);
12560                         if((*real_map != map)
12561                            && (*real_map != cow_sub_map_parent))
12562                                 vm_map_unlock(*real_map);
12563                         return KERN_INVALID_ADDRESS;
12564                 }
12565
12566                 entry = tmp_entry;
12567         }
12568         if(map == old_map) {
12569                 old_start = entry->vme_start;
12570                 old_end = entry->vme_end;
12571         }
12572
12573         /*
12574          *      Handle submaps.  Drop lock on upper map, submap is
12575          *      returned locked.
12576          */
12577
12578 submap_recurse:
12579         if (entry->is_sub_map) {
12580                 vm_map_offset_t         local_vaddr;
12581                 vm_map_offset_t         end_delta;
12582                 vm_map_offset_t         start_delta;
12583                 vm_map_entry_t          submap_entry;
12584                 vm_prot_t               subentry_protection;
12585                 vm_prot_t               subentry_max_protection;
12586                 boolean_t               mapped_needs_copy=FALSE;
12587
12588                 local_vaddr = vaddr;
12589
12590                 if ((entry->use_pmap &&
12591                      ! ((fault_type & VM_PROT_WRITE) ||
12592                         force_copy))) {
12593                         /* if real_map equals map we unlock below */
12594                         if ((*real_map != map) &&
12595                             (*real_map != cow_sub_map_parent))
12596                                 vm_map_unlock(*real_map);
12597                         *real_map = VME_SUBMAP(entry);
12598                 }
12599
12600                 if(entry->needs_copy &&
12601                    ((fault_type & VM_PROT_WRITE) ||
12602                     force_copy)) {
12603                         if (!mapped_needs_copy) {
12604                                 if (vm_map_lock_read_to_write(map)) {
12605                                         vm_map_lock_read(map);
12606                                         *real_map = map;
12607                                         goto RetryLookup;
12608                                 }
12609                                 vm_map_lock_read(VME_SUBMAP(entry));
12610                                 *var_map = VME_SUBMAP(entry);
12611                                 cow_sub_map_parent = map;
12612                                 /* reset base to map before cow object */
12613                                 /* this is the map which will accept   */
12614                                 /* the new cow object */
12615                                 old_start = entry->vme_start;
12616                                 old_end = entry->vme_end;
12617                                 cow_parent_vaddr = vaddr;
12618                                 mapped_needs_copy = TRUE;
12619                         } else {
12620                                 vm_map_lock_read(VME_SUBMAP(entry));
12621                                 *var_map = VME_SUBMAP(entry);
12622                                 if((cow_sub_map_parent != map) &&
12623                                    (*real_map != map))
12624                                         vm_map_unlock(map);
12625                         }
12626                 } else {
12627                         vm_map_lock_read(VME_SUBMAP(entry));
12628                         *var_map = VME_SUBMAP(entry);
12629                         /* leave map locked if it is a target */
12630                         /* cow sub_map above otherwise, just  */
12631                         /* follow the maps down to the object */
12632                         /* here we unlock knowing we are not  */
12633                         /* revisiting the map.  */
12634                         if((*real_map != map) && (map != cow_sub_map_parent))
12635                                 vm_map_unlock_read(map);
12636                 }
12637
12638                 map = *var_map;
12639
12640                 /* calculate the offset in the submap for vaddr */
12641                 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12642
12643         RetrySubMap:
12644                 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12645                         if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
12646                                 vm_map_unlock(cow_sub_map_parent);
12647                         }
12648                         if((*real_map != map)
12649                            && (*real_map != cow_sub_map_parent)) {
12650                                 vm_map_unlock(*real_map);
12651                         }
12652                         *real_map = map;
12653                         return KERN_INVALID_ADDRESS;
12654                 }
12655
12656                 /* find the attenuated shadow of the underlying object */
12657                 /* on our target map */
12658
12659                 /* in english the submap object may extend beyond the     */
12660                 /* region mapped by the entry or, may only fill a portion */
12661                 /* of it.  For our purposes, we only care if the object   */
12662                 /* doesn't fill.  In this case the area which will        */
12663                 /* ultimately be clipped in the top map will only need    */
12664                 /* to be as big as the portion of the underlying entry    */
12665                 /* which is mapped */
12666                 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12667                         submap_entry->vme_start - VME_OFFSET(entry) : 0;
12668
12669                 end_delta =
12670                         (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12671                         submap_entry->vme_end ?
12672                         0 : (VME_OFFSET(entry) +
12673                              (old_end - old_start))
12674                         - submap_entry->vme_end;
12675
12676                 old_start += start_delta;
12677                 old_end -= end_delta;
12678
12679                 if(submap_entry->is_sub_map) {
12680                         entry = submap_entry;
12681                         vaddr = local_vaddr;
12682                         goto submap_recurse;
12683                 }
12684
12685                 if (((fault_type & VM_PROT_WRITE) ||
12686                      force_copy)
12687                     && cow_sub_map_parent) {
12688
12689                         vm_object_t     sub_object, copy_object;
12690                         vm_object_offset_t copy_offset;
12691                         vm_map_offset_t local_start;
12692                         vm_map_offset_t local_end;
12693                         boolean_t               copied_slowly = FALSE;
12694
12695                         if (vm_map_lock_read_to_write(map)) {
12696                                 vm_map_lock_read(map);
12697                                 old_start -= start_delta;
12698                                 old_end += end_delta;
12699                                 goto RetrySubMap;
12700                         }
12701
12702
12703                         sub_object = VME_OBJECT(submap_entry);
12704                         if (sub_object == VM_OBJECT_NULL) {
12705                                 sub_object =
12706                                         vm_object_allocate(
12707                                                 (vm_map_size_t)
12708                                                 (submap_entry->vme_end -
12709                                                  submap_entry->vme_start));
12710                                 VME_OBJECT_SET(submap_entry, sub_object);
12711                                 VME_OFFSET_SET(submap_entry, 0);
12712                                 assert(!submap_entry->is_sub_map);
12713                                 assert(submap_entry->use_pmap);
12714                         }
12715                         local_start =  local_vaddr -
12716                                 (cow_parent_vaddr - old_start);
12717                         local_end = local_vaddr +
12718                                 (old_end - cow_parent_vaddr);
12719                         vm_map_clip_start(map, submap_entry, local_start);
12720                         vm_map_clip_end(map, submap_entry, local_end);
12721                         if (submap_entry->is_sub_map) {
12722                                 /* unnesting was done when clipping */
12723                                 assert(!submap_entry->use_pmap);
12724                         }
12725
12726                         /* This is the COW case, lets connect */
12727                         /* an entry in our space to the underlying */
12728                         /* object in the submap, bypassing the  */
12729                         /* submap. */
12730
12731
12732                         if(submap_entry->wired_count != 0 ||
12733                            (sub_object->copy_strategy ==
12734                             MEMORY_OBJECT_COPY_NONE)) {
12735                                 vm_object_lock(sub_object);
12736                                 vm_object_copy_slowly(sub_object,
12737                                                       VME_OFFSET(submap_entry),
12738                                                       (submap_entry->vme_end -
12739                                                        submap_entry->vme_start),
12740                                                       FALSE,
12741                                                       &copy_object);
12742                                 copied_slowly = TRUE;
12743                         } else {
12744
12745                                 /* set up shadow object */
12746                                 copy_object = sub_object;
12747                                 vm_object_lock(sub_object);
12748                                 vm_object_reference_locked(sub_object);
12749                                 sub_object->shadowed = TRUE;
12750                                 vm_object_unlock(sub_object);
12751
12752                                 assert(submap_entry->wired_count == 0);
12753                                 submap_entry->needs_copy = TRUE;
12754
12755                                 prot = submap_entry->protection;
12756                                 assert(!pmap_has_prot_policy(prot));
12757                                 prot = prot & ~VM_PROT_WRITE;
12758                                 assert(!pmap_has_prot_policy(prot));
12759
12760                                 if (override_nx(old_map,
12761                                                 VME_ALIAS(submap_entry))
12762                                     && prot)
12763                                         prot |= VM_PROT_EXECUTE;
12764
12765                                 vm_object_pmap_protect(
12766                                         sub_object,
12767                                         VME_OFFSET(submap_entry),
12768                                         submap_entry->vme_end -
12769                                         submap_entry->vme_start,
12770                                         (submap_entry->is_shared
12771                                          || map->mapped_in_other_pmaps) ?
12772                                         PMAP_NULL : map->pmap,
12773                                         submap_entry->vme_start,
12774                                         prot);
12775                         }
12776
12777                         /*
12778                          * Adjust the fault offset to the submap entry.
12779                          */
12780                         copy_offset = (local_vaddr -
12781                                        submap_entry->vme_start +
12782                                        VME_OFFSET(submap_entry));
12783
12784                         /* This works diffently than the   */
12785                         /* normal submap case. We go back  */
12786                         /* to the parent of the cow map and*/
12787                         /* clip out the target portion of  */
12788                         /* the sub_map, substituting the   */
12789                         /* new copy object,                */
12790
12791                         subentry_protection = submap_entry->protection;
12792                         subentry_max_protection = submap_entry->max_protection;
12793                         vm_map_unlock(map);
12794                         submap_entry = NULL; /* not valid after map unlock */
12795
12796                         local_start = old_start;
12797                         local_end = old_end;
12798                         map = cow_sub_map_parent;
12799                         *var_map = cow_sub_map_parent;
12800                         vaddr = cow_parent_vaddr;
12801                         cow_sub_map_parent = NULL;
12802
12803                         if(!vm_map_lookup_entry(map,
12804                                                 vaddr, &entry)) {
12805                                 vm_object_deallocate(
12806                                         copy_object);
12807                                 vm_map_lock_write_to_read(map);
12808                                 return KERN_INVALID_ADDRESS;
12809                         }
12810
12811                         /* clip out the portion of space */
12812                         /* mapped by the sub map which   */
12813                         /* corresponds to the underlying */
12814                         /* object */
12815
12816                         /*
12817                          * Clip (and unnest) the smallest nested chunk
12818                          * possible around the faulting address...
12819                          */
12820                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
12821                         local_end = local_start + pmap_nesting_size_min;
12822                         /*
12823                          * ... but don't go beyond the "old_start" to "old_end"
12824                          * range, to avoid spanning over another VM region
12825                          * with a possibly different VM object and/or offset.
12826                          */
12827                         if (local_start < old_start) {
12828                                 local_start = old_start;
12829                         }
12830                         if (local_end > old_end) {
12831                                 local_end = old_end;
12832                         }
12833                         /*
12834                          * Adjust copy_offset to the start of the range.
12835                          */
12836                         copy_offset -= (vaddr - local_start);
12837
12838                         vm_map_clip_start(map, entry, local_start);
12839                         vm_map_clip_end(map, entry, local_end);
12840                         if (entry->is_sub_map) {
12841                                 /* unnesting was done when clipping */
12842                                 assert(!entry->use_pmap);
12843                         }
12844
12845                         /* substitute copy object for */
12846                         /* shared map entry           */
12847                         vm_map_deallocate(VME_SUBMAP(entry));
12848                         assert(!entry->iokit_acct);
12849                         entry->is_sub_map = FALSE;
12850                         entry->use_pmap = TRUE;
12851                         VME_OBJECT_SET(entry, copy_object);
12852
12853                         /* propagate the submap entry's protections */
12854                         if (entry->protection != VM_PROT_READ) {
12855                                 /*
12856                                  * Someone has already altered the top entry's
12857                                  * protections via vm_protect(VM_PROT_COPY).
12858                                  * Respect these new values and ignore the
12859                                  * submap entry's protections.
12860                                  */
12861                         } else {
12862                                 /*
12863                                  * Regular copy-on-write: propagate the submap
12864                                  * entry's protections to the top map entry.
12865                                  */
12866                                 entry->protection |= subentry_protection;
12867                         }
12868                         entry->max_protection |= subentry_max_protection;
12869
12870                         if ((entry->protection & VM_PROT_WRITE) &&
12871                             (entry->protection & VM_PROT_EXECUTE) &&
12872 #if !CONFIG_EMBEDDED
12873                             map != kernel_map &&
12874                             cs_process_enforcement(NULL) &&
12875 #endif /* !CONFIG_EMBEDDED */
12876                             !(entry->used_for_jit)) {
12877                                 DTRACE_VM3(cs_wx,
12878                                            uint64_t, (uint64_t)entry->vme_start,
12879                                            uint64_t, (uint64_t)entry->vme_end,
12880                                            vm_prot_t, entry->protection);
12881                                 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
12882                                        proc_selfpid(),
12883                                        (current_task()->bsd_info
12884                                         ? proc_name_address(current_task()->bsd_info)
12885                                         : "?"),
12886                                        __FUNCTION__);
12887                                 entry->protection &= ~VM_PROT_EXECUTE;
12888                         }
12889
12890                         if(copied_slowly) {
12891                                 VME_OFFSET_SET(entry, local_start - old_start);
12892                                 entry->needs_copy = FALSE;
12893                                 entry->is_shared = FALSE;
12894                         } else {
12895                                 VME_OFFSET_SET(entry, copy_offset);
12896                                 assert(entry->wired_count == 0);
12897                                 entry->needs_copy = TRUE;
12898                                 if(entry->inheritance == VM_INHERIT_SHARE)
12899                                         entry->inheritance = VM_INHERIT_COPY;
12900                                 if (map != old_map)
12901                                         entry->is_shared = TRUE;
12902                         }
12903                         if(entry->inheritance == VM_INHERIT_SHARE)
12904                                 entry->inheritance = VM_INHERIT_COPY;
12905
12906                         vm_map_lock_write_to_read(map);
12907                 } else {
12908                         if((cow_sub_map_parent)
12909                            && (cow_sub_map_parent != *real_map)
12910                            && (cow_sub_map_parent != map)) {
12911                                 vm_map_unlock(cow_sub_map_parent);
12912                         }
12913                         entry = submap_entry;
12914                         vaddr = local_vaddr;
12915                 }
12916         }
12917
12918         /*
12919          *      Check whether this task is allowed to have
12920          *      this page.
12921          */
12922
12923         prot = entry->protection;
12924
12925         if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
12926                 /*
12927                  * HACK -- if not a stack, then allow execution
12928                  */
12929                 prot |= VM_PROT_EXECUTE;
12930         }
12931
12932         if (mask_protections) {
12933                 fault_type &= prot;
12934                 if (fault_type == VM_PROT_NONE) {
12935                         goto protection_failure;
12936                 }
12937         }
12938         if (((fault_type & prot) != fault_type)
12939 #if __arm64__
12940             /* prefetch abort in execute-only page */
12941             && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
12942 #endif
12943             ) {
12944         protection_failure:
12945                 if (*real_map != map) {
12946                         vm_map_unlock(*real_map);
12947                 }
12948                 *real_map = map;
12949
12950                 if ((fault_type & VM_PROT_EXECUTE) && prot)
12951                         log_stack_execution_failure((addr64_t)vaddr, prot);
12952
12953                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
12954                 return KERN_PROTECTION_FAILURE;
12955         }
12956
12957         /*
12958          *      If this page is not pageable, we have to get
12959          *      it for all possible accesses.
12960          */
12961
12962         *wired = (entry->wired_count != 0);
12963         if (*wired)
12964                 fault_type = prot;
12965
12966         /*
12967          *      If the entry was copy-on-write, we either ...
12968          */
12969
12970         if (entry->needs_copy) {
12971                 /*
12972                  *      If we want to write the page, we may as well
12973                  *      handle that now since we've got the map locked.
12974                  *
12975                  *      If we don't need to write the page, we just
12976                  *      demote the permissions allowed.
12977                  */
12978
12979                 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
12980                         /*
12981                          *      Make a new object, and place it in the
12982                          *      object chain.  Note that no new references
12983                          *      have appeared -- one just moved from the
12984                          *      map to the new object.
12985                          */
12986
12987                         if (vm_map_lock_read_to_write(map)) {
12988                                 vm_map_lock_read(map);
12989                                 goto RetryLookup;
12990                         }
12991
12992                         if (VME_OBJECT(entry)->shadowed == FALSE) {
12993                                 vm_object_lock(VME_OBJECT(entry));
12994                                 VME_OBJECT(entry)->shadowed = TRUE;
12995                                 vm_object_unlock(VME_OBJECT(entry));
12996                         }
12997                         VME_OBJECT_SHADOW(entry,
12998                                           (vm_map_size_t) (entry->vme_end -
12999                                                            entry->vme_start));
13000                         entry->needs_copy = FALSE;
13001
13002                         vm_map_lock_write_to_read(map);
13003                 }
13004                 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13005                         /*
13006                          *      We're attempting to read a copy-on-write
13007                          *      page -- don't allow writes.
13008                          */
13009
13010                         prot &= (~VM_PROT_WRITE);
13011                 }
13012         }
13013
13014         /*
13015          *      Create an object if necessary.
13016          */
13017         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13018
13019                 if (vm_map_lock_read_to_write(map)) {
13020                         vm_map_lock_read(map);
13021                         goto RetryLookup;
13022                 }
13023
13024                 VME_OBJECT_SET(entry,
13025                                vm_object_allocate(
13026                                        (vm_map_size_t)(entry->vme_end -
13027                                                        entry->vme_start)));
13028                 VME_OFFSET_SET(entry, 0);
13029                 assert(entry->use_pmap);
13030                 vm_map_lock_write_to_read(map);
13031         }
13032
13033         /*
13034          *      Return the object/offset from this entry.  If the entry
13035          *      was copy-on-write or empty, it has been fixed up.  Also
13036          *      return the protection.
13037          */
13038
13039         *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13040         *object = VME_OBJECT(entry);
13041         *out_prot = prot;
13042
13043         if (fault_info) {
13044                 fault_info->interruptible = THREAD_UNINT; /* for now... */
13045                 /* ... the caller will change "interruptible" if needed */
13046                 fault_info->cluster_size = 0;
13047                 fault_info->user_tag = VME_ALIAS(entry);
13048                 fault_info->pmap_options = 0;
13049                 if (entry->iokit_acct ||
13050                     (!entry->is_sub_map && !entry->use_pmap)) {
13051                         fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13052                 }
13053                 fault_info->behavior = entry->behavior;
13054                 fault_info->lo_offset = VME_OFFSET(entry);
13055                 fault_info->hi_offset =
13056                         (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13057                 fault_info->no_cache  = entry->no_cache;
13058                 fault_info->stealth = FALSE;
13059                 fault_info->io_sync = FALSE;
13060                 if (entry->used_for_jit ||
13061                     entry->vme_resilient_codesign) {
13062                         fault_info->cs_bypass = TRUE;
13063                 } else {
13064                         fault_info->cs_bypass = FALSE;
13065                 }
13066                 fault_info->pmap_cs_associated = FALSE;
13067 #if CONFIG_PMAP_CS
13068                 if (entry->pmap_cs_associated) {
13069                         /*
13070                          * The pmap layer will validate this page
13071                          * before allowing it to be executed from.
13072                          */
13073                         fault_info->pmap_cs_associated = TRUE;
13074                 }
13075 #endif /* CONFIG_PMAP_CS */
13076                 fault_info->mark_zf_absent = FALSE;
13077                 fault_info->batch_pmap_op = FALSE;
13078         }
13079
13080         /*
13081          *      Lock the object to prevent it from disappearing
13082          */
13083         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
13084                 vm_object_lock(*object);
13085         else
13086                 vm_object_lock_shared(*object);
13087
13088         /*
13089          *      Save the version number
13090          */
13091
13092         out_version->main_timestamp = map->timestamp;
13093
13094         return KERN_SUCCESS;
13095 }
13096
13097
13098 /*
13099  *      vm_map_verify:
13100  *
13101  *      Verifies that the map in question has not changed
13102  *      since the given version. The map has to be locked
13103  *      ("shared" mode is fine) before calling this function
13104  *      and it will be returned locked too.
13105  */
13106 boolean_t
13107 vm_map_verify(
13108         vm_map_t                map,
13109         vm_map_version_t        *version)       /* REF */
13110 {
13111         boolean_t       result;
13112
13113         vm_map_lock_assert_held(map);
13114         result = (map->timestamp == version->main_timestamp);
13115
13116         return(result);
13117 }
13118
13119 /*
13120  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13121  *      Goes away after regular vm_region_recurse function migrates to
13122  *      64 bits
13123  *      vm_region_recurse: A form of vm_region which follows the
13124  *      submaps in a target map
13125  *
13126  */
13127
13128 kern_return_t
13129 vm_map_region_recurse_64(
13130         vm_map_t                 map,
13131         vm_map_offset_t *address,               /* IN/OUT */
13132         vm_map_size_t           *size,                  /* OUT */
13133         natural_t               *nesting_depth, /* IN/OUT */
13134         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
13135         mach_msg_type_number_t  *count) /* IN/OUT */
13136 {
13137         mach_msg_type_number_t  original_count;
13138         vm_region_extended_info_data_t  extended;
13139         vm_map_entry_t                  tmp_entry;
13140         vm_map_offset_t                 user_address;
13141         unsigned int                    user_max_depth;
13142
13143         /*
13144          * "curr_entry" is the VM map entry preceding or including the
13145          * address we're looking for.
13146          * "curr_map" is the map or sub-map containing "curr_entry".
13147          * "curr_address" is the equivalent of the top map's "user_address"
13148          * in the current map.
13149          * "curr_offset" is the cumulated offset of "curr_map" in the
13150          * target task's address space.
13151          * "curr_depth" is the depth of "curr_map" in the chain of
13152          * sub-maps.
13153          *
13154          * "curr_max_below" and "curr_max_above" limit the range (around
13155          * "curr_address") we should take into account in the current (sub)map.
13156          * They limit the range to what's visible through the map entries
13157          * we've traversed from the top map to the current map.
13158
13159          */
13160         vm_map_entry_t                  curr_entry;
13161         vm_map_address_t                curr_address;
13162         vm_map_offset_t                 curr_offset;
13163         vm_map_t                        curr_map;
13164         unsigned int                    curr_depth;
13165         vm_map_offset_t                 curr_max_below, curr_max_above;
13166         vm_map_offset_t                 curr_skip;
13167
13168         /*
13169          * "next_" is the same as "curr_" but for the VM region immediately
13170          * after the address we're looking for.  We need to keep track of this
13171          * too because we want to return info about that region if the
13172          * address we're looking for is not mapped.
13173          */
13174         vm_map_entry_t                  next_entry;
13175         vm_map_offset_t                 next_offset;
13176         vm_map_offset_t                 next_address;
13177         vm_map_t                        next_map;
13178         unsigned int                    next_depth;
13179         vm_map_offset_t                 next_max_below, next_max_above;
13180         vm_map_offset_t                 next_skip;
13181
13182         boolean_t                       look_for_pages;
13183         vm_region_submap_short_info_64_t short_info;
13184         boolean_t                       do_region_footprint;
13185
13186         if (map == VM_MAP_NULL) {
13187                 /* no address space to work on */
13188                 return KERN_INVALID_ARGUMENT;
13189         }
13190
13191
13192         if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13193                 /*
13194                  * "info" structure is not big enough and
13195                  * would overflow
13196                  */
13197                 return KERN_INVALID_ARGUMENT;
13198         }
13199
13200         do_region_footprint = task_self_region_footprint();
13201         original_count = *count;
13202
13203         if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13204                 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13205                 look_for_pages = FALSE;
13206                 short_info = (vm_region_submap_short_info_64_t) submap_info;
13207                 submap_info = NULL;
13208         } else {
13209                 look_for_pages = TRUE;
13210                 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13211                 short_info = NULL;
13212
13213                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13214                         *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13215                 }
13216         }
13217
13218         user_address = *address;
13219         user_max_depth = *nesting_depth;
13220
13221         if (not_in_kdp) {
13222                 vm_map_lock_read(map);
13223         }
13224
13225 recurse_again:
13226         curr_entry = NULL;
13227         curr_map = map;
13228         curr_address = user_address;
13229         curr_offset = 0;
13230         curr_skip = 0;
13231         curr_depth = 0;
13232         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13233         curr_max_below = curr_address;
13234
13235         next_entry = NULL;
13236         next_map = NULL;
13237         next_address = 0;
13238         next_offset = 0;
13239         next_skip = 0;
13240         next_depth = 0;
13241         next_max_above = (vm_map_offset_t) -1;
13242         next_max_below = (vm_map_offset_t) -1;
13243
13244         for (;;) {
13245                 if (vm_map_lookup_entry(curr_map,
13246                                         curr_address,
13247                                         &tmp_entry)) {
13248                         /* tmp_entry contains the address we're looking for */
13249                         curr_entry = tmp_entry;
13250                 } else {
13251                         vm_map_offset_t skip;
13252                         /*
13253                          * The address is not mapped.  "tmp_entry" is the
13254                          * map entry preceding the address.  We want the next
13255                          * one, if it exists.
13256                          */
13257                         curr_entry = tmp_entry->vme_next;
13258
13259                         if (curr_entry == vm_map_to_entry(curr_map) ||
13260                             (curr_entry->vme_start >=
13261                              curr_address + curr_max_above)) {
13262                                 /* no next entry at this level: stop looking */
13263                                 if (not_in_kdp) {
13264                                         vm_map_unlock_read(curr_map);
13265                                 }
13266                                 curr_entry = NULL;
13267                                 curr_map = NULL;
13268                                 curr_skip = 0;
13269                                 curr_offset = 0;
13270                                 curr_depth = 0;
13271                                 curr_max_above = 0;
13272                                 curr_max_below = 0;
13273                                 break;
13274                         }
13275
13276                         /* adjust current address and offset */
13277                         skip = curr_entry->vme_start - curr_address;
13278                         curr_address = curr_entry->vme_start;
13279                         curr_skip += skip;
13280                         curr_offset += skip;
13281                         curr_max_above -= skip;
13282                         curr_max_below = 0;
13283                 }
13284
13285                 /*
13286                  * Is the next entry at this level closer to the address (or
13287                  * deeper in the submap chain) than the one we had
13288                  * so far ?
13289                  */
13290                 tmp_entry = curr_entry->vme_next;
13291                 if (tmp_entry == vm_map_to_entry(curr_map)) {
13292                         /* no next entry at this level */
13293                 } else if (tmp_entry->vme_start >=
13294                            curr_address + curr_max_above) {
13295                         /*
13296                          * tmp_entry is beyond the scope of what we mapped of
13297                          * this submap in the upper level: ignore it.
13298                          */
13299                 } else if ((next_entry == NULL) ||
13300                            (tmp_entry->vme_start + curr_offset <=
13301                             next_entry->vme_start + next_offset)) {
13302                         /*
13303                          * We didn't have a "next_entry" or this one is
13304                          * closer to the address we're looking for:
13305                          * use this "tmp_entry" as the new "next_entry".
13306                          */
13307                         if (next_entry != NULL) {
13308                                 /* unlock the last "next_map" */
13309                                 if (next_map != curr_map && not_in_kdp) {
13310                                         vm_map_unlock_read(next_map);
13311                                 }
13312                         }
13313                         next_entry = tmp_entry;
13314                         next_map = curr_map;
13315                         next_depth = curr_depth;
13316                         next_address = next_entry->vme_start;
13317                         next_skip = curr_skip;
13318                         next_skip += (next_address - curr_address);
13319                         next_offset = curr_offset;
13320                         next_offset += (next_address - curr_address);
13321                         next_max_above = MIN(next_max_above, curr_max_above);
13322                         next_max_above = MIN(next_max_above,
13323                                              next_entry->vme_end - next_address);
13324                         next_max_below = MIN(next_max_below, curr_max_below);
13325                         next_max_below = MIN(next_max_below,
13326                                              next_address - next_entry->vme_start);
13327                 }
13328
13329                 /*
13330                  * "curr_max_{above,below}" allow us to keep track of the
13331                  * portion of the submap that is actually mapped at this level:
13332                  * the rest of that submap is irrelevant to us, since it's not
13333                  * mapped here.
13334                  * The relevant portion of the map starts at
13335                  * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13336                  */
13337                 curr_max_above = MIN(curr_max_above,
13338                                      curr_entry->vme_end - curr_address);
13339                 curr_max_below = MIN(curr_max_below,
13340                                      curr_address - curr_entry->vme_start);
13341
13342                 if (!curr_entry->is_sub_map ||
13343                     curr_depth >= user_max_depth) {
13344                         /*
13345                          * We hit a leaf map or we reached the maximum depth
13346                          * we could, so stop looking.  Keep the current map
13347                          * locked.
13348                          */
13349                         break;
13350                 }
13351
13352                 /*
13353                  * Get down to the next submap level.
13354                  */
13355
13356                 /*
13357                  * Lock the next level and unlock the current level,
13358                  * unless we need to keep it locked to access the "next_entry"
13359                  * later.
13360                  */
13361                 if (not_in_kdp) {
13362                         vm_map_lock_read(VME_SUBMAP(curr_entry));
13363                 }
13364                 if (curr_map == next_map) {
13365                         /* keep "next_map" locked in case we need it */
13366                 } else {
13367                         /* release this map */
13368                         if (not_in_kdp)
13369                                 vm_map_unlock_read(curr_map);
13370                 }
13371
13372                 /*
13373                  * Adjust the offset.  "curr_entry" maps the submap
13374                  * at relative address "curr_entry->vme_start" in the
13375                  * curr_map but skips the first "VME_OFFSET(curr_entry)"
13376                  * bytes of the submap.
13377                  * "curr_offset" always represents the offset of a virtual
13378                  * address in the curr_map relative to the absolute address
13379                  * space (i.e. the top-level VM map).
13380                  */
13381                 curr_offset +=
13382                         (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13383                 curr_address = user_address + curr_offset;
13384                 /* switch to the submap */
13385                 curr_map = VME_SUBMAP(curr_entry);
13386                 curr_depth++;
13387                 curr_entry = NULL;
13388         }
13389
13390 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13391 // so probably should be a real 32b ID vs. ptr.
13392 // Current users just check for equality
13393
13394         if (curr_entry == NULL) {
13395                 /* no VM region contains the address... */
13396
13397                 if (do_region_footprint && /* we want footprint numbers */
13398                     next_entry == NULL && /* & there are no more regions */
13399                     /* & we haven't already provided our fake region: */
13400                     user_address <= vm_map_last_entry(map)->vme_end) {
13401                         ledger_amount_t nonvol, nonvol_compressed;
13402                         /*
13403                          * Add a fake memory region to account for
13404                          * purgeable memory that counts towards this
13405                          * task's memory footprint, i.e. the resident
13406                          * compressed pages of non-volatile objects
13407                          * owned by that task.
13408                          */
13409                         ledger_get_balance(
13410                                 map->pmap->ledger,
13411                                 task_ledgers.purgeable_nonvolatile,
13412                                 &nonvol);
13413                         ledger_get_balance(
13414                                 map->pmap->ledger,
13415                                 task_ledgers.purgeable_nonvolatile_compressed,
13416                                 &nonvol_compressed);
13417                         if (nonvol + nonvol_compressed == 0) {
13418                                 /* no purgeable memory usage to report */
13419                                 return KERN_INVALID_ADDRESS;
13420                         }
13421                         /* fake region to show nonvolatile footprint */
13422                         if (look_for_pages) {
13423                                 submap_info->protection = VM_PROT_DEFAULT;
13424                                 submap_info->max_protection = VM_PROT_DEFAULT;
13425                                 submap_info->inheritance = VM_INHERIT_DEFAULT;
13426                                 submap_info->offset = 0;
13427                                 submap_info->user_tag = -1;
13428                                 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
13429                                 submap_info->pages_shared_now_private = 0;
13430                                 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
13431                                 submap_info->pages_dirtied = submap_info->pages_resident;
13432                                 submap_info->ref_count = 1;
13433                                 submap_info->shadow_depth = 0;
13434                                 submap_info->external_pager = 0;
13435                                 submap_info->share_mode = SM_PRIVATE;
13436                                 submap_info->is_submap = 0;
13437                                 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13438                                 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13439                                 submap_info->user_wired_count = 0;
13440                                 submap_info->pages_reusable = 0;
13441                         } else {
13442                                 short_info->user_tag = -1;
13443                                 short_info->offset = 0;
13444                                 short_info->protection = VM_PROT_DEFAULT;
13445                                 short_info->inheritance = VM_INHERIT_DEFAULT;
13446                                 short_info->max_protection = VM_PROT_DEFAULT;
13447                                 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13448                                 short_info->user_wired_count = 0;
13449                                 short_info->is_submap = 0;
13450                                 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13451                                 short_info->external_pager = 0;
13452                                 short_info->shadow_depth = 0;
13453                                 short_info->share_mode = SM_PRIVATE;
13454                                 short_info->ref_count = 1;
13455                         }
13456                         *nesting_depth = 0;
13457                         *size = (vm_map_size_t) (nonvol + nonvol_compressed);
13458 //                      *address = user_address;
13459                         *address = vm_map_last_entry(map)->vme_end;
13460                         return KERN_SUCCESS;
13461                 }
13462
13463                 if (next_entry == NULL) {
13464                         /* ... and no VM region follows it either */
13465                         return KERN_INVALID_ADDRESS;
13466                 }
13467                 /* ... gather info about the next VM region */
13468                 curr_entry = next_entry;
13469                 curr_map = next_map;    /* still locked ... */
13470                 curr_address = next_address;
13471                 curr_skip = next_skip;
13472                 curr_offset = next_offset;
13473                 curr_depth = next_depth;
13474                 curr_max_above = next_max_above;
13475                 curr_max_below = next_max_below;
13476         } else {
13477                 /* we won't need "next_entry" after all */
13478                 if (next_entry != NULL) {
13479                         /* release "next_map" */
13480                         if (next_map != curr_map && not_in_kdp) {
13481                                 vm_map_unlock_read(next_map);
13482                         }
13483                 }
13484         }
13485         next_entry = NULL;
13486         next_map = NULL;
13487         next_offset = 0;
13488         next_skip = 0;
13489         next_depth = 0;
13490         next_max_below = -1;
13491         next_max_above = -1;
13492
13493         if (curr_entry->is_sub_map &&
13494             curr_depth < user_max_depth) {
13495                 /*
13496                  * We're not as deep as we could be:  we must have
13497                  * gone back up after not finding anything mapped
13498                  * below the original top-level map entry's.
13499                  * Let's move "curr_address" forward and recurse again.
13500                  */
13501                 user_address = curr_address;
13502                 goto recurse_again;
13503         }
13504
13505         *nesting_depth = curr_depth;
13506         *size = curr_max_above + curr_max_below;
13507         *address = user_address + curr_skip - curr_max_below;
13508
13509 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13510 // so probably should be a real 32b ID vs. ptr.
13511 // Current users just check for equality
13512 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13513
13514         if (look_for_pages) {
13515                 submap_info->user_tag = VME_ALIAS(curr_entry);
13516                 submap_info->offset = VME_OFFSET(curr_entry);
13517                 submap_info->protection = curr_entry->protection;
13518                 submap_info->inheritance = curr_entry->inheritance;
13519                 submap_info->max_protection = curr_entry->max_protection;
13520                 submap_info->behavior = curr_entry->behavior;
13521                 submap_info->user_wired_count = curr_entry->user_wired_count;
13522                 submap_info->is_submap = curr_entry->is_sub_map;
13523                 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13524         } else {
13525                 short_info->user_tag = VME_ALIAS(curr_entry);
13526                 short_info->offset = VME_OFFSET(curr_entry);
13527                 short_info->protection = curr_entry->protection;
13528                 short_info->inheritance = curr_entry->inheritance;
13529                 short_info->max_protection = curr_entry->max_protection;
13530                 short_info->behavior = curr_entry->behavior;
13531                 short_info->user_wired_count = curr_entry->user_wired_count;
13532                 short_info->is_submap = curr_entry->is_sub_map;
13533                 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13534         }
13535
13536         extended.pages_resident = 0;
13537         extended.pages_swapped_out = 0;
13538         extended.pages_shared_now_private = 0;
13539         extended.pages_dirtied = 0;
13540         extended.pages_reusable = 0;
13541         extended.external_pager = 0;
13542         extended.shadow_depth = 0;
13543         extended.share_mode = SM_EMPTY;
13544         extended.ref_count = 0;
13545
13546         if (not_in_kdp) {
13547                 if (!curr_entry->is_sub_map) {
13548                         vm_map_offset_t range_start, range_end;
13549                         range_start = MAX((curr_address - curr_max_below),
13550                                           curr_entry->vme_start);
13551                         range_end = MIN((curr_address + curr_max_above),
13552                                         curr_entry->vme_end);
13553                         vm_map_region_walk(curr_map,
13554                                            range_start,
13555                                            curr_entry,
13556                                            (VME_OFFSET(curr_entry) +
13557                                             (range_start -
13558                                              curr_entry->vme_start)),
13559                                            range_end - range_start,
13560                                            &extended,
13561                                            look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13562                         if (extended.external_pager &&
13563                             extended.ref_count == 2 &&
13564                             extended.share_mode == SM_SHARED) {
13565                                 extended.share_mode = SM_PRIVATE;
13566                         }
13567                 } else {
13568                         if (curr_entry->use_pmap) {
13569                                 extended.share_mode = SM_TRUESHARED;
13570                         } else {
13571                                 extended.share_mode = SM_PRIVATE;
13572                         }
13573                         extended.ref_count = VME_SUBMAP(curr_entry)->map_refcnt;
13574                 }
13575         }
13576
13577         if (look_for_pages) {
13578                 submap_info->pages_resident = extended.pages_resident;
13579                 submap_info->pages_swapped_out = extended.pages_swapped_out;
13580                 submap_info->pages_shared_now_private =
13581                         extended.pages_shared_now_private;
13582                 submap_info->pages_dirtied = extended.pages_dirtied;
13583                 submap_info->external_pager = extended.external_pager;
13584                 submap_info->shadow_depth = extended.shadow_depth;
13585                 submap_info->share_mode = extended.share_mode;
13586                 submap_info->ref_count = extended.ref_count;
13587
13588                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13589                         submap_info->pages_reusable = extended.pages_reusable;
13590                 }
13591         } else {
13592                 short_info->external_pager = extended.external_pager;
13593                 short_info->shadow_depth = extended.shadow_depth;
13594                 short_info->share_mode = extended.share_mode;
13595                 short_info->ref_count = extended.ref_count;
13596         }
13597
13598         if (not_in_kdp) {
13599                 vm_map_unlock_read(curr_map);
13600         }
13601
13602         return KERN_SUCCESS;
13603 }
13604
13605 /*
13606  *      vm_region:
13607  *
13608  *      User call to obtain information about a region in
13609  *      a task's address map. Currently, only one flavor is
13610  *      supported.
13611  *
13612  *      XXX The reserved and behavior fields cannot be filled
13613  *          in until the vm merge from the IK is completed, and
13614  *          vm_reserve is implemented.
13615  */
13616
13617 kern_return_t
13618 vm_map_region(
13619         vm_map_t                 map,
13620         vm_map_offset_t *address,               /* IN/OUT */
13621         vm_map_size_t           *size,                  /* OUT */
13622         vm_region_flavor_t       flavor,                /* IN */
13623         vm_region_info_t         info,                  /* OUT */
13624         mach_msg_type_number_t  *count, /* IN/OUT */
13625         mach_port_t             *object_name)           /* OUT */
13626 {
13627         vm_map_entry_t          tmp_entry;
13628         vm_map_entry_t          entry;
13629         vm_map_offset_t         start;
13630
13631         if (map == VM_MAP_NULL)
13632                 return(KERN_INVALID_ARGUMENT);
13633
13634         switch (flavor) {
13635
13636         case VM_REGION_BASIC_INFO:
13637                 /* legacy for old 32-bit objects info */
13638         {
13639                 vm_region_basic_info_t  basic;
13640
13641                 if (*count < VM_REGION_BASIC_INFO_COUNT)
13642                         return(KERN_INVALID_ARGUMENT);
13643
13644                 basic = (vm_region_basic_info_t) info;
13645                 *count = VM_REGION_BASIC_INFO_COUNT;
13646
13647                 vm_map_lock_read(map);
13648
13649                 start = *address;
13650                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13651                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13652                                 vm_map_unlock_read(map);
13653                                 return(KERN_INVALID_ADDRESS);
13654                         }
13655                 } else {
13656                         entry = tmp_entry;
13657                 }
13658
13659                 start = entry->vme_start;
13660
13661                 basic->offset = (uint32_t)VME_OFFSET(entry);
13662                 basic->protection = entry->protection;
13663                 basic->inheritance = entry->inheritance;
13664                 basic->max_protection = entry->max_protection;
13665                 basic->behavior = entry->behavior;
13666                 basic->user_wired_count = entry->user_wired_count;
13667                 basic->reserved = entry->is_sub_map;
13668                 *address = start;
13669                 *size = (entry->vme_end - start);
13670
13671                 if (object_name) *object_name = IP_NULL;
13672                 if (entry->is_sub_map) {
13673                         basic->shared = FALSE;
13674                 } else {
13675                         basic->shared = entry->is_shared;
13676                 }
13677
13678                 vm_map_unlock_read(map);
13679                 return(KERN_SUCCESS);
13680         }
13681
13682         case VM_REGION_BASIC_INFO_64:
13683         {
13684                 vm_region_basic_info_64_t       basic;
13685
13686                 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
13687                         return(KERN_INVALID_ARGUMENT);
13688
13689                 basic = (vm_region_basic_info_64_t) info;
13690                 *count = VM_REGION_BASIC_INFO_COUNT_64;
13691
13692                 vm_map_lock_read(map);
13693
13694                 start = *address;
13695                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13696                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13697                                 vm_map_unlock_read(map);
13698                                 return(KERN_INVALID_ADDRESS);
13699                         }
13700                 } else {
13701                         entry = tmp_entry;
13702                 }
13703
13704                 start = entry->vme_start;
13705
13706                 basic->offset = VME_OFFSET(entry);
13707                 basic->protection = entry->protection;
13708                 basic->inheritance = entry->inheritance;
13709                 basic->max_protection = entry->max_protection;
13710                 basic->behavior = entry->behavior;
13711                 basic->user_wired_count = entry->user_wired_count;
13712                 basic->reserved = entry->is_sub_map;
13713                 *address = start;
13714                 *size = (entry->vme_end - start);
13715
13716                 if (object_name) *object_name = IP_NULL;
13717                 if (entry->is_sub_map) {
13718                         basic->shared = FALSE;
13719                 } else {
13720                         basic->shared = entry->is_shared;
13721                 }
13722
13723                 vm_map_unlock_read(map);
13724                 return(KERN_SUCCESS);
13725         }
13726         case VM_REGION_EXTENDED_INFO:
13727                 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
13728                         return(KERN_INVALID_ARGUMENT);
13729                 /*fallthru*/
13730         case VM_REGION_EXTENDED_INFO__legacy:
13731                 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
13732                         return KERN_INVALID_ARGUMENT;
13733
13734         {
13735                 vm_region_extended_info_t       extended;
13736                 mach_msg_type_number_t original_count;
13737
13738                 extended = (vm_region_extended_info_t) info;
13739
13740                 vm_map_lock_read(map);
13741
13742                 start = *address;
13743                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13744                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13745                                 vm_map_unlock_read(map);
13746                                 return(KERN_INVALID_ADDRESS);
13747                         }
13748                 } else {
13749                         entry = tmp_entry;
13750                 }
13751                 start = entry->vme_start;
13752
13753                 extended->protection = entry->protection;
13754                 extended->user_tag = VME_ALIAS(entry);
13755                 extended->pages_resident = 0;
13756                 extended->pages_swapped_out = 0;
13757                 extended->pages_shared_now_private = 0;
13758                 extended->pages_dirtied = 0;
13759                 extended->external_pager = 0;
13760                 extended->shadow_depth = 0;
13761
13762                 original_count = *count;
13763                 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13764                         *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13765                 } else {
13766                         extended->pages_reusable = 0;
13767                         *count = VM_REGION_EXTENDED_INFO_COUNT;
13768                 }
13769
13770                 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13771
13772                 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
13773                         extended->share_mode = SM_PRIVATE;
13774
13775                 if (object_name)
13776                         *object_name = IP_NULL;
13777                 *address = start;
13778                 *size = (entry->vme_end - start);
13779
13780                 vm_map_unlock_read(map);
13781                 return(KERN_SUCCESS);
13782         }
13783         case VM_REGION_TOP_INFO:
13784         {
13785                 vm_region_top_info_t    top;
13786
13787                 if (*count < VM_REGION_TOP_INFO_COUNT)
13788                         return(KERN_INVALID_ARGUMENT);
13789
13790                 top = (vm_region_top_info_t) info;
13791                 *count = VM_REGION_TOP_INFO_COUNT;
13792
13793                 vm_map_lock_read(map);
13794
13795                 start = *address;
13796                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13797                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13798                                 vm_map_unlock_read(map);
13799                                 return(KERN_INVALID_ADDRESS);
13800                         }
13801                 } else {
13802                         entry = tmp_entry;
13803
13804                 }
13805                 start = entry->vme_start;
13806
13807                 top->private_pages_resident = 0;
13808                 top->shared_pages_resident = 0;
13809
13810                 vm_map_region_top_walk(entry, top);
13811
13812                 if (object_name)
13813                         *object_name = IP_NULL;
13814                 *address = start;
13815                 *size = (entry->vme_end - start);
13816
13817                 vm_map_unlock_read(map);
13818                 return(KERN_SUCCESS);
13819         }
13820         default:
13821                 return(KERN_INVALID_ARGUMENT);
13822         }
13823 }
13824
13825 #define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
13826         MIN((entry_size),                                               \
13827             ((obj)->all_reusable ?                                      \
13828              (obj)->wired_page_count :                                  \
13829              (obj)->resident_page_count - (obj)->reusable_page_count))
13830
13831 void
13832 vm_map_region_top_walk(
13833         vm_map_entry_t             entry,
13834         vm_region_top_info_t       top)
13835 {
13836
13837         if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
13838                 top->share_mode = SM_EMPTY;
13839                 top->ref_count = 0;
13840                 top->obj_id = 0;
13841                 return;
13842         }
13843
13844         {
13845                 struct  vm_object *obj, *tmp_obj;
13846                 int             ref_count;
13847                 uint32_t        entry_size;
13848
13849                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
13850
13851                 obj = VME_OBJECT(entry);
13852
13853                 vm_object_lock(obj);
13854
13855                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13856                         ref_count--;
13857
13858                 assert(obj->reusable_page_count <= obj->resident_page_count);
13859                 if (obj->shadow) {
13860                         if (ref_count == 1)
13861                                 top->private_pages_resident =
13862                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13863                         else
13864                                 top->shared_pages_resident =
13865                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13866                         top->ref_count  = ref_count;
13867                         top->share_mode = SM_COW;
13868
13869                         while ((tmp_obj = obj->shadow)) {
13870                                 vm_object_lock(tmp_obj);
13871                                 vm_object_unlock(obj);
13872                                 obj = tmp_obj;
13873
13874                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13875                                         ref_count--;
13876
13877                                 assert(obj->reusable_page_count <= obj->resident_page_count);
13878                                 top->shared_pages_resident +=
13879                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13880                                 top->ref_count += ref_count - 1;
13881                         }
13882                 } else {
13883                         if (entry->superpage_size) {
13884                                 top->share_mode = SM_LARGE_PAGE;
13885                                 top->shared_pages_resident = 0;
13886                                 top->private_pages_resident = entry_size;
13887                         } else if (entry->needs_copy) {
13888                                 top->share_mode = SM_COW;
13889                                 top->shared_pages_resident =
13890                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13891                         } else {
13892                                 if (ref_count == 1 ||
13893                                     (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
13894                                         top->share_mode = SM_PRIVATE;
13895                                                 top->private_pages_resident =
13896                                                         OBJ_RESIDENT_COUNT(obj,
13897                                                                            entry_size);
13898                                 } else {
13899                                         top->share_mode = SM_SHARED;
13900                                         top->shared_pages_resident =
13901                                                 OBJ_RESIDENT_COUNT(obj,
13902                                                                   entry_size);
13903                                 }
13904                         }
13905                         top->ref_count = ref_count;
13906                 }
13907                 /* XXX K64: obj_id will be truncated */
13908                 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
13909
13910                 vm_object_unlock(obj);
13911         }
13912 }
13913
13914 void
13915 vm_map_region_walk(
13916         vm_map_t                        map,
13917         vm_map_offset_t                 va,
13918         vm_map_entry_t                  entry,
13919         vm_object_offset_t              offset,
13920         vm_object_size_t                range,
13921         vm_region_extended_info_t       extended,
13922         boolean_t                       look_for_pages,
13923         mach_msg_type_number_t count)
13924 {
13925         struct vm_object *obj, *tmp_obj;
13926         vm_map_offset_t       last_offset;
13927         int               i;
13928         int               ref_count;
13929         struct vm_object        *shadow_object;
13930         int                     shadow_depth;
13931         boolean_t         do_region_footprint;
13932
13933         do_region_footprint = task_self_region_footprint();
13934
13935         if ((VME_OBJECT(entry) == 0) ||
13936             (entry->is_sub_map) ||
13937             (VME_OBJECT(entry)->phys_contiguous &&
13938              !entry->superpage_size)) {
13939                 extended->share_mode = SM_EMPTY;
13940                 extended->ref_count = 0;
13941                 return;
13942         }
13943
13944         if (entry->superpage_size) {
13945                 extended->shadow_depth = 0;
13946                 extended->share_mode = SM_LARGE_PAGE;
13947                 extended->ref_count = 1;
13948                 extended->external_pager = 0;
13949                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
13950                 extended->shadow_depth = 0;
13951                 return;
13952         }
13953
13954         obj = VME_OBJECT(entry);
13955
13956         vm_object_lock(obj);
13957
13958         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13959                 ref_count--;
13960
13961         if (look_for_pages) {
13962                 for (last_offset = offset + range;
13963                      offset < last_offset;
13964                      offset += PAGE_SIZE_64, va += PAGE_SIZE) {
13965
13966                         if (do_region_footprint) {
13967                                 int disp;
13968
13969                                 disp = 0;
13970                                 if (map->has_corpse_footprint) {
13971                                         /*
13972                                          * Query the page info data we saved
13973                                          * while forking the corpse.
13974                                          */
13975                                         vm_map_corpse_footprint_query_page_info(
13976                                                 map,
13977                                                 va,
13978                                                 &disp);
13979                                 } else {
13980                                         /*
13981                                          * Query the pmap.
13982                                          */
13983                                         pmap_query_page_info(map->pmap,
13984                                                              va,
13985                                                              &disp);
13986                                 }
13987                                 if (disp & PMAP_QUERY_PAGE_PRESENT) {
13988                                         if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
13989                                                 extended->pages_resident++;
13990                                         }
13991                                         if (disp & PMAP_QUERY_PAGE_REUSABLE) {
13992                                                 extended->pages_reusable++;
13993                                         } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
13994                                                    (disp & PMAP_QUERY_PAGE_ALTACCT)) {
13995                                                 /* alternate accounting */
13996                                         } else {
13997                                                 extended->pages_dirtied++;
13998                                         }
13999                                 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14000                                         if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14001                                                 /* alternate accounting */
14002                                         } else {
14003                                                 extended->pages_swapped_out++;
14004                                         }
14005                                 }
14006                                 /* deal with alternate accounting */
14007                                 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14008                                     /* && not tagged as no-footprint? */
14009                                     VM_OBJECT_OWNER(obj) != NULL &&
14010                                     VM_OBJECT_OWNER(obj)->map == map) {
14011                                         if ((((va
14012                                                - entry->vme_start
14013                                                + VME_OFFSET(entry))
14014                                               / PAGE_SIZE) <
14015                                              (obj->resident_page_count +
14016                                               vm_compressor_pager_get_count(obj->pager)))) {
14017                                                 /*
14018                                                  * Non-volatile purgeable object owned
14019                                                  * by this task: report the first
14020                                                  * "#resident + #compressed" pages as
14021                                                  * "resident" (to show that they
14022                                                  * contribute to the footprint) but not
14023                                                  * "dirty" (to avoid double-counting
14024                                                  * with the fake "non-volatile" region
14025                                                  * we'll report at the end of the
14026                                                  * address space to account for all
14027                                                  * (mapped or not) non-volatile memory
14028                                                  * owned by this task.
14029                                                  */
14030                                                 extended->pages_resident++;
14031                                         }
14032                                 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14033                                             obj->purgable == VM_PURGABLE_EMPTY) &&
14034                                            /* && not tagged as no-footprint? */
14035                                            VM_OBJECT_OWNER(obj) != NULL &&
14036                                            VM_OBJECT_OWNER(obj)->map == map) {
14037                                         if ((((va
14038                                                - entry->vme_start
14039                                                + VME_OFFSET(entry))
14040                                               / PAGE_SIZE) <
14041                                              obj->wired_page_count)) {
14042                                                 /*
14043                                                  * Volatile|empty purgeable object owned
14044                                                  * by this task: report the first
14045                                                  * "#wired" pages as "resident" (to
14046                                                  * show that they contribute to the
14047                                                  * footprint) but not "dirty" (to avoid
14048                                                  * double-counting with the fake
14049                                                  * "non-volatile" region we'll report
14050                                                  * at the end of the address space to
14051                                                  * account for all (mapped or not)
14052                                                  * non-volatile memory owned by this
14053                                                  * task.
14054                                                  */
14055                                                 extended->pages_resident++;
14056                                         }
14057                                 } else if (obj->purgable != VM_PURGABLE_DENY) {
14058                                         /*
14059                                          * Pages from purgeable objects
14060                                          * will be reported as dirty
14061                                          * appropriately in an extra
14062                                          * fake memory region at the end of
14063                                          * the address space.
14064                                          */
14065                                 } else if (entry->iokit_acct) {
14066                                         /*
14067                                          * IOKit mappings are considered
14068                                          * as fully dirty for footprint's
14069                                          * sake.
14070                                          */
14071                                         extended->pages_dirtied++;
14072                                 }
14073                                 continue;
14074                         }
14075
14076                         vm_map_region_look_for_page(map, va, obj,
14077                                                     offset, ref_count,
14078                                                     0, extended, count);
14079                 }
14080
14081                 if (do_region_footprint) {
14082                         goto collect_object_info;
14083                 }
14084
14085         } else {
14086         collect_object_info:
14087                 shadow_object = obj->shadow;
14088                 shadow_depth = 0;
14089
14090                 if ( !(obj->pager_trusted) && !(obj->internal))
14091                         extended->external_pager = 1;
14092
14093                 if (shadow_object != VM_OBJECT_NULL) {
14094                         vm_object_lock(shadow_object);
14095                         for (;
14096                              shadow_object != VM_OBJECT_NULL;
14097                              shadow_depth++) {
14098                                 vm_object_t     next_shadow;
14099
14100                                 if ( !(shadow_object->pager_trusted) &&
14101                                      !(shadow_object->internal))
14102                                         extended->external_pager = 1;
14103
14104                                 next_shadow = shadow_object->shadow;
14105                                 if (next_shadow) {
14106                                         vm_object_lock(next_shadow);
14107                                 }
14108                                 vm_object_unlock(shadow_object);
14109                                 shadow_object = next_shadow;
14110                         }
14111                 }
14112                 extended->shadow_depth = shadow_depth;
14113         }
14114
14115         if (extended->shadow_depth || entry->needs_copy)
14116                 extended->share_mode = SM_COW;
14117         else {
14118                 if (ref_count == 1)
14119                         extended->share_mode = SM_PRIVATE;
14120                 else {
14121                         if (obj->true_share)
14122                                 extended->share_mode = SM_TRUESHARED;
14123                         else
14124                                 extended->share_mode = SM_SHARED;
14125                 }
14126         }
14127         extended->ref_count = ref_count - extended->shadow_depth;
14128
14129         for (i = 0; i < extended->shadow_depth; i++) {
14130                 if ((tmp_obj = obj->shadow) == 0)
14131                         break;
14132                 vm_object_lock(tmp_obj);
14133                 vm_object_unlock(obj);
14134
14135                 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
14136                         ref_count--;
14137
14138                 extended->ref_count += ref_count;
14139                 obj = tmp_obj;
14140         }
14141         vm_object_unlock(obj);
14142
14143         if (extended->share_mode == SM_SHARED) {
14144                 vm_map_entry_t       cur;
14145                 vm_map_entry_t       last;
14146                 int      my_refs;
14147
14148                 obj = VME_OBJECT(entry);
14149                 last = vm_map_to_entry(map);
14150                 my_refs = 0;
14151
14152                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
14153                         ref_count--;
14154                 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
14155                         my_refs += vm_map_region_count_obj_refs(cur, obj);
14156
14157                 if (my_refs == ref_count)
14158                         extended->share_mode = SM_PRIVATE_ALIASED;
14159                 else if (my_refs > 1)
14160                         extended->share_mode = SM_SHARED_ALIASED;
14161         }
14162 }
14163
14164
14165 /* object is locked on entry and locked on return */
14166
14167
14168 static void
14169 vm_map_region_look_for_page(
14170         __unused vm_map_t               map,
14171         __unused vm_map_offset_t        va,
14172         vm_object_t                     object,
14173         vm_object_offset_t              offset,
14174         int                             max_refcnt,
14175         int                             depth,
14176         vm_region_extended_info_t       extended,
14177         mach_msg_type_number_t count)
14178 {
14179         vm_page_t       p;
14180         vm_object_t     shadow;
14181         int             ref_count;
14182         vm_object_t     caller_object;
14183
14184         shadow = object->shadow;
14185         caller_object = object;
14186
14187
14188         while (TRUE) {
14189
14190                 if ( !(object->pager_trusted) && !(object->internal))
14191                         extended->external_pager = 1;
14192
14193                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14194                         if (shadow && (max_refcnt == 1))
14195                                 extended->pages_shared_now_private++;
14196
14197                         if (!p->vmp_fictitious &&
14198                             (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
14199                                 extended->pages_dirtied++;
14200                         else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14201                                 if (p->vmp_reusable || object->all_reusable) {
14202                                         extended->pages_reusable++;
14203                                 }
14204                         }
14205
14206                         extended->pages_resident++;
14207
14208                         if(object != caller_object)
14209                                 vm_object_unlock(object);
14210
14211                         return;
14212                 }
14213                 if (object->internal &&
14214                     object->alive &&
14215                     !object->terminating &&
14216                     object->pager_ready) {
14217
14218                         if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14219                             == VM_EXTERNAL_STATE_EXISTS) {
14220                                 /* the pager has that page */
14221                                 extended->pages_swapped_out++;
14222                                 if (object != caller_object)
14223                                         vm_object_unlock(object);
14224                                 return;
14225                         }
14226                 }
14227
14228                 if (shadow) {
14229                         vm_object_lock(shadow);
14230
14231                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
14232                                 ref_count--;
14233
14234                         if (++depth > extended->shadow_depth)
14235                                 extended->shadow_depth = depth;
14236
14237                         if (ref_count > max_refcnt)
14238                                 max_refcnt = ref_count;
14239
14240                         if(object != caller_object)
14241                                 vm_object_unlock(object);
14242
14243                         offset = offset + object->vo_shadow_offset;
14244                         object = shadow;
14245                         shadow = object->shadow;
14246                         continue;
14247                 }
14248                 if(object != caller_object)
14249                         vm_object_unlock(object);
14250                 break;
14251         }
14252 }
14253
14254 static int
14255 vm_map_region_count_obj_refs(
14256         vm_map_entry_t    entry,
14257         vm_object_t       object)
14258 {
14259         int ref_count;
14260         vm_object_t chk_obj;
14261         vm_object_t tmp_obj;
14262
14263         if (VME_OBJECT(entry) == 0)
14264                 return(0);
14265
14266         if (entry->is_sub_map)
14267                 return(0);
14268         else {
14269                 ref_count = 0;
14270
14271                 chk_obj = VME_OBJECT(entry);
14272                 vm_object_lock(chk_obj);
14273
14274                 while (chk_obj) {
14275                         if (chk_obj == object)
14276                                 ref_count++;
14277                         tmp_obj = chk_obj->shadow;
14278                         if (tmp_obj)
14279                                 vm_object_lock(tmp_obj);
14280                         vm_object_unlock(chk_obj);
14281
14282                         chk_obj = tmp_obj;
14283                 }
14284         }
14285         return(ref_count);
14286 }
14287
14288
14289 /*
14290  *      Routine:        vm_map_simplify
14291  *
14292  *      Description:
14293  *              Attempt to simplify the map representation in
14294  *              the vicinity of the given starting address.
14295  *      Note:
14296  *              This routine is intended primarily to keep the
14297  *              kernel maps more compact -- they generally don't
14298  *              benefit from the "expand a map entry" technology
14299  *              at allocation time because the adjacent entry
14300  *              is often wired down.
14301  */
14302 void
14303 vm_map_simplify_entry(
14304         vm_map_t        map,
14305         vm_map_entry_t  this_entry)
14306 {
14307         vm_map_entry_t  prev_entry;
14308
14309         counter(c_vm_map_simplify_entry_called++);
14310
14311         prev_entry = this_entry->vme_prev;
14312
14313         if ((this_entry != vm_map_to_entry(map)) &&
14314             (prev_entry != vm_map_to_entry(map)) &&
14315
14316             (prev_entry->vme_end == this_entry->vme_start) &&
14317
14318             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14319             (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14320             ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14321                                     prev_entry->vme_start))
14322              == VME_OFFSET(this_entry)) &&
14323
14324             (prev_entry->behavior == this_entry->behavior) &&
14325             (prev_entry->needs_copy == this_entry->needs_copy) &&
14326             (prev_entry->protection == this_entry->protection) &&
14327             (prev_entry->max_protection == this_entry->max_protection) &&
14328             (prev_entry->inheritance == this_entry->inheritance) &&
14329             (prev_entry->use_pmap == this_entry->use_pmap) &&
14330             (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14331             (prev_entry->no_cache == this_entry->no_cache) &&
14332             (prev_entry->permanent == this_entry->permanent) &&
14333             (prev_entry->map_aligned == this_entry->map_aligned) &&
14334             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14335             (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14336             (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14337             /* from_reserved_zone: OK if that field doesn't match */
14338             (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14339             (prev_entry->vme_resilient_codesign ==
14340              this_entry->vme_resilient_codesign) &&
14341             (prev_entry->vme_resilient_media ==
14342              this_entry->vme_resilient_media) &&
14343
14344             (prev_entry->wired_count == this_entry->wired_count) &&
14345             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14346
14347             ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14348             (prev_entry->in_transition == FALSE) &&
14349             (this_entry->in_transition == FALSE) &&
14350             (prev_entry->needs_wakeup == FALSE) &&
14351             (this_entry->needs_wakeup == FALSE) &&
14352             (prev_entry->is_shared == FALSE) &&
14353             (this_entry->is_shared == FALSE) &&
14354             (prev_entry->superpage_size == FALSE) &&
14355             (this_entry->superpage_size == FALSE)
14356                 ) {
14357                 vm_map_store_entry_unlink(map, prev_entry);
14358                 assert(prev_entry->vme_start < this_entry->vme_end);
14359                 if (prev_entry->map_aligned)
14360                         assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14361                                                    VM_MAP_PAGE_MASK(map)));
14362                 this_entry->vme_start = prev_entry->vme_start;
14363                 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14364
14365                 if (map->holelistenabled) {
14366                         vm_map_store_update_first_free(map, this_entry, TRUE);
14367                 }
14368
14369                 if (prev_entry->is_sub_map) {
14370                         vm_map_deallocate(VME_SUBMAP(prev_entry));
14371                 } else {
14372                         vm_object_deallocate(VME_OBJECT(prev_entry));
14373                 }
14374                 vm_map_entry_dispose(map, prev_entry);
14375                 SAVE_HINT_MAP_WRITE(map, this_entry);
14376                 counter(c_vm_map_simplified++);
14377         }
14378 }
14379
14380 void
14381 vm_map_simplify(
14382         vm_map_t        map,
14383         vm_map_offset_t start)
14384 {
14385         vm_map_entry_t  this_entry;
14386
14387         vm_map_lock(map);
14388         if (vm_map_lookup_entry(map, start, &this_entry)) {
14389                 vm_map_simplify_entry(map, this_entry);
14390                 vm_map_simplify_entry(map, this_entry->vme_next);
14391         }
14392         counter(c_vm_map_simplify_called++);
14393         vm_map_unlock(map);
14394 }
14395
14396 static void
14397 vm_map_simplify_range(
14398         vm_map_t        map,
14399         vm_map_offset_t start,
14400         vm_map_offset_t end)
14401 {
14402         vm_map_entry_t  entry;
14403
14404         /*
14405          * The map should be locked (for "write") by the caller.
14406          */
14407
14408         if (start >= end) {
14409                 /* invalid address range */
14410                 return;
14411         }
14412
14413         start = vm_map_trunc_page(start,
14414                                   VM_MAP_PAGE_MASK(map));
14415         end = vm_map_round_page(end,
14416                                 VM_MAP_PAGE_MASK(map));
14417
14418         if (!vm_map_lookup_entry(map, start, &entry)) {
14419                 /* "start" is not mapped and "entry" ends before "start" */
14420                 if (entry == vm_map_to_entry(map)) {
14421                         /* start with first entry in the map */
14422                         entry = vm_map_first_entry(map);
14423                 } else {
14424                         /* start with next entry */
14425                         entry = entry->vme_next;
14426                 }
14427         }
14428
14429         while (entry != vm_map_to_entry(map) &&
14430                entry->vme_start <= end) {
14431                 /* try and coalesce "entry" with its previous entry */
14432                 vm_map_simplify_entry(map, entry);
14433                 entry = entry->vme_next;
14434         }
14435 }
14436
14437
14438 /*
14439  *      Routine:        vm_map_machine_attribute
14440  *      Purpose:
14441  *              Provide machine-specific attributes to mappings,
14442  *              such as cachability etc. for machines that provide
14443  *              them.  NUMA architectures and machines with big/strange
14444  *              caches will use this.
14445  *      Note:
14446  *              Responsibilities for locking and checking are handled here,
14447  *              everything else in the pmap module. If any non-volatile
14448  *              information must be kept, the pmap module should handle
14449  *              it itself. [This assumes that attributes do not
14450  *              need to be inherited, which seems ok to me]
14451  */
14452 kern_return_t
14453 vm_map_machine_attribute(
14454         vm_map_t                        map,
14455         vm_map_offset_t         start,
14456         vm_map_offset_t         end,
14457         vm_machine_attribute_t  attribute,
14458         vm_machine_attribute_val_t* value)              /* IN/OUT */
14459 {
14460         kern_return_t   ret;
14461         vm_map_size_t sync_size;
14462         vm_map_entry_t entry;
14463
14464         if (start < vm_map_min(map) || end > vm_map_max(map))
14465                 return KERN_INVALID_ADDRESS;
14466
14467         /* Figure how much memory we need to flush (in page increments) */
14468         sync_size = end - start;
14469
14470         vm_map_lock(map);
14471
14472         if (attribute != MATTR_CACHE) {
14473                 /* If we don't have to find physical addresses, we */
14474                 /* don't have to do an explicit traversal here.    */
14475                 ret = pmap_attribute(map->pmap, start, end-start,
14476                                      attribute, value);
14477                 vm_map_unlock(map);
14478                 return ret;
14479         }
14480
14481         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
14482
14483         while(sync_size) {
14484                 if (vm_map_lookup_entry(map, start, &entry)) {
14485                         vm_map_size_t   sub_size;
14486                         if((entry->vme_end - start) > sync_size) {
14487                                 sub_size = sync_size;
14488                                 sync_size = 0;
14489                         } else {
14490                                 sub_size = entry->vme_end - start;
14491                                 sync_size -= sub_size;
14492                         }
14493                         if(entry->is_sub_map) {
14494                                 vm_map_offset_t sub_start;
14495                                 vm_map_offset_t sub_end;
14496
14497                                 sub_start = (start - entry->vme_start)
14498                                         + VME_OFFSET(entry);
14499                                 sub_end = sub_start + sub_size;
14500                                 vm_map_machine_attribute(
14501                                         VME_SUBMAP(entry),
14502                                         sub_start,
14503                                         sub_end,
14504                                         attribute, value);
14505                         } else {
14506                                 if (VME_OBJECT(entry)) {
14507                                         vm_page_t               m;
14508                                         vm_object_t             object;
14509                                         vm_object_t             base_object;
14510                                         vm_object_t             last_object;
14511                                         vm_object_offset_t      offset;
14512                                         vm_object_offset_t      base_offset;
14513                                         vm_map_size_t           range;
14514                                         range = sub_size;
14515                                         offset = (start - entry->vme_start)
14516                                                 + VME_OFFSET(entry);
14517                                         base_offset = offset;
14518                                         object = VME_OBJECT(entry);
14519                                         base_object = object;
14520                                         last_object = NULL;
14521
14522                                         vm_object_lock(object);
14523
14524                                         while (range) {
14525                                                 m = vm_page_lookup(
14526                                                         object, offset);
14527
14528                                                 if (m && !m->vmp_fictitious) {
14529                                                         ret =
14530                                                                 pmap_attribute_cache_sync(
14531                                                                         VM_PAGE_GET_PHYS_PAGE(m),
14532                                                                         PAGE_SIZE,
14533                                                                         attribute, value);
14534
14535                                                 } else if (object->shadow) {
14536                                                         offset = offset + object->vo_shadow_offset;
14537                                                         last_object = object;
14538                                                         object = object->shadow;
14539                                                         vm_object_lock(last_object->shadow);
14540                                                         vm_object_unlock(last_object);
14541                                                         continue;
14542                                                 }
14543                                                 range -= PAGE_SIZE;
14544
14545                                                 if (base_object != object) {
14546                                                         vm_object_unlock(object);
14547                                                         vm_object_lock(base_object);
14548                                                         object = base_object;
14549                                                 }
14550                                                 /* Bump to the next page */
14551                                                 base_offset += PAGE_SIZE;
14552                                                 offset = base_offset;
14553                                         }
14554                                         vm_object_unlock(object);
14555                                 }
14556                         }
14557                         start += sub_size;
14558                 } else {
14559                         vm_map_unlock(map);
14560                         return KERN_FAILURE;
14561                 }
14562
14563         }
14564
14565         vm_map_unlock(map);
14566
14567         return ret;
14568 }
14569
14570 /*
14571  *      vm_map_behavior_set:
14572  *
14573  *      Sets the paging reference behavior of the specified address
14574  *      range in the target map.  Paging reference behavior affects
14575  *      how pagein operations resulting from faults on the map will be
14576  *      clustered.
14577  */
14578 kern_return_t
14579 vm_map_behavior_set(
14580         vm_map_t        map,
14581         vm_map_offset_t start,
14582         vm_map_offset_t end,
14583         vm_behavior_t   new_behavior)
14584 {
14585         vm_map_entry_t  entry;
14586         vm_map_entry_t  temp_entry;
14587
14588         XPR(XPR_VM_MAP,
14589             "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
14590             map, start, end, new_behavior, 0);
14591
14592         if (start > end ||
14593             start < vm_map_min(map) ||
14594             end > vm_map_max(map)) {
14595                 return KERN_NO_SPACE;
14596         }
14597
14598         switch (new_behavior) {
14599
14600         /*
14601          * This first block of behaviors all set a persistent state on the specified
14602          * memory range.  All we have to do here is to record the desired behavior
14603          * in the vm_map_entry_t's.
14604          */
14605
14606         case VM_BEHAVIOR_DEFAULT:
14607         case VM_BEHAVIOR_RANDOM:
14608         case VM_BEHAVIOR_SEQUENTIAL:
14609         case VM_BEHAVIOR_RSEQNTL:
14610         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14611                 vm_map_lock(map);
14612
14613                 /*
14614                  *      The entire address range must be valid for the map.
14615                  *      Note that vm_map_range_check() does a
14616                  *      vm_map_lookup_entry() internally and returns the
14617                  *      entry containing the start of the address range if
14618                  *      the entire range is valid.
14619                  */
14620                 if (vm_map_range_check(map, start, end, &temp_entry)) {
14621                         entry = temp_entry;
14622                         vm_map_clip_start(map, entry, start);
14623                 }
14624                 else {
14625                         vm_map_unlock(map);
14626                         return(KERN_INVALID_ADDRESS);
14627                 }
14628
14629                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14630                         vm_map_clip_end(map, entry, end);
14631                         if (entry->is_sub_map) {
14632                                 assert(!entry->use_pmap);
14633                         }
14634
14635                         if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
14636                                 entry->zero_wired_pages = TRUE;
14637                         } else {
14638                                 entry->behavior = new_behavior;
14639                         }
14640                         entry = entry->vme_next;
14641                 }
14642
14643                 vm_map_unlock(map);
14644                 break;
14645
14646         /*
14647          * The rest of these are different from the above in that they cause
14648          * an immediate action to take place as opposed to setting a behavior that
14649          * affects future actions.
14650          */
14651
14652         case VM_BEHAVIOR_WILLNEED:
14653                 return vm_map_willneed(map, start, end);
14654
14655         case VM_BEHAVIOR_DONTNEED:
14656                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14657
14658         case VM_BEHAVIOR_FREE:
14659                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14660
14661         case VM_BEHAVIOR_REUSABLE:
14662                 return vm_map_reusable_pages(map, start, end);
14663
14664         case VM_BEHAVIOR_REUSE:
14665                 return vm_map_reuse_pages(map, start, end);
14666
14667         case VM_BEHAVIOR_CAN_REUSE:
14668                 return vm_map_can_reuse(map, start, end);
14669
14670 #if MACH_ASSERT
14671         case VM_BEHAVIOR_PAGEOUT:
14672                 return vm_map_pageout(map, start, end);
14673 #endif /* MACH_ASSERT */
14674
14675         default:
14676                 return(KERN_INVALID_ARGUMENT);
14677         }
14678
14679         return(KERN_SUCCESS);
14680 }
14681
14682
14683 /*
14684  * Internals for madvise(MADV_WILLNEED) system call.
14685  *
14686  * The present implementation is to do a read-ahead if the mapping corresponds
14687  * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
14688  * and basically ignore the "advice" (which we are always free to do).
14689  */
14690
14691
14692 static kern_return_t
14693 vm_map_willneed(
14694         vm_map_t        map,
14695         vm_map_offset_t start,
14696         vm_map_offset_t end
14697 )
14698 {
14699         vm_map_entry_t                  entry;
14700         vm_object_t                     object;
14701         memory_object_t                 pager;
14702         struct vm_object_fault_info     fault_info = {};
14703         kern_return_t                   kr;
14704         vm_object_size_t                len;
14705         vm_object_offset_t              offset;
14706
14707         fault_info.interruptible = THREAD_UNINT;        /* ignored value */
14708         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
14709         fault_info.stealth       = TRUE;
14710
14711         /*
14712          * The MADV_WILLNEED operation doesn't require any changes to the
14713          * vm_map_entry_t's, so the read lock is sufficient.
14714          */
14715
14716         vm_map_lock_read(map);
14717
14718         /*
14719          * The madvise semantics require that the address range be fully
14720          * allocated with no holes.  Otherwise, we're required to return
14721          * an error.
14722          */
14723
14724         if (! vm_map_range_check(map, start, end, &entry)) {
14725                 vm_map_unlock_read(map);
14726                 return KERN_INVALID_ADDRESS;
14727         }
14728
14729         /*
14730          * Examine each vm_map_entry_t in the range.
14731          */
14732         for (; entry != vm_map_to_entry(map) && start < end; ) {
14733
14734                 /*
14735                  * The first time through, the start address could be anywhere
14736                  * within the vm_map_entry we found.  So adjust the offset to
14737                  * correspond.  After that, the offset will always be zero to
14738                  * correspond to the beginning of the current vm_map_entry.
14739                  */
14740                 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14741
14742                 /*
14743                  * Set the length so we don't go beyond the end of the
14744                  * map_entry or beyond the end of the range we were given.
14745                  * This range could span also multiple map entries all of which
14746                  * map different files, so make sure we only do the right amount
14747                  * of I/O for each object.  Note that it's possible for there
14748                  * to be multiple map entries all referring to the same object
14749                  * but with different page permissions, but it's not worth
14750                  * trying to optimize that case.
14751                  */
14752                 len = MIN(entry->vme_end - start, end - start);
14753
14754                 if ((vm_size_t) len != len) {
14755                         /* 32-bit overflow */
14756                         len = (vm_size_t) (0 - PAGE_SIZE);
14757                 }
14758                 fault_info.cluster_size = (vm_size_t) len;
14759                 fault_info.lo_offset    = offset;
14760                 fault_info.hi_offset    = offset + len;
14761                 fault_info.user_tag     = VME_ALIAS(entry);
14762                 fault_info.pmap_options = 0;
14763                 if (entry->iokit_acct ||
14764                     (!entry->is_sub_map && !entry->use_pmap)) {
14765                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14766                 }
14767
14768                 /*
14769                  * If there's no read permission to this mapping, then just
14770                  * skip it.
14771                  */
14772                 if ((entry->protection & VM_PROT_READ) == 0) {
14773                         entry = entry->vme_next;
14774                         start = entry->vme_start;
14775                         continue;
14776                 }
14777
14778                 /*
14779                  * Find the file object backing this map entry.  If there is
14780                  * none, then we simply ignore the "will need" advice for this
14781                  * entry and go on to the next one.
14782                  */
14783                 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14784                         entry = entry->vme_next;
14785                         start = entry->vme_start;
14786                         continue;
14787                 }
14788
14789                 /*
14790                  * The data_request() could take a long time, so let's
14791                  * release the map lock to avoid blocking other threads.
14792                  */
14793                 vm_map_unlock_read(map);
14794
14795                 vm_object_paging_begin(object);
14796                 pager = object->pager;
14797                 vm_object_unlock(object);
14798
14799                 /*
14800                  * Get the data from the object asynchronously.
14801                  *
14802                  * Note that memory_object_data_request() places limits on the
14803                  * amount of I/O it will do.  Regardless of the len we
14804                  * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
14805                  * silently truncates the len to that size.  This isn't
14806                  * necessarily bad since madvise shouldn't really be used to
14807                  * page in unlimited amounts of data.  Other Unix variants
14808                  * limit the willneed case as well.  If this turns out to be an
14809                  * issue for developers, then we can always adjust the policy
14810                  * here and still be backwards compatible since this is all
14811                  * just "advice".
14812                  */
14813                 kr = memory_object_data_request(
14814                         pager,
14815                         offset + object->paging_offset,
14816                         0,      /* ignored */
14817                         VM_PROT_READ,
14818                         (memory_object_fault_info_t)&fault_info);
14819
14820                 vm_object_lock(object);
14821                 vm_object_paging_end(object);
14822                 vm_object_unlock(object);
14823
14824                 /*
14825                  * If we couldn't do the I/O for some reason, just give up on
14826                  * the madvise.  We still return success to the user since
14827                  * madvise isn't supposed to fail when the advice can't be
14828                  * taken.
14829                  */
14830                 if (kr != KERN_SUCCESS) {
14831                         return KERN_SUCCESS;
14832                 }
14833
14834                 start += len;
14835                 if (start >= end) {
14836                         /* done */
14837                         return KERN_SUCCESS;
14838                 }
14839
14840                 /* look up next entry */
14841                 vm_map_lock_read(map);
14842                 if (! vm_map_lookup_entry(map, start, &entry)) {
14843                         /*
14844                          * There's a new hole in the address range.
14845                          */
14846                         vm_map_unlock_read(map);
14847                         return KERN_INVALID_ADDRESS;
14848                 }
14849         }
14850
14851         vm_map_unlock_read(map);
14852         return KERN_SUCCESS;
14853 }
14854
14855 static boolean_t
14856 vm_map_entry_is_reusable(
14857         vm_map_entry_t entry)
14858 {
14859         /* Only user map entries */
14860
14861         vm_object_t object;
14862
14863         if (entry->is_sub_map) {
14864                 return FALSE;
14865         }
14866
14867         switch (VME_ALIAS(entry)) {
14868         case VM_MEMORY_MALLOC:
14869         case VM_MEMORY_MALLOC_SMALL:
14870         case VM_MEMORY_MALLOC_LARGE:
14871         case VM_MEMORY_REALLOC:
14872         case VM_MEMORY_MALLOC_TINY:
14873         case VM_MEMORY_MALLOC_LARGE_REUSABLE:
14874         case VM_MEMORY_MALLOC_LARGE_REUSED:
14875                 /*
14876                  * This is a malloc() memory region: check if it's still
14877                  * in its original state and can be re-used for more
14878                  * malloc() allocations.
14879                  */
14880                 break;
14881         default:
14882                 /*
14883                  * Not a malloc() memory region: let the caller decide if
14884                  * it's re-usable.
14885                  */
14886                 return TRUE;
14887         }
14888
14889         if (/*entry->is_shared ||*/
14890             entry->is_sub_map ||
14891             entry->in_transition ||
14892             entry->protection != VM_PROT_DEFAULT ||
14893             entry->max_protection != VM_PROT_ALL ||
14894             entry->inheritance != VM_INHERIT_DEFAULT ||
14895             entry->no_cache ||
14896             entry->permanent ||
14897             entry->superpage_size != FALSE ||
14898             entry->zero_wired_pages ||
14899             entry->wired_count != 0 ||
14900             entry->user_wired_count != 0) {
14901                 return FALSE;
14902         }
14903
14904         object = VME_OBJECT(entry);
14905         if (object == VM_OBJECT_NULL) {
14906                 return TRUE;
14907         }
14908         if (
14909 #if 0
14910                 /*
14911                  * Let's proceed even if the VM object is potentially
14912                  * shared.
14913                  * We check for this later when processing the actual
14914                  * VM pages, so the contents will be safe if shared.
14915                  *
14916                  * But we can still mark this memory region as "reusable" to
14917                  * acknowledge that the caller did let us know that the memory
14918                  * could be re-used and should not be penalized for holding
14919                  * on to it.  This allows its "resident size" to not include
14920                  * the reusable range.
14921                  */
14922             object->ref_count == 1 &&
14923 #endif
14924             object->wired_page_count == 0 &&
14925             object->copy == VM_OBJECT_NULL &&
14926             object->shadow == VM_OBJECT_NULL &&
14927             object->internal &&
14928             object->purgable == VM_PURGABLE_DENY &&
14929             object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
14930             !object->true_share &&
14931             object->wimg_bits == VM_WIMG_USE_DEFAULT &&
14932             !object->code_signed) {
14933                 return TRUE;
14934         }
14935         return FALSE;
14936
14937
14938 }
14939
14940 static kern_return_t
14941 vm_map_reuse_pages(
14942         vm_map_t        map,
14943         vm_map_offset_t start,
14944         vm_map_offset_t end)
14945 {
14946         vm_map_entry_t                  entry;
14947         vm_object_t                     object;
14948         vm_object_offset_t              start_offset, end_offset;
14949
14950         /*
14951          * The MADV_REUSE operation doesn't require any changes to the
14952          * vm_map_entry_t's, so the read lock is sufficient.
14953          */
14954
14955         vm_map_lock_read(map);
14956         assert(map->pmap != kernel_pmap);       /* protect alias access */
14957
14958         /*
14959          * The madvise semantics require that the address range be fully
14960          * allocated with no holes.  Otherwise, we're required to return
14961          * an error.
14962          */
14963
14964         if (!vm_map_range_check(map, start, end, &entry)) {
14965                 vm_map_unlock_read(map);
14966                 vm_page_stats_reusable.reuse_pages_failure++;
14967                 return KERN_INVALID_ADDRESS;
14968         }
14969
14970         /*
14971          * Examine each vm_map_entry_t in the range.
14972          */
14973         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14974              entry = entry->vme_next) {
14975                 /*
14976                  * Sanity check on the VM map entry.
14977                  */
14978                 if (! vm_map_entry_is_reusable(entry)) {
14979                         vm_map_unlock_read(map);
14980                         vm_page_stats_reusable.reuse_pages_failure++;
14981                         return KERN_INVALID_ADDRESS;
14982                 }
14983
14984                 /*
14985                  * The first time through, the start address could be anywhere
14986                  * within the vm_map_entry we found.  So adjust the offset to
14987                  * correspond.
14988                  */
14989                 if (entry->vme_start < start) {
14990                         start_offset = start - entry->vme_start;
14991                 } else {
14992                         start_offset = 0;
14993                 }
14994                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14995                 start_offset += VME_OFFSET(entry);
14996                 end_offset += VME_OFFSET(entry);
14997
14998                 assert(!entry->is_sub_map);
14999                 object = VME_OBJECT(entry);
15000                 if (object != VM_OBJECT_NULL) {
15001                         vm_object_lock(object);
15002                         vm_object_reuse_pages(object, start_offset, end_offset,
15003                                               TRUE);
15004                         vm_object_unlock(object);
15005                 }
15006
15007                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15008                         /*
15009                          * XXX
15010                          * We do not hold the VM map exclusively here.
15011                          * The "alias" field is not that critical, so it's
15012                          * safe to update it here, as long as it is the only
15013                          * one that can be modified while holding the VM map
15014                          * "shared".
15015                          */
15016                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15017                 }
15018         }
15019
15020         vm_map_unlock_read(map);
15021         vm_page_stats_reusable.reuse_pages_success++;
15022         return KERN_SUCCESS;
15023 }
15024
15025
15026 static kern_return_t
15027 vm_map_reusable_pages(
15028         vm_map_t        map,
15029         vm_map_offset_t start,
15030         vm_map_offset_t end)
15031 {
15032         vm_map_entry_t                  entry;
15033         vm_object_t                     object;
15034         vm_object_offset_t              start_offset, end_offset;
15035         vm_map_offset_t                 pmap_offset;
15036
15037         /*
15038          * The MADV_REUSABLE operation doesn't require any changes to the
15039          * vm_map_entry_t's, so the read lock is sufficient.
15040          */
15041
15042         vm_map_lock_read(map);
15043         assert(map->pmap != kernel_pmap);       /* protect alias access */
15044
15045         /*
15046          * The madvise semantics require that the address range be fully
15047          * allocated with no holes.  Otherwise, we're required to return
15048          * an error.
15049          */
15050
15051         if (!vm_map_range_check(map, start, end, &entry)) {
15052                 vm_map_unlock_read(map);
15053                 vm_page_stats_reusable.reusable_pages_failure++;
15054                 return KERN_INVALID_ADDRESS;
15055         }
15056
15057         /*
15058          * Examine each vm_map_entry_t in the range.
15059          */
15060         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15061              entry = entry->vme_next) {
15062                 int kill_pages = 0;
15063
15064                 /*
15065                  * Sanity check on the VM map entry.
15066                  */
15067                 if (! vm_map_entry_is_reusable(entry)) {
15068                         vm_map_unlock_read(map);
15069                         vm_page_stats_reusable.reusable_pages_failure++;
15070                         return KERN_INVALID_ADDRESS;
15071                 }
15072
15073                 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15074                         /* not writable: can't discard contents */
15075                         vm_map_unlock_read(map);
15076                         vm_page_stats_reusable.reusable_nonwritable++;
15077                         vm_page_stats_reusable.reusable_pages_failure++;
15078                         return KERN_PROTECTION_FAILURE;
15079                 }
15080
15081                 /*
15082                  * The first time through, the start address could be anywhere
15083                  * within the vm_map_entry we found.  So adjust the offset to
15084                  * correspond.
15085                  */
15086                 if (entry->vme_start < start) {
15087                         start_offset = start - entry->vme_start;
15088                         pmap_offset = start;
15089                 } else {
15090                         start_offset = 0;
15091                         pmap_offset = entry->vme_start;
15092                 }
15093                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15094                 start_offset += VME_OFFSET(entry);
15095                 end_offset += VME_OFFSET(entry);
15096
15097                 assert(!entry->is_sub_map);
15098                 object = VME_OBJECT(entry);
15099                 if (object == VM_OBJECT_NULL)
15100                         continue;
15101
15102
15103                 vm_object_lock(object);
15104                 if (((object->ref_count == 1) ||
15105                      (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15106                       object->copy == VM_OBJECT_NULL)) &&
15107                     object->shadow == VM_OBJECT_NULL &&
15108                     /*
15109                      * "iokit_acct" entries are billed for their virtual size
15110                      * (rather than for their resident pages only), so they
15111                      * wouldn't benefit from making pages reusable, and it
15112                      * would be hard to keep track of pages that are both
15113                      * "iokit_acct" and "reusable" in the pmap stats and
15114                      * ledgers.
15115                      */
15116                     !(entry->iokit_acct ||
15117                       (!entry->is_sub_map && !entry->use_pmap))) {
15118                         if (object->ref_count != 1) {
15119                                 vm_page_stats_reusable.reusable_shared++;
15120                         }
15121                         kill_pages = 1;
15122                 } else {
15123                         kill_pages = -1;
15124                 }
15125                 if (kill_pages != -1) {
15126                         vm_object_deactivate_pages(object,
15127                                                    start_offset,
15128                                                    end_offset - start_offset,
15129                                                    kill_pages,
15130                                                    TRUE /*reusable_pages*/,
15131                                                    map->pmap,
15132                                                    pmap_offset);
15133                 } else {
15134                         vm_page_stats_reusable.reusable_pages_shared++;
15135                 }
15136                 vm_object_unlock(object);
15137
15138                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15139                     VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15140                         /*
15141                          * XXX
15142                          * We do not hold the VM map exclusively here.
15143                          * The "alias" field is not that critical, so it's
15144                          * safe to update it here, as long as it is the only
15145                          * one that can be modified while holding the VM map
15146                          * "shared".
15147                          */
15148                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15149                 }
15150         }
15151
15152         vm_map_unlock_read(map);
15153         vm_page_stats_reusable.reusable_pages_success++;
15154         return KERN_SUCCESS;
15155 }
15156
15157
15158 static kern_return_t
15159 vm_map_can_reuse(
15160         vm_map_t        map,
15161         vm_map_offset_t start,
15162         vm_map_offset_t end)
15163 {
15164         vm_map_entry_t                  entry;
15165
15166         /*
15167          * The MADV_REUSABLE operation doesn't require any changes to the
15168          * vm_map_entry_t's, so the read lock is sufficient.
15169          */
15170
15171         vm_map_lock_read(map);
15172         assert(map->pmap != kernel_pmap);       /* protect alias access */
15173
15174         /*
15175          * The madvise semantics require that the address range be fully
15176          * allocated with no holes.  Otherwise, we're required to return
15177          * an error.
15178          */
15179
15180         if (!vm_map_range_check(map, start, end, &entry)) {
15181                 vm_map_unlock_read(map);
15182                 vm_page_stats_reusable.can_reuse_failure++;
15183                 return KERN_INVALID_ADDRESS;
15184         }
15185
15186         /*
15187          * Examine each vm_map_entry_t in the range.
15188          */
15189         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15190              entry = entry->vme_next) {
15191                 /*
15192                  * Sanity check on the VM map entry.
15193                  */
15194                 if (! vm_map_entry_is_reusable(entry)) {
15195                         vm_map_unlock_read(map);
15196                         vm_page_stats_reusable.can_reuse_failure++;
15197                         return KERN_INVALID_ADDRESS;
15198                 }
15199         }
15200
15201         vm_map_unlock_read(map);
15202         vm_page_stats_reusable.can_reuse_success++;
15203         return KERN_SUCCESS;
15204 }
15205
15206
15207 #if MACH_ASSERT
15208 static kern_return_t
15209 vm_map_pageout(
15210         vm_map_t        map,
15211         vm_map_offset_t start,
15212         vm_map_offset_t end)
15213 {
15214         vm_map_entry_t                  entry;
15215
15216         /*
15217          * The MADV_PAGEOUT operation doesn't require any changes to the
15218          * vm_map_entry_t's, so the read lock is sufficient.
15219          */
15220
15221         vm_map_lock_read(map);
15222
15223         /*
15224          * The madvise semantics require that the address range be fully
15225          * allocated with no holes.  Otherwise, we're required to return
15226          * an error.
15227          */
15228
15229         if (!vm_map_range_check(map, start, end, &entry)) {
15230                 vm_map_unlock_read(map);
15231                 return KERN_INVALID_ADDRESS;
15232         }
15233
15234         /*
15235          * Examine each vm_map_entry_t in the range.
15236          */
15237         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15238              entry = entry->vme_next) {
15239                 vm_object_t     object;
15240
15241                 /*
15242                  * Sanity check on the VM map entry.
15243                  */
15244                 if (entry->is_sub_map) {
15245                         vm_map_t submap;
15246                         vm_map_offset_t submap_start;
15247                         vm_map_offset_t submap_end;
15248                         vm_map_entry_t submap_entry;
15249
15250                         submap = VME_SUBMAP(entry);
15251                         submap_start = VME_OFFSET(entry);
15252                         submap_end = submap_start + (entry->vme_end -
15253                                                      entry->vme_start);
15254
15255                         vm_map_lock_read(submap);
15256
15257                         if (! vm_map_range_check(submap,
15258                                                  submap_start,
15259                                                  submap_end,
15260                                                  &submap_entry)) {
15261                                 vm_map_unlock_read(submap);
15262                                 vm_map_unlock_read(map);
15263                                 return KERN_INVALID_ADDRESS;
15264                         }
15265
15266                         object = VME_OBJECT(submap_entry);
15267                         if (submap_entry->is_sub_map ||
15268                             object == VM_OBJECT_NULL ||
15269                             !object->internal) {
15270                                 vm_map_unlock_read(submap);
15271                                 continue;
15272                         }
15273
15274                         vm_object_pageout(object);
15275
15276                         vm_map_unlock_read(submap);
15277                         submap = VM_MAP_NULL;
15278                         submap_entry = VM_MAP_ENTRY_NULL;
15279                         continue;
15280                 }
15281
15282                 object = VME_OBJECT(entry);
15283                 if (entry->is_sub_map ||
15284                     object == VM_OBJECT_NULL ||
15285                     !object->internal) {
15286                         continue;
15287                 }
15288
15289                 vm_object_pageout(object);
15290         }
15291
15292         vm_map_unlock_read(map);
15293         return KERN_SUCCESS;
15294 }
15295 #endif /* MACH_ASSERT */
15296
15297
15298 /*
15299  *      Routine:        vm_map_entry_insert
15300  *
15301  *      Description:    This routine inserts a new vm_entry in a locked map.
15302  */
15303 vm_map_entry_t
15304 vm_map_entry_insert(
15305         vm_map_t                map,
15306         vm_map_entry_t          insp_entry,
15307         vm_map_offset_t         start,
15308         vm_map_offset_t         end,
15309         vm_object_t             object,
15310         vm_object_offset_t      offset,
15311         boolean_t               needs_copy,
15312         boolean_t               is_shared,
15313         boolean_t               in_transition,
15314         vm_prot_t               cur_protection,
15315         vm_prot_t               max_protection,
15316         vm_behavior_t           behavior,
15317         vm_inherit_t            inheritance,
15318         unsigned                wired_count,
15319         boolean_t               no_cache,
15320         boolean_t               permanent,
15321         unsigned int            superpage_size,
15322         boolean_t               clear_map_aligned,
15323         boolean_t               is_submap,
15324         boolean_t               used_for_jit,
15325         int                     alias)
15326 {
15327         vm_map_entry_t  new_entry;
15328
15329         assert(insp_entry != (vm_map_entry_t)0);
15330         vm_map_lock_assert_exclusive(map);
15331
15332 #if DEVELOPMENT || DEBUG
15333         vm_object_offset_t      end_offset = 0;
15334         assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15335 #endif /* DEVELOPMENT || DEBUG */
15336
15337         new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15338
15339         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15340                 new_entry->map_aligned = TRUE;
15341         } else {
15342                 new_entry->map_aligned = FALSE;
15343         }
15344         if (clear_map_aligned &&
15345             (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15346              ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15347                 new_entry->map_aligned = FALSE;
15348         }
15349
15350         new_entry->vme_start = start;
15351         new_entry->vme_end = end;
15352         assert(page_aligned(new_entry->vme_start));
15353         assert(page_aligned(new_entry->vme_end));
15354         if (new_entry->map_aligned) {
15355                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15356                                            VM_MAP_PAGE_MASK(map)));
15357                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15358                                            VM_MAP_PAGE_MASK(map)));
15359         }
15360         assert(new_entry->vme_start < new_entry->vme_end);
15361
15362         VME_OBJECT_SET(new_entry, object);
15363         VME_OFFSET_SET(new_entry, offset);
15364         new_entry->is_shared = is_shared;
15365         new_entry->is_sub_map = is_submap;
15366         new_entry->needs_copy = needs_copy;
15367         new_entry->in_transition = in_transition;
15368         new_entry->needs_wakeup = FALSE;
15369         new_entry->inheritance = inheritance;
15370         new_entry->protection = cur_protection;
15371         new_entry->max_protection = max_protection;
15372         new_entry->behavior = behavior;
15373         new_entry->wired_count = wired_count;
15374         new_entry->user_wired_count = 0;
15375         if (is_submap) {
15376                 /*
15377                  * submap: "use_pmap" means "nested".
15378                  * default: false.
15379                  */
15380                 new_entry->use_pmap = FALSE;
15381         } else {
15382                 /*
15383                  * object: "use_pmap" means "use pmap accounting" for footprint.
15384                  * default: true.
15385                  */
15386                 new_entry->use_pmap = TRUE;
15387         }
15388         VME_ALIAS_SET(new_entry, alias);
15389         new_entry->zero_wired_pages = FALSE;
15390         new_entry->no_cache = no_cache;
15391         new_entry->permanent = permanent;
15392         if (superpage_size)
15393                 new_entry->superpage_size = TRUE;
15394         else
15395                 new_entry->superpage_size = FALSE;
15396         if (used_for_jit){
15397 #if CONFIG_EMBEDDED
15398                 if (!(map->jit_entry_exists))
15399 #endif /* CONFIG_EMBEDDED */
15400                 {
15401                         new_entry->used_for_jit = TRUE;
15402                         map->jit_entry_exists = TRUE;
15403
15404                         /* Tell the pmap that it supports JIT. */
15405                         pmap_set_jit_entitled(map->pmap);
15406                 }
15407         } else {
15408                 new_entry->used_for_jit = FALSE;
15409         }
15410         new_entry->pmap_cs_associated = FALSE;
15411         new_entry->iokit_acct = FALSE;
15412         new_entry->vme_resilient_codesign = FALSE;
15413         new_entry->vme_resilient_media = FALSE;
15414         new_entry->vme_atomic = FALSE;
15415
15416         /*
15417          *      Insert the new entry into the list.
15418          */
15419
15420         vm_map_store_entry_link(map, insp_entry, new_entry,
15421                                 VM_MAP_KERNEL_FLAGS_NONE);
15422         map->size += end - start;
15423
15424         /*
15425          *      Update the free space hint and the lookup hint.
15426          */
15427
15428         SAVE_HINT_MAP_WRITE(map, new_entry);
15429         return new_entry;
15430 }
15431
15432 /*
15433  *      Routine:        vm_map_remap_extract
15434  *
15435  *      Descritpion:    This routine returns a vm_entry list from a map.
15436  */
15437 static kern_return_t
15438 vm_map_remap_extract(
15439         vm_map_t                map,
15440         vm_map_offset_t         addr,
15441         vm_map_size_t           size,
15442         boolean_t               copy,
15443         struct vm_map_header    *map_header,
15444         vm_prot_t               *cur_protection,
15445         vm_prot_t               *max_protection,
15446         /* What, no behavior? */
15447         vm_inherit_t            inheritance,
15448         boolean_t               pageable,
15449         boolean_t               same_map,
15450         vm_map_kernel_flags_t   vmk_flags)
15451 {
15452         kern_return_t           result;
15453         vm_map_size_t           mapped_size;
15454         vm_map_size_t           tmp_size;
15455         vm_map_entry_t          src_entry;     /* result of last map lookup */
15456         vm_map_entry_t          new_entry;
15457         vm_object_offset_t      offset;
15458         vm_map_offset_t         map_address;
15459         vm_map_offset_t         src_start;     /* start of entry to map */
15460         vm_map_offset_t         src_end;       /* end of region to be mapped */
15461         vm_object_t             object;
15462         vm_map_version_t        version;
15463         boolean_t               src_needs_copy;
15464         boolean_t               new_entry_needs_copy;
15465         vm_map_entry_t          saved_src_entry;
15466         boolean_t               src_entry_was_wired;
15467         vm_prot_t               max_prot_for_prot_copy;
15468
15469         assert(map != VM_MAP_NULL);
15470         assert(size != 0);
15471         assert(size == vm_map_round_page(size, PAGE_MASK));
15472         assert(inheritance == VM_INHERIT_NONE ||
15473                inheritance == VM_INHERIT_COPY ||
15474                inheritance == VM_INHERIT_SHARE);
15475
15476         /*
15477          *      Compute start and end of region.
15478          */
15479         src_start = vm_map_trunc_page(addr, PAGE_MASK);
15480         src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15481
15482
15483         /*
15484          *      Initialize map_header.
15485          */
15486         map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15487         map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15488         map_header->nentries = 0;
15489         map_header->entries_pageable = pageable;
15490         map_header->page_shift = PAGE_SHIFT;
15491
15492         vm_map_store_init( map_header );
15493
15494         if (copy && vmk_flags.vmkf_remap_prot_copy) {
15495                 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15496         } else {
15497                 max_prot_for_prot_copy = VM_PROT_NONE;
15498         }
15499         *cur_protection = VM_PROT_ALL;
15500         *max_protection = VM_PROT_ALL;
15501
15502         map_address = 0;
15503         mapped_size = 0;
15504         result = KERN_SUCCESS;
15505
15506         /*
15507          *      The specified source virtual space might correspond to
15508          *      multiple map entries, need to loop on them.
15509          */
15510         vm_map_lock(map);
15511         while (mapped_size != size) {
15512                 vm_map_size_t   entry_size;
15513
15514                 /*
15515                  *      Find the beginning of the region.
15516                  */
15517                 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
15518                         result = KERN_INVALID_ADDRESS;
15519                         break;
15520                 }
15521
15522                 if (src_start < src_entry->vme_start ||
15523                     (mapped_size && src_start != src_entry->vme_start)) {
15524                         result = KERN_INVALID_ADDRESS;
15525                         break;
15526                 }
15527
15528                 tmp_size = size - mapped_size;
15529                 if (src_end > src_entry->vme_end)
15530                         tmp_size -= (src_end - src_entry->vme_end);
15531
15532                 entry_size = (vm_map_size_t)(src_entry->vme_end -
15533                                              src_entry->vme_start);
15534
15535                 if(src_entry->is_sub_map) {
15536                         vm_map_reference(VME_SUBMAP(src_entry));
15537                         object = VM_OBJECT_NULL;
15538                 } else {
15539                         object = VME_OBJECT(src_entry);
15540                         if (src_entry->iokit_acct) {
15541                                 /*
15542                                  * This entry uses "IOKit accounting".
15543                                  */
15544                         } else if (object != VM_OBJECT_NULL &&
15545                                    object->purgable != VM_PURGABLE_DENY) {
15546                                 /*
15547                                  * Purgeable objects have their own accounting:
15548                                  * no pmap accounting for them.
15549                                  */
15550                                 assertf(!src_entry->use_pmap,
15551                                         "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15552                                         map,
15553                                         src_entry,
15554                                         (uint64_t)src_entry->vme_start,
15555                                         (uint64_t)src_entry->vme_end,
15556                                         src_entry->protection,
15557                                         src_entry->max_protection,
15558                                         VME_ALIAS(src_entry));
15559                         } else {
15560                                 /*
15561                                  * Not IOKit or purgeable:
15562                                  * must be accounted by pmap stats.
15563                                  */
15564                                 assertf(src_entry->use_pmap,
15565                                         "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15566                                         map,
15567                                         src_entry,
15568                                         (uint64_t)src_entry->vme_start,
15569                                         (uint64_t)src_entry->vme_end,
15570                                         src_entry->protection,
15571                                         src_entry->max_protection,
15572                                         VME_ALIAS(src_entry));
15573                         }
15574
15575                         if (object == VM_OBJECT_NULL) {
15576                                 object = vm_object_allocate(entry_size);
15577                                 VME_OFFSET_SET(src_entry, 0);
15578                                 VME_OBJECT_SET(src_entry, object);
15579                                 assert(src_entry->use_pmap);
15580                         } else if (object->copy_strategy !=
15581                                    MEMORY_OBJECT_COPY_SYMMETRIC) {
15582                                 /*
15583                                  *      We are already using an asymmetric
15584                                  *      copy, and therefore we already have
15585                                  *      the right object.
15586                                  */
15587                                 assert(!src_entry->needs_copy);
15588                         } else if (src_entry->needs_copy || object->shadowed ||
15589                                    (object->internal && !object->true_share &&
15590                                     !src_entry->is_shared &&
15591                                     object->vo_size > entry_size)) {
15592
15593                                 VME_OBJECT_SHADOW(src_entry, entry_size);
15594                                 assert(src_entry->use_pmap);
15595
15596                                 if (!src_entry->needs_copy &&
15597                                     (src_entry->protection & VM_PROT_WRITE)) {
15598                                         vm_prot_t prot;
15599
15600                                         assert(!pmap_has_prot_policy(src_entry->protection));
15601
15602                                         prot = src_entry->protection & ~VM_PROT_WRITE;
15603
15604                                         if (override_nx(map,
15605                                                         VME_ALIAS(src_entry))
15606                                             && prot)
15607                                                 prot |= VM_PROT_EXECUTE;
15608
15609                                         assert(!pmap_has_prot_policy(prot));
15610
15611                                         if(map->mapped_in_other_pmaps) {
15612                                                 vm_object_pmap_protect(
15613                                                         VME_OBJECT(src_entry),
15614                                                         VME_OFFSET(src_entry),
15615                                                         entry_size,
15616                                                         PMAP_NULL,
15617                                                         src_entry->vme_start,
15618                                                         prot);
15619                                         } else {
15620                                                 pmap_protect(vm_map_pmap(map),
15621                                                              src_entry->vme_start,
15622                                                              src_entry->vme_end,
15623                                                              prot);
15624                                         }
15625                                 }
15626
15627                                 object = VME_OBJECT(src_entry);
15628                                 src_entry->needs_copy = FALSE;
15629                         }
15630
15631
15632                         vm_object_lock(object);
15633                         vm_object_reference_locked(object); /* object ref. for new entry */
15634                         if (object->copy_strategy ==
15635                             MEMORY_OBJECT_COPY_SYMMETRIC) {
15636                                 object->copy_strategy =
15637                                         MEMORY_OBJECT_COPY_DELAY;
15638                         }
15639                         vm_object_unlock(object);
15640                 }
15641
15642                 offset = (VME_OFFSET(src_entry) +
15643                           (src_start - src_entry->vme_start));
15644
15645                 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15646                 vm_map_entry_copy(new_entry, src_entry);
15647                 if (new_entry->is_sub_map) {
15648                         /* clr address space specifics */
15649                         new_entry->use_pmap = FALSE;
15650                 } else if (copy) {
15651                         /*
15652                          * We're dealing with a copy-on-write operation,
15653                          * so the resulting mapping should not inherit the
15654                          * original mapping's accounting settings.
15655                          * "use_pmap" should be reset to its default (TRUE)
15656                          * so that the new mapping gets accounted for in
15657                          * the task's memory footprint.
15658                          */
15659                         new_entry->use_pmap = TRUE;
15660                 }
15661                 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15662                 assert(!new_entry->iokit_acct);
15663
15664                 new_entry->map_aligned = FALSE;
15665
15666                 new_entry->vme_start = map_address;
15667                 new_entry->vme_end = map_address + tmp_size;
15668                 assert(new_entry->vme_start < new_entry->vme_end);
15669                 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15670                         /*
15671                          * Remapping for vm_map_protect(VM_PROT_COPY)
15672                          * to convert a read-only mapping into a
15673                          * copy-on-write version of itself but
15674                          * with write access:
15675                          * keep the original inheritance and add
15676                          * VM_PROT_WRITE to the max protection.
15677                          */
15678                         new_entry->inheritance = src_entry->inheritance;
15679                         new_entry->protection &= max_prot_for_prot_copy;
15680                         new_entry->max_protection |= VM_PROT_WRITE;
15681                 } else {
15682                         new_entry->inheritance = inheritance;
15683                 }
15684                 VME_OFFSET_SET(new_entry, offset);
15685
15686                 /*
15687                  * The new region has to be copied now if required.
15688                  */
15689         RestartCopy:
15690                 if (!copy) {
15691                         /*
15692                          * Cannot allow an entry describing a JIT
15693                          * region to be shared across address spaces.
15694                          */
15695                         if (src_entry->used_for_jit == TRUE && !same_map) {
15696 #if CONFIG_EMBEDDED
15697                                 result = KERN_INVALID_ARGUMENT;
15698                                 break;
15699 #endif /* CONFIG_EMBEDDED */
15700                         }
15701                         src_entry->is_shared = TRUE;
15702                         new_entry->is_shared = TRUE;
15703                         if (!(new_entry->is_sub_map))
15704                                 new_entry->needs_copy = FALSE;
15705
15706                 } else if (src_entry->is_sub_map) {
15707                         /* make this a COW sub_map if not already */
15708                         assert(new_entry->wired_count == 0);
15709                         new_entry->needs_copy = TRUE;
15710                         object = VM_OBJECT_NULL;
15711                 } else if (src_entry->wired_count == 0 &&
15712                            vm_object_copy_quickly(&VME_OBJECT(new_entry),
15713                                                   VME_OFFSET(new_entry),
15714                                                   (new_entry->vme_end -
15715                                                    new_entry->vme_start),
15716                                                   &src_needs_copy,
15717                                                   &new_entry_needs_copy)) {
15718
15719                         new_entry->needs_copy = new_entry_needs_copy;
15720                         new_entry->is_shared = FALSE;
15721                         assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15722
15723                         /*
15724                          * Handle copy_on_write semantics.
15725                          */
15726                         if (src_needs_copy && !src_entry->needs_copy) {
15727                                 vm_prot_t prot;
15728
15729                                 assert(!pmap_has_prot_policy(src_entry->protection));
15730
15731                                 prot = src_entry->protection & ~VM_PROT_WRITE;
15732
15733                                 if (override_nx(map,
15734                                                 VME_ALIAS(src_entry))
15735                                     && prot)
15736                                         prot |= VM_PROT_EXECUTE;
15737
15738                                 assert(!pmap_has_prot_policy(prot));
15739
15740                                 vm_object_pmap_protect(object,
15741                                                        offset,
15742                                                        entry_size,
15743                                                        ((src_entry->is_shared
15744                                                          || map->mapped_in_other_pmaps) ?
15745                                                         PMAP_NULL : map->pmap),
15746                                                        src_entry->vme_start,
15747                                                        prot);
15748
15749                                 assert(src_entry->wired_count == 0);
15750                                 src_entry->needs_copy = TRUE;
15751                         }
15752                         /*
15753                          * Throw away the old object reference of the new entry.
15754                          */
15755                         vm_object_deallocate(object);
15756
15757                 } else {
15758                         new_entry->is_shared = FALSE;
15759                         assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15760
15761                         src_entry_was_wired = (src_entry->wired_count > 0);
15762                         saved_src_entry = src_entry;
15763                         src_entry = VM_MAP_ENTRY_NULL;
15764
15765                         /*
15766                          * The map can be safely unlocked since we
15767                          * already hold a reference on the object.
15768                          *
15769                          * Record the timestamp of the map for later
15770                          * verification, and unlock the map.
15771                          */
15772                         version.main_timestamp = map->timestamp;
15773                         vm_map_unlock(map);     /* Increments timestamp once! */
15774
15775                         /*
15776                          * Perform the copy.
15777                          */
15778                         if (src_entry_was_wired > 0) {
15779                                 vm_object_lock(object);
15780                                 result = vm_object_copy_slowly(
15781                                         object,
15782                                         offset,
15783                                         (new_entry->vme_end -
15784                                         new_entry->vme_start),
15785                                         THREAD_UNINT,
15786                                         &VME_OBJECT(new_entry));
15787
15788                                 VME_OFFSET_SET(new_entry, 0);
15789                                 new_entry->needs_copy = FALSE;
15790                         } else {
15791                                 vm_object_offset_t new_offset;
15792
15793                                 new_offset = VME_OFFSET(new_entry);
15794                                 result = vm_object_copy_strategically(
15795                                         object,
15796                                         offset,
15797                                         (new_entry->vme_end -
15798                                         new_entry->vme_start),
15799                                         &VME_OBJECT(new_entry),
15800                                         &new_offset,
15801                                         &new_entry_needs_copy);
15802                                 if (new_offset != VME_OFFSET(new_entry)) {
15803                                         VME_OFFSET_SET(new_entry, new_offset);
15804                                 }
15805
15806                                 new_entry->needs_copy = new_entry_needs_copy;
15807                         }
15808
15809                         /*
15810                          * Throw away the old object reference of the new entry.
15811                          */
15812                         vm_object_deallocate(object);
15813
15814                         if (result != KERN_SUCCESS &&
15815                             result != KERN_MEMORY_RESTART_COPY) {
15816                                 _vm_map_entry_dispose(map_header, new_entry);
15817                                 vm_map_lock(map);
15818                                 break;
15819                         }
15820
15821                         /*
15822                          * Verify that the map has not substantially
15823                          * changed while the copy was being made.
15824                          */
15825
15826                         vm_map_lock(map);
15827                         if (version.main_timestamp + 1 != map->timestamp) {
15828                                 /*
15829                                  * Simple version comparison failed.
15830                                  *
15831                                  * Retry the lookup and verify that the
15832                                  * same object/offset are still present.
15833                                  */
15834                                 saved_src_entry = VM_MAP_ENTRY_NULL;
15835                                 vm_object_deallocate(VME_OBJECT(new_entry));
15836                                 _vm_map_entry_dispose(map_header, new_entry);
15837                                 if (result == KERN_MEMORY_RESTART_COPY)
15838                                         result = KERN_SUCCESS;
15839                                 continue;
15840                         }
15841                         /* map hasn't changed: src_entry is still valid */
15842                         src_entry = saved_src_entry;
15843                         saved_src_entry = VM_MAP_ENTRY_NULL;
15844
15845                         if (result == KERN_MEMORY_RESTART_COPY) {
15846                                 vm_object_reference(object);
15847                                 goto RestartCopy;
15848                         }
15849                 }
15850
15851                 _vm_map_store_entry_link(map_header,
15852                                    map_header->links.prev, new_entry);
15853
15854                 /*Protections for submap mapping are irrelevant here*/
15855                 if( !src_entry->is_sub_map ) {
15856                         *cur_protection &= src_entry->protection;
15857                         *max_protection &= src_entry->max_protection;
15858                 }
15859                 map_address += tmp_size;
15860                 mapped_size += tmp_size;
15861                 src_start += tmp_size;
15862
15863         } /* end while */
15864
15865         vm_map_unlock(map);
15866         if (result != KERN_SUCCESS) {
15867                 /*
15868                  * Free all allocated elements.
15869                  */
15870                 for (src_entry = map_header->links.next;
15871                      src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
15872                      src_entry = new_entry) {
15873                         new_entry = src_entry->vme_next;
15874                         _vm_map_store_entry_unlink(map_header, src_entry);
15875                         if (src_entry->is_sub_map) {
15876                                 vm_map_deallocate(VME_SUBMAP(src_entry));
15877                         } else {
15878                                 vm_object_deallocate(VME_OBJECT(src_entry));
15879                         }
15880                         _vm_map_entry_dispose(map_header, src_entry);
15881                 }
15882         }
15883         return result;
15884 }
15885
15886 /*
15887  *      Routine:        vm_remap
15888  *
15889  *                      Map portion of a task's address space.
15890  *                      Mapped region must not overlap more than
15891  *                      one vm memory object. Protections and
15892  *                      inheritance attributes remain the same
15893  *                      as in the original task and are out parameters.
15894  *                      Source and Target task can be identical
15895  *                      Other attributes are identical as for vm_map()
15896  */
15897 kern_return_t
15898 vm_map_remap(
15899         vm_map_t                target_map,
15900         vm_map_address_t        *address,
15901         vm_map_size_t           size,
15902         vm_map_offset_t         mask,
15903         int                     flags,
15904         vm_map_kernel_flags_t   vmk_flags,
15905         vm_tag_t                tag,
15906         vm_map_t                src_map,
15907         vm_map_offset_t         memory_address,
15908         boolean_t               copy,
15909         vm_prot_t               *cur_protection,
15910         vm_prot_t               *max_protection,
15911         vm_inherit_t            inheritance)
15912 {
15913         kern_return_t           result;
15914         vm_map_entry_t          entry;
15915         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
15916         vm_map_entry_t          new_entry;
15917         struct vm_map_header    map_header;
15918         vm_map_offset_t         offset_in_mapping;
15919
15920         if (target_map == VM_MAP_NULL)
15921                 return KERN_INVALID_ARGUMENT;
15922
15923         switch (inheritance) {
15924         case VM_INHERIT_NONE:
15925         case VM_INHERIT_COPY:
15926         case VM_INHERIT_SHARE:
15927                 if (size != 0 && src_map != VM_MAP_NULL)
15928                         break;
15929                 /*FALL THRU*/
15930         default:
15931                 return KERN_INVALID_ARGUMENT;
15932         }
15933
15934         /*
15935          * If the user is requesting that we return the address of the
15936          * first byte of the data (rather than the base of the page),
15937          * then we use different rounding semantics: specifically,
15938          * we assume that (memory_address, size) describes a region
15939          * all of whose pages we must cover, rather than a base to be truncated
15940          * down and a size to be added to that base.  So we figure out
15941          * the highest page that the requested region includes and make
15942          * sure that the size will cover it.
15943          *
15944          * The key example we're worried about it is of the form:
15945          *
15946          *              memory_address = 0x1ff0, size = 0x20
15947          *
15948          * With the old semantics, we round down the memory_address to 0x1000
15949          * and round up the size to 0x1000, resulting in our covering *only*
15950          * page 0x1000.  With the new semantics, we'd realize that the region covers
15951          * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
15952          * 0x1000 and page 0x2000 in the region we remap.
15953          */
15954         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15955                 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
15956                 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
15957         } else {
15958                 size = vm_map_round_page(size, PAGE_MASK);
15959         }
15960         if (size == 0) {
15961                 return KERN_INVALID_ARGUMENT;
15962         }
15963
15964         result = vm_map_remap_extract(src_map, memory_address,
15965                                       size, copy, &map_header,
15966                                       cur_protection,
15967                                       max_protection,
15968                                       inheritance,
15969                                       target_map->hdr.entries_pageable,
15970                                       src_map == target_map,
15971                                       vmk_flags);
15972
15973         if (result != KERN_SUCCESS) {
15974                 return result;
15975         }
15976
15977         /*
15978          * Allocate/check a range of free virtual address
15979          * space for the target
15980          */
15981         *address = vm_map_trunc_page(*address,
15982                                      VM_MAP_PAGE_MASK(target_map));
15983         vm_map_lock(target_map);
15984         result = vm_map_remap_range_allocate(target_map, address, size,
15985                                              mask, flags, vmk_flags, tag,
15986                                              &insp_entry);
15987
15988         for (entry = map_header.links.next;
15989              entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
15990              entry = new_entry) {
15991                 new_entry = entry->vme_next;
15992                 _vm_map_store_entry_unlink(&map_header, entry);
15993                 if (result == KERN_SUCCESS) {
15994                         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15995                                 /* no codesigning -> read-only access */
15996                                 entry->max_protection = VM_PROT_READ;
15997                                 entry->protection = VM_PROT_READ;
15998                                 entry->vme_resilient_codesign = TRUE;
15999                         }
16000                         entry->vme_start += *address;
16001                         entry->vme_end += *address;
16002                         assert(!entry->map_aligned);
16003                         vm_map_store_entry_link(target_map, insp_entry, entry,
16004                                                 vmk_flags);
16005                         insp_entry = entry;
16006                 } else {
16007                         if (!entry->is_sub_map) {
16008                                 vm_object_deallocate(VME_OBJECT(entry));
16009                         } else {
16010                                 vm_map_deallocate(VME_SUBMAP(entry));
16011                         }
16012                         _vm_map_entry_dispose(&map_header, entry);
16013                 }
16014         }
16015
16016         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16017                 *cur_protection = VM_PROT_READ;
16018                 *max_protection = VM_PROT_READ;
16019         }
16020
16021         if( target_map->disable_vmentry_reuse == TRUE) {
16022                 assert(!target_map->is_nested_map);
16023                 if( target_map->highest_entry_end < insp_entry->vme_end ){
16024                         target_map->highest_entry_end = insp_entry->vme_end;
16025                 }
16026         }
16027
16028         if (result == KERN_SUCCESS) {
16029                 target_map->size += size;
16030                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16031
16032 #if PMAP_CS
16033                 if (*max_protection & VM_PROT_EXECUTE) {
16034                         vm_map_address_t region_start = 0, region_size = 0;
16035                         struct pmap_cs_code_directory *region_cd = NULL;
16036                         vm_map_address_t base = 0;
16037                         struct pmap_cs_lookup_results results = {};
16038                         vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16039                         vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16040
16041                         pmap_cs_lookup(src_map->pmap, memory_address, &results);
16042                         region_size = results.region_size;
16043                         region_start = results.region_start;
16044                         region_cd = results.region_cd_entry;
16045                         base = results.base;
16046
16047                         if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16048                                 *cur_protection = VM_PROT_READ;
16049                                 *max_protection = VM_PROT_READ;
16050                                 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16051                                            "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16052                                            page_addr, page_addr+assoc_size, *address,
16053                                            region_start, region_size,
16054                                            region_cd != NULL ? "not " : ""              // Don't leak kernel slide
16055                                         );
16056                         }
16057                 }
16058 #endif
16059
16060         }
16061         vm_map_unlock(target_map);
16062
16063         if (result == KERN_SUCCESS && target_map->wiring_required)
16064                 result = vm_map_wire_kernel(target_map, *address,
16065                                      *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16066                                      TRUE);
16067
16068         /*
16069          * If requested, return the address of the data pointed to by the
16070          * request, rather than the base of the resulting page.
16071          */
16072         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16073                 *address += offset_in_mapping;
16074         }
16075
16076         return result;
16077 }
16078
16079 /*
16080  *      Routine:        vm_map_remap_range_allocate
16081  *
16082  *      Description:
16083  *              Allocate a range in the specified virtual address map.
16084  *              returns the address and the map entry just before the allocated
16085  *              range
16086  *
16087  *      Map must be locked.
16088  */
16089
16090 static kern_return_t
16091 vm_map_remap_range_allocate(
16092         vm_map_t                map,
16093         vm_map_address_t        *address,       /* IN/OUT */
16094         vm_map_size_t           size,
16095         vm_map_offset_t         mask,
16096         int                     flags,
16097         vm_map_kernel_flags_t   vmk_flags,
16098         __unused vm_tag_t       tag,
16099         vm_map_entry_t          *map_entry)     /* OUT */
16100 {
16101         vm_map_entry_t  entry;
16102         vm_map_offset_t start;
16103         vm_map_offset_t end;
16104         vm_map_offset_t desired_empty_end;
16105         kern_return_t   kr;
16106         vm_map_entry_t          hole_entry;
16107
16108 StartAgain: ;
16109
16110         start = *address;
16111
16112         if (flags & VM_FLAGS_ANYWHERE)
16113         {
16114                 if (flags & VM_FLAGS_RANDOM_ADDR)
16115                 {
16116                         /*
16117                          * Get a random start address.
16118                          */
16119                         kr = vm_map_random_address_for_size(map, address, size);
16120                         if (kr != KERN_SUCCESS) {
16121                                 return(kr);
16122                         }
16123                         start = *address;
16124                 }
16125
16126                 /*
16127                  *      Calculate the first possible address.
16128                  */
16129
16130                 if (start < map->min_offset)
16131                         start = map->min_offset;
16132                 if (start > map->max_offset)
16133                         return(KERN_NO_SPACE);
16134
16135                 /*
16136                  *      Look for the first possible address;
16137                  *      if there's already something at this
16138                  *      address, we have to start after it.
16139                  */
16140
16141                 if( map->disable_vmentry_reuse == TRUE) {
16142                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
16143                 } else {
16144
16145                         if (map->holelistenabled) {
16146                                 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16147
16148                                 if (hole_entry == NULL) {
16149                                         /*
16150                                          * No more space in the map?
16151                                          */
16152                                         return(KERN_NO_SPACE);
16153                                 } else {
16154
16155                                         boolean_t found_hole = FALSE;
16156
16157                                         do {
16158                                                 if (hole_entry->vme_start >= start) {
16159                                                         start = hole_entry->vme_start;
16160                                                         found_hole = TRUE;
16161                                                         break;
16162                                                 }
16163
16164                                                 if (hole_entry->vme_end > start) {
16165                                                         found_hole = TRUE;
16166                                                         break;
16167                                                 }
16168                                                 hole_entry = hole_entry->vme_next;
16169
16170                                         } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16171
16172                                         if (found_hole == FALSE) {
16173                                                 return (KERN_NO_SPACE);
16174                                         }
16175
16176                                         entry = hole_entry;
16177                                 }
16178                         } else {
16179                                 assert(first_free_is_valid(map));
16180                                 if (start == map->min_offset) {
16181                                         if ((entry = map->first_free) != vm_map_to_entry(map))
16182                                                 start = entry->vme_end;
16183                                 } else {
16184                                         vm_map_entry_t  tmp_entry;
16185                                         if (vm_map_lookup_entry(map, start, &tmp_entry))
16186                                                 start = tmp_entry->vme_end;
16187                                         entry = tmp_entry;
16188                                 }
16189                         }
16190                         start = vm_map_round_page(start,
16191                                                   VM_MAP_PAGE_MASK(map));
16192                 }
16193
16194                 /*
16195                  *      In any case, the "entry" always precedes
16196                  *      the proposed new region throughout the
16197                  *      loop:
16198                  */
16199
16200                 while (TRUE) {
16201                         vm_map_entry_t  next;
16202
16203                         /*
16204                          *      Find the end of the proposed new region.
16205                          *      Be sure we didn't go beyond the end, or
16206                          *      wrap around the address.
16207                          */
16208
16209                         end = ((start + mask) & ~mask);
16210                         end = vm_map_round_page(end,
16211                                                 VM_MAP_PAGE_MASK(map));
16212                         if (end < start)
16213                                 return(KERN_NO_SPACE);
16214                         start = end;
16215                         end += size;
16216
16217                         /* We want an entire page of empty space, but don't increase the allocation size. */
16218                         desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16219
16220                         if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16221                                 if (map->wait_for_space) {
16222                                         if (size <= (map->max_offset -
16223                                                      map->min_offset)) {
16224                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16225                                                 vm_map_unlock(map);
16226                                                 thread_block(THREAD_CONTINUE_NULL);
16227                                                 vm_map_lock(map);
16228                                                 goto StartAgain;
16229                                         }
16230                                 }
16231
16232                                 return(KERN_NO_SPACE);
16233                         }
16234
16235                         next = entry->vme_next;
16236
16237                         if (map->holelistenabled) {
16238                                 if (entry->vme_end >= desired_empty_end)
16239                                         break;
16240                         } else {
16241                                 /*
16242                                  *      If there are no more entries, we must win.
16243                                  *
16244                                  *      OR
16245                                  *
16246                                  *      If there is another entry, it must be
16247                                  *      after the end of the potential new region.
16248                                  */
16249
16250                                 if (next == vm_map_to_entry(map))
16251                                         break;
16252
16253                                 if (next->vme_start >= desired_empty_end)
16254                                         break;
16255                         }
16256
16257                         /*
16258                          *      Didn't fit -- move to the next entry.
16259                          */
16260
16261                         entry = next;
16262
16263                         if (map->holelistenabled) {
16264                                 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16265                                         /*
16266                                          * Wrapped around
16267                                          */
16268                                         return(KERN_NO_SPACE);
16269                                 }
16270                                 start = entry->vme_start;
16271                         } else {
16272                                 start = entry->vme_end;
16273                         }
16274                 }
16275
16276                 if (map->holelistenabled) {
16277
16278                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16279                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16280                         }
16281                 }
16282
16283                 *address = start;
16284
16285         } else {
16286                 vm_map_entry_t          temp_entry;
16287
16288                 /*
16289                  *      Verify that:
16290                  *              the address doesn't itself violate
16291                  *              the mask requirement.
16292                  */
16293
16294                 if ((start & mask) != 0)
16295                         return(KERN_NO_SPACE);
16296
16297
16298                 /*
16299                  *      ...     the address is within bounds
16300                  */
16301
16302                 end = start + size;
16303
16304                 if ((start < map->min_offset) ||
16305                     (end > map->max_offset) ||
16306                     (start >= end)) {
16307                         return(KERN_INVALID_ADDRESS);
16308                 }
16309
16310                 /*
16311                  * If we're asked to overwrite whatever was mapped in that
16312                  * range, first deallocate that range.
16313                  */
16314                 if (flags & VM_FLAGS_OVERWRITE) {
16315                         vm_map_t zap_map;
16316                         int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16317
16318                         /*
16319                          * We use a "zap_map" to avoid having to unlock
16320                          * the "map" in vm_map_delete(), which would compromise
16321                          * the atomicity of the "deallocate" and then "remap"
16322                          * combination.
16323                          */
16324                         zap_map = vm_map_create(PMAP_NULL,
16325                                                 start,
16326                                                 end,
16327                                                 map->hdr.entries_pageable);
16328                         if (zap_map == VM_MAP_NULL) {
16329                                 return KERN_RESOURCE_SHORTAGE;
16330                         }
16331                         vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16332                         vm_map_disable_hole_optimization(zap_map);
16333
16334                         if (vmk_flags.vmkf_overwrite_immutable) {
16335                                 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16336                         }
16337                         kr = vm_map_delete(map, start, end,
16338                                            remove_flags,
16339                                            zap_map);
16340                         if (kr == KERN_SUCCESS) {
16341                                 vm_map_destroy(zap_map,
16342                                                VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16343                                 zap_map = VM_MAP_NULL;
16344                         }
16345                 }
16346
16347                 /*
16348                  *      ...     the starting address isn't allocated
16349                  */
16350
16351                 if (vm_map_lookup_entry(map, start, &temp_entry))
16352                         return(KERN_NO_SPACE);
16353
16354                 entry = temp_entry;
16355
16356                 /*
16357                  *      ...     the next region doesn't overlap the
16358                  *              end point.
16359                  */
16360
16361                 if ((entry->vme_next != vm_map_to_entry(map)) &&
16362                     (entry->vme_next->vme_start < end))
16363                         return(KERN_NO_SPACE);
16364         }
16365         *map_entry = entry;
16366         return(KERN_SUCCESS);
16367 }
16368
16369 /*
16370  *      vm_map_switch:
16371  *
16372  *      Set the address map for the current thread to the specified map
16373  */
16374
16375 vm_map_t
16376 vm_map_switch(
16377         vm_map_t        map)
16378 {
16379         int             mycpu;
16380         thread_t        thread = current_thread();
16381         vm_map_t        oldmap = thread->map;
16382
16383         mp_disable_preemption();
16384         mycpu = cpu_number();
16385
16386         /*
16387          *      Deactivate the current map and activate the requested map
16388          */
16389         PMAP_SWITCH_USER(thread, map, mycpu);
16390
16391         mp_enable_preemption();
16392         return(oldmap);
16393 }
16394
16395
16396 /*
16397  *      Routine:        vm_map_write_user
16398  *
16399  *      Description:
16400  *              Copy out data from a kernel space into space in the
16401  *              destination map. The space must already exist in the
16402  *              destination map.
16403  *              NOTE:  This routine should only be called by threads
16404  *              which can block on a page fault. i.e. kernel mode user
16405  *              threads.
16406  *
16407  */
16408 kern_return_t
16409 vm_map_write_user(
16410         vm_map_t                map,
16411         void                    *src_p,
16412         vm_map_address_t        dst_addr,
16413         vm_size_t               size)
16414 {
16415         kern_return_t   kr = KERN_SUCCESS;
16416
16417         if(current_map() == map) {
16418                 if (copyout(src_p, dst_addr, size)) {
16419                         kr = KERN_INVALID_ADDRESS;
16420                 }
16421         } else {
16422                 vm_map_t        oldmap;
16423
16424                 /* take on the identity of the target map while doing */
16425                 /* the transfer */
16426
16427                 vm_map_reference(map);
16428                 oldmap = vm_map_switch(map);
16429                 if (copyout(src_p, dst_addr, size)) {
16430                         kr = KERN_INVALID_ADDRESS;
16431                 }
16432                 vm_map_switch(oldmap);
16433                 vm_map_deallocate(map);
16434         }
16435         return kr;
16436 }
16437
16438 /*
16439  *      Routine:        vm_map_read_user
16440  *
16441  *      Description:
16442  *              Copy in data from a user space source map into the
16443  *              kernel map. The space must already exist in the
16444  *              kernel map.
16445  *              NOTE:  This routine should only be called by threads
16446  *              which can block on a page fault. i.e. kernel mode user
16447  *              threads.
16448  *
16449  */
16450 kern_return_t
16451 vm_map_read_user(
16452         vm_map_t                map,
16453         vm_map_address_t        src_addr,
16454         void                    *dst_p,
16455         vm_size_t               size)
16456 {
16457         kern_return_t   kr = KERN_SUCCESS;
16458
16459         if(current_map() == map) {
16460                 if (copyin(src_addr, dst_p, size)) {
16461                         kr = KERN_INVALID_ADDRESS;
16462                 }
16463         } else {
16464                 vm_map_t        oldmap;
16465
16466                 /* take on the identity of the target map while doing */
16467                 /* the transfer */
16468
16469                 vm_map_reference(map);
16470                 oldmap = vm_map_switch(map);
16471                 if (copyin(src_addr, dst_p, size)) {
16472                         kr = KERN_INVALID_ADDRESS;
16473                 }
16474                 vm_map_switch(oldmap);
16475                 vm_map_deallocate(map);
16476         }
16477         return kr;
16478 }
16479
16480
16481 /*
16482  *      vm_map_check_protection:
16483  *
16484  *      Assert that the target map allows the specified
16485  *      privilege on the entire address region given.
16486  *      The entire region must be allocated.
16487  */
16488 boolean_t
16489 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16490                         vm_map_offset_t end, vm_prot_t protection)
16491 {
16492         vm_map_entry_t entry;
16493         vm_map_entry_t tmp_entry;
16494
16495         vm_map_lock(map);
16496
16497         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
16498         {
16499                 vm_map_unlock(map);
16500                 return (FALSE);
16501         }
16502
16503         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16504                 vm_map_unlock(map);
16505                 return(FALSE);
16506         }
16507
16508         entry = tmp_entry;
16509
16510         while (start < end) {
16511                 if (entry == vm_map_to_entry(map)) {
16512                         vm_map_unlock(map);
16513                         return(FALSE);
16514                 }
16515
16516                 /*
16517                  *      No holes allowed!
16518                  */
16519
16520                 if (start < entry->vme_start) {
16521                         vm_map_unlock(map);
16522                         return(FALSE);
16523                 }
16524
16525                 /*
16526                  * Check protection associated with entry.
16527                  */
16528
16529                 if ((entry->protection & protection) != protection) {
16530                         vm_map_unlock(map);
16531                         return(FALSE);
16532                 }
16533
16534                 /* go to next entry */
16535
16536                 start = entry->vme_end;
16537                 entry = entry->vme_next;
16538         }
16539         vm_map_unlock(map);
16540         return(TRUE);
16541 }
16542
16543 kern_return_t
16544 vm_map_purgable_control(
16545         vm_map_t                map,
16546         vm_map_offset_t         address,
16547         vm_purgable_t           control,
16548         int                     *state)
16549 {
16550         vm_map_entry_t          entry;
16551         vm_object_t             object;
16552         kern_return_t           kr;
16553         boolean_t               was_nonvolatile;
16554
16555         /*
16556          * Vet all the input parameters and current type and state of the
16557          * underlaying object.  Return with an error if anything is amiss.
16558          */
16559         if (map == VM_MAP_NULL)
16560                 return(KERN_INVALID_ARGUMENT);
16561
16562         if (control != VM_PURGABLE_SET_STATE &&
16563             control != VM_PURGABLE_GET_STATE &&
16564             control != VM_PURGABLE_PURGE_ALL &&
16565             control != VM_PURGABLE_SET_STATE_FROM_KERNEL)
16566                 return(KERN_INVALID_ARGUMENT);
16567
16568         if (control == VM_PURGABLE_PURGE_ALL) {
16569                 vm_purgeable_object_purge_all();
16570                 return KERN_SUCCESS;
16571         }
16572
16573         if ((control == VM_PURGABLE_SET_STATE ||
16574              control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16575             (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16576              ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
16577                 return(KERN_INVALID_ARGUMENT);
16578
16579         vm_map_lock_read(map);
16580
16581         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16582
16583                 /*
16584                  * Must pass a valid non-submap address.
16585                  */
16586                 vm_map_unlock_read(map);
16587                 return(KERN_INVALID_ADDRESS);
16588         }
16589
16590         if ((entry->protection & VM_PROT_WRITE) == 0) {
16591                 /*
16592                  * Can't apply purgable controls to something you can't write.
16593                  */
16594                 vm_map_unlock_read(map);
16595                 return(KERN_PROTECTION_FAILURE);
16596         }
16597
16598         object = VME_OBJECT(entry);
16599         if (object == VM_OBJECT_NULL ||
16600             object->purgable == VM_PURGABLE_DENY) {
16601                 /*
16602                  * Object must already be present and be purgeable.
16603                  */
16604                 vm_map_unlock_read(map);
16605                 return KERN_INVALID_ARGUMENT;
16606         }
16607
16608         vm_object_lock(object);
16609
16610 #if 00
16611         if (VME_OFFSET(entry) != 0 ||
16612             entry->vme_end - entry->vme_start != object->vo_size) {
16613                 /*
16614                  * Can only apply purgable controls to the whole (existing)
16615                  * object at once.
16616                  */
16617                 vm_map_unlock_read(map);
16618                 vm_object_unlock(object);
16619                 return KERN_INVALID_ARGUMENT;
16620         }
16621 #endif
16622
16623         assert(!entry->is_sub_map);
16624         assert(!entry->use_pmap); /* purgeable has its own accounting */
16625
16626         vm_map_unlock_read(map);
16627
16628         was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16629
16630         kr = vm_object_purgable_control(object, control, state);
16631
16632         if (was_nonvolatile &&
16633             object->purgable != VM_PURGABLE_NONVOLATILE &&
16634             map->pmap == kernel_pmap) {
16635 #if DEBUG
16636                 object->vo_purgeable_volatilizer = kernel_task;
16637 #endif /* DEBUG */
16638         }
16639
16640         vm_object_unlock(object);
16641
16642         return kr;
16643 }
16644
16645 kern_return_t
16646 vm_map_page_query_internal(
16647         vm_map_t        target_map,
16648         vm_map_offset_t offset,
16649         int             *disposition,
16650         int             *ref_count)
16651 {
16652         kern_return_t                   kr;
16653         vm_page_info_basic_data_t       info;
16654         mach_msg_type_number_t          count;
16655
16656         count = VM_PAGE_INFO_BASIC_COUNT;
16657         kr = vm_map_page_info(target_map,
16658                               offset,
16659                               VM_PAGE_INFO_BASIC,
16660                               (vm_page_info_t) &info,
16661                               &count);
16662         if (kr == KERN_SUCCESS) {
16663                 *disposition = info.disposition;
16664                 *ref_count = info.ref_count;
16665         } else {
16666                 *disposition = 0;
16667                 *ref_count = 0;
16668         }
16669
16670         return kr;
16671 }
16672
16673 kern_return_t
16674 vm_map_page_info(
16675         vm_map_t                map,
16676         vm_map_offset_t         offset,
16677         vm_page_info_flavor_t   flavor,
16678         vm_page_info_t          info,
16679         mach_msg_type_number_t  *count)
16680 {
16681         return (vm_map_page_range_info_internal(map,
16682                                        offset, /* start of range */
16683                                        (offset + 1), /* this will get rounded in the call to the page boundary */
16684                                        flavor,
16685                                        info,
16686                                        count));
16687 }
16688
16689 kern_return_t
16690 vm_map_page_range_info_internal(
16691         vm_map_t                map,
16692         vm_map_offset_t         start_offset,
16693         vm_map_offset_t         end_offset,
16694         vm_page_info_flavor_t   flavor,
16695         vm_page_info_t          info,
16696         mach_msg_type_number_t  *count)
16697 {
16698         vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
16699         vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16700         vm_page_t               m = VM_PAGE_NULL;
16701         kern_return_t           retval = KERN_SUCCESS;
16702         int                     disposition = 0;
16703         int                     ref_count = 0;
16704         int                     depth = 0, info_idx = 0;
16705         vm_page_info_basic_t    basic_info = 0;
16706         vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16707         vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16708         boolean_t               do_region_footprint;
16709
16710         switch (flavor) {
16711         case VM_PAGE_INFO_BASIC:
16712                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
16713                         /*
16714                          * The "vm_page_info_basic_data" structure was not
16715                          * properly padded, so allow the size to be off by
16716                          * one to maintain backwards binary compatibility...
16717                          */
16718                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
16719                                 return KERN_INVALID_ARGUMENT;
16720                 }
16721                 break;
16722         default:
16723                 return KERN_INVALID_ARGUMENT;
16724         }
16725
16726         do_region_footprint = task_self_region_footprint();
16727         disposition = 0;
16728         ref_count = 0;
16729         depth = 0;
16730         info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
16731         retval = KERN_SUCCESS;
16732
16733         offset_in_page = start_offset & PAGE_MASK;
16734         start = vm_map_trunc_page(start_offset, PAGE_MASK);
16735         end = vm_map_round_page(end_offset, PAGE_MASK);
16736
16737         assert ((end - start) <= MAX_PAGE_RANGE_QUERY);
16738
16739         vm_map_lock_read(map);
16740
16741         for (curr_s_offset = start; curr_s_offset < end;) {
16742                 /*
16743                  * New lookup needs reset of these variables.
16744                  */
16745                 curr_object = object = VM_OBJECT_NULL;
16746                 offset_in_object = 0;
16747                 ref_count = 0;
16748                 depth = 0;
16749
16750                 if (do_region_footprint &&
16751                     curr_s_offset >= vm_map_last_entry(map)->vme_end) {
16752                         ledger_amount_t nonvol_compressed;
16753
16754                         /*
16755                          * Request for "footprint" info about a page beyond
16756                          * the end of address space: this must be for
16757                          * the fake region vm_map_region_recurse_64()
16758                          * reported to account for non-volatile purgeable
16759                          * memory owned by this task.
16760                          */
16761                         disposition = 0;
16762                         nonvol_compressed = 0;
16763                         ledger_get_balance(
16764                                 map->pmap->ledger,
16765                                 task_ledgers.purgeable_nonvolatile_compressed,
16766                                 &nonvol_compressed);
16767                         if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
16768                             (unsigned) nonvol_compressed) {
16769                                 /*
16770                                  * We haven't reported all the "non-volatile
16771                                  * compressed" pages yet, so report this fake
16772                                  * page as "compressed".
16773                                  */
16774                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16775                         } else {
16776                                 /*
16777                                  * We've reported all the non-volatile
16778                                  * compressed page but not all the non-volatile
16779                                  * pages , so report this fake page as
16780                                  * "resident dirty".
16781                                  */
16782                                 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16783                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16784                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
16785                         }
16786                         switch (flavor) {
16787                         case VM_PAGE_INFO_BASIC:
16788                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16789                                 basic_info->disposition = disposition;
16790                                 basic_info->ref_count = 1;
16791                                 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16792                                 basic_info->offset = 0;
16793                                 basic_info->depth = 0;
16794
16795                                 info_idx++;
16796                                 break;
16797                         }
16798                         curr_s_offset += PAGE_SIZE;
16799                         continue;
16800                 }
16801
16802                 /*
16803                  * First, find the map entry covering "curr_s_offset", going down
16804                  * submaps if necessary.
16805                  */
16806                 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16807                         /* no entry -> no object -> no page */
16808
16809                         if (curr_s_offset < vm_map_min(map)) {
16810                                 /*
16811                                  * Illegal address that falls below map min.
16812                                  */
16813                                 curr_e_offset = MIN(end, vm_map_min(map));
16814
16815                         } else if (curr_s_offset >= vm_map_max(map)) {
16816                                 /*
16817                                  * Illegal address that falls on/after map max.
16818                                  */
16819                                 curr_e_offset = end;
16820
16821                         } else if (map_entry == vm_map_to_entry(map)) {
16822                                 /*
16823                                  * Hit a hole.
16824                                  */
16825                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
16826                                         /*
16827                                          * Empty map.
16828                                          */
16829                                         curr_e_offset = MIN(map->max_offset, end);
16830                                 } else {
16831                                         /*
16832                                          * Hole at start of the map.
16833                                          */
16834                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16835                                 }
16836                         } else {
16837                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
16838                                         /*
16839                                          * Hole at the end of the map.
16840                                          */
16841                                         curr_e_offset = MIN(map->max_offset, end);
16842                                 } else {
16843                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16844                                 }
16845                         }
16846
16847                         assert(curr_e_offset >= curr_s_offset);
16848
16849                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16850
16851                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16852
16853                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16854
16855                         curr_s_offset = curr_e_offset;
16856
16857                         info_idx += num_pages;
16858
16859                         continue;
16860                 }
16861
16862                 /* compute offset from this map entry's start */
16863                 offset_in_object = curr_s_offset - map_entry->vme_start;
16864
16865                 /* compute offset into this map entry's object (or submap) */
16866                 offset_in_object += VME_OFFSET(map_entry);
16867
16868                 if (map_entry->is_sub_map) {
16869                         vm_map_t sub_map = VM_MAP_NULL;
16870                         vm_page_info_t submap_info = 0;
16871                         vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
16872
16873                         range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
16874
16875                         submap_s_offset = offset_in_object;
16876                         submap_e_offset = submap_s_offset + range_len;
16877
16878                         sub_map = VME_SUBMAP(map_entry);
16879
16880                         vm_map_reference(sub_map);
16881                         vm_map_unlock_read(map);
16882
16883                         submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16884
16885                         retval = vm_map_page_range_info_internal(sub_map,
16886                                               submap_s_offset,
16887                                               submap_e_offset,
16888                                               VM_PAGE_INFO_BASIC,
16889                                               (vm_page_info_t) submap_info,
16890                                               count);
16891
16892                         assert(retval == KERN_SUCCESS);
16893
16894                         vm_map_lock_read(map);
16895                         vm_map_deallocate(sub_map);
16896
16897                         /* Move the "info" index by the number of pages we inspected.*/
16898                         info_idx += range_len >> PAGE_SHIFT;
16899
16900                         /* Move our current offset by the size of the range we inspected.*/
16901                         curr_s_offset += range_len;
16902
16903                         continue;
16904                 }
16905
16906                 object = VME_OBJECT(map_entry);
16907                 if (object == VM_OBJECT_NULL) {
16908
16909                         /*
16910                          * We don't have an object here and, hence,
16911                          * no pages to inspect. We'll fill up the
16912                          * info structure appropriately.
16913                          */
16914
16915                         curr_e_offset = MIN(map_entry->vme_end, end);
16916
16917                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16918
16919                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16920
16921                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16922
16923                         curr_s_offset = curr_e_offset;
16924
16925                         info_idx += num_pages;
16926
16927                         continue;
16928                 }
16929
16930                 if (do_region_footprint) {
16931                         int pmap_disp;
16932
16933                         disposition = 0;
16934                         pmap_disp = 0;
16935                         if (map->has_corpse_footprint) {
16936                                 /*
16937                                  * Query the page info data we saved
16938                                  * while forking the corpse.
16939                                  */
16940                                 vm_map_corpse_footprint_query_page_info(
16941                                         map,
16942                                         curr_s_offset,
16943                                         &pmap_disp);
16944                         } else {
16945                                 /*
16946                                  * Query the pmap.
16947                                  */
16948                                 pmap_query_page_info(map->pmap,
16949                                                      curr_s_offset,
16950                                                      &pmap_disp);
16951                         }
16952                         if (object->purgable == VM_PURGABLE_NONVOLATILE &&
16953                             /* && not tagged as no-footprint? */
16954                             VM_OBJECT_OWNER(object) != NULL &&
16955                             VM_OBJECT_OWNER(object)->map == map) {
16956                                 if ((((curr_s_offset
16957                                        - map_entry->vme_start
16958                                        + VME_OFFSET(map_entry))
16959                                       / PAGE_SIZE) <
16960                                      (object->resident_page_count +
16961                                       vm_compressor_pager_get_count(object->pager)))) {
16962                                         /*
16963                                          * Non-volatile purgeable object owned
16964                                          * by this task: report the first
16965                                          * "#resident + #compressed" pages as
16966                                          * "resident" (to show that they
16967                                          * contribute to the footprint) but not
16968                                          * "dirty" (to avoid double-counting
16969                                          * with the fake "non-volatile" region
16970                                          * we'll report at the end of the
16971                                          * address space to account for all
16972                                          * (mapped or not) non-volatile memory
16973                                          * owned by this task.
16974                                          */
16975                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16976                                 }
16977                         } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
16978                                     object->purgable == VM_PURGABLE_EMPTY) &&
16979                                    /* && not tagged as no-footprint? */
16980                                    VM_OBJECT_OWNER(object) != NULL &&
16981                                    VM_OBJECT_OWNER(object)->map == map) {
16982                                 if ((((curr_s_offset
16983                                        - map_entry->vme_start
16984                                        + VME_OFFSET(map_entry))
16985                                       / PAGE_SIZE) <
16986                                      object->wired_page_count)) {
16987                                         /*
16988                                          * Volatile|empty purgeable object owned
16989                                          * by this task: report the first
16990                                          * "#wired" pages as "resident" (to
16991                                          * show that they contribute to the
16992                                          * footprint) but not "dirty" (to avoid
16993                                          * double-counting with the fake
16994                                          * "non-volatile" region we'll report
16995                                          * at the end of the address space to
16996                                          * account for all (mapped or not)
16997                                          * non-volatile memory owned by this
16998                                          * task.
16999                                          */
17000                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17001                                 }
17002                         } else if (map_entry->iokit_acct &&
17003                                    object->internal &&
17004                                    object->purgable == VM_PURGABLE_DENY) {
17005                                 /*
17006                                  * Non-purgeable IOKit memory: phys_footprint
17007                                  * includes the entire virtual mapping.
17008                                  */
17009                                 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17010                                 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17011                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17012                         } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17013                                                 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17014                                 /* alternate accounting */
17015 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17016                                 if (map->pmap->footprint_was_suspended ||
17017                                     /*
17018                                      * XXX corpse does not know if original
17019                                      * pmap had its footprint suspended...
17020                                      */
17021                                     map->has_corpse_footprint) {
17022                                         /*
17023                                          * The assertion below can fail if dyld
17024                                          * suspended footprint accounting
17025                                          * while doing some adjustments to
17026                                          * this page;  the mapping would say
17027                                          * "use pmap accounting" but the page
17028                                          * would be marked "alternate
17029                                          * accounting".
17030                                          */
17031                                 } else
17032 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17033                                         assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17034                                 pmap_disp = 0;
17035                         } else {
17036                                 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17037                                         assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17038                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17039                                         disposition |= VM_PAGE_QUERY_PAGE_REF;
17040                                         if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17041                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17042                                         } else {
17043                                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17044                                         }
17045                                 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17046                                         assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17047                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17048                                 }
17049                         }
17050                         switch (flavor) {
17051                         case VM_PAGE_INFO_BASIC:
17052                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17053                                 basic_info->disposition = disposition;
17054                                 basic_info->ref_count = 1;
17055                                 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17056                                 basic_info->offset = 0;
17057                                 basic_info->depth = 0;
17058
17059                                 info_idx++;
17060                                 break;
17061                         }
17062                         curr_s_offset += PAGE_SIZE;
17063                         continue;
17064                 }
17065
17066                 vm_object_reference(object);
17067                 /*
17068                  * Shared mode -- so we can allow other readers
17069                  * to grab the lock too.
17070                  */
17071                 vm_object_lock_shared(object);
17072
17073                 curr_e_offset = MIN(map_entry->vme_end, end);
17074
17075                 vm_map_unlock_read(map);
17076
17077                 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17078
17079                 curr_object = object;
17080
17081                 for (; curr_s_offset < curr_e_offset;) {
17082
17083                         if (object == curr_object) {
17084                                 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17085                         } else {
17086                                 ref_count = curr_object->ref_count;
17087                         }
17088
17089                         curr_offset_in_object = offset_in_object;
17090
17091                         for (;;) {
17092                                 m = vm_page_lookup(curr_object, curr_offset_in_object);
17093
17094                                 if (m != VM_PAGE_NULL) {
17095
17096                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17097                                         break;
17098
17099                                 } else {
17100                                         if (curr_object->internal &&
17101                                             curr_object->alive &&
17102                                             !curr_object->terminating &&
17103                                             curr_object->pager_ready) {
17104
17105                                                 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17106                                                     == VM_EXTERNAL_STATE_EXISTS) {
17107                                                         /* the pager has that page */
17108                                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17109                                                         break;
17110                                                 }
17111                                         }
17112
17113                                         /*
17114                                          * Go down the VM object shadow chain until we find the page
17115                                          * we're looking for.
17116                                          */
17117
17118                                         if (curr_object->shadow != VM_OBJECT_NULL) {
17119                                                 vm_object_t shadow = VM_OBJECT_NULL;
17120
17121                                                 curr_offset_in_object += curr_object->vo_shadow_offset;
17122                                                 shadow = curr_object->shadow;
17123
17124                                                 vm_object_lock_shared(shadow);
17125                                                 vm_object_unlock(curr_object);
17126
17127                                                 curr_object = shadow;
17128                                                 depth++;
17129                                                 continue;
17130                                         } else {
17131
17132                                                 break;
17133                                         }
17134                                 }
17135                         }
17136
17137                         /* The ref_count is not strictly accurate, it measures the number   */
17138                         /* of entities holding a ref on the object, they may not be mapping */
17139                         /* the object or may not be mapping the section holding the         */
17140                         /* target page but its still a ball park number and though an over- */
17141                         /* count, it picks up the copy-on-write cases                       */
17142
17143                         /* We could also get a picture of page sharing from pmap_attributes */
17144                         /* but this would under count as only faulted-in mappings would     */
17145                         /* show up.                                                         */
17146
17147                         if ((curr_object == object) && curr_object->shadow)
17148                                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17149
17150                         if (! curr_object->internal)
17151                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17152
17153                         if (m != VM_PAGE_NULL) {
17154
17155                                 if (m->vmp_fictitious) {
17156
17157                                         disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17158
17159                                 } else {
17160                                         if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
17161                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17162
17163                                         if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
17164                                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
17165
17166                                         if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q)
17167                                                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17168
17169                                         if (m->vmp_cs_validated)
17170                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17171                                         if (m->vmp_cs_tainted)
17172                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17173                                         if (m->vmp_cs_nx)
17174                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17175                                 }
17176                         }
17177
17178                         switch (flavor) {
17179                         case VM_PAGE_INFO_BASIC:
17180                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17181                                 basic_info->disposition = disposition;
17182                                 basic_info->ref_count = ref_count;
17183                                 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17184                                         VM_KERNEL_ADDRPERM(curr_object);
17185                                 basic_info->offset =
17186                                         (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17187                                 basic_info->depth = depth;
17188
17189                                 info_idx++;
17190                                 break;
17191                         }
17192
17193                         disposition = 0;
17194                         offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17195
17196                         /*
17197                          * Move to next offset in the range and in our object.
17198                          */
17199                         curr_s_offset += PAGE_SIZE;
17200                         offset_in_object += PAGE_SIZE;
17201                         curr_offset_in_object = offset_in_object;
17202
17203                         if (curr_object != object) {
17204
17205                                 vm_object_unlock(curr_object);
17206
17207                                 curr_object = object;
17208
17209                                 vm_object_lock_shared(curr_object);
17210                         } else {
17211
17212                                 vm_object_lock_yield_shared(curr_object);
17213                         }
17214                 }
17215
17216                 vm_object_unlock(curr_object);
17217                 vm_object_deallocate(curr_object);
17218
17219                 vm_map_lock_read(map);
17220         }
17221
17222         vm_map_unlock_read(map);
17223         return retval;
17224 }
17225
17226 /*
17227  *      vm_map_msync
17228  *
17229  *      Synchronises the memory range specified with its backing store
17230  *      image by either flushing or cleaning the contents to the appropriate
17231  *      memory manager engaging in a memory object synchronize dialog with
17232  *      the manager.  The client doesn't return until the manager issues
17233  *      m_o_s_completed message.  MIG Magically converts user task parameter
17234  *      to the task's address map.
17235  *
17236  *      interpretation of sync_flags
17237  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
17238  *                                pages to manager.
17239  *
17240  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17241  *                              - discard pages, write dirty or precious
17242  *                                pages back to memory manager.
17243  *
17244  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17245  *                              - write dirty or precious pages back to
17246  *                                the memory manager.
17247  *
17248  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
17249  *                                is a hole in the region, and we would
17250  *                                have returned KERN_SUCCESS, return
17251  *                                KERN_INVALID_ADDRESS instead.
17252  *
17253  *      NOTE
17254  *      The memory object attributes have not yet been implemented, this
17255  *      function will have to deal with the invalidate attribute
17256  *
17257  *      RETURNS
17258  *      KERN_INVALID_TASK               Bad task parameter
17259  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
17260  *      KERN_SUCCESS                    The usual.
17261  *      KERN_INVALID_ADDRESS            There was a hole in the region.
17262  */
17263
17264 kern_return_t
17265 vm_map_msync(
17266         vm_map_t                map,
17267         vm_map_address_t        address,
17268         vm_map_size_t           size,
17269         vm_sync_t               sync_flags)
17270 {
17271         vm_map_entry_t          entry;
17272         vm_map_size_t           amount_left;
17273         vm_object_offset_t      offset;
17274         boolean_t               do_sync_req;
17275         boolean_t               had_hole = FALSE;
17276         vm_map_offset_t         pmap_offset;
17277
17278         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17279             (sync_flags & VM_SYNC_SYNCHRONOUS))
17280                 return(KERN_INVALID_ARGUMENT);
17281
17282         /*
17283          * align address and size on page boundaries
17284          */
17285         size = (vm_map_round_page(address + size,
17286                                   VM_MAP_PAGE_MASK(map)) -
17287                 vm_map_trunc_page(address,
17288                                   VM_MAP_PAGE_MASK(map)));
17289         address = vm_map_trunc_page(address,
17290                                     VM_MAP_PAGE_MASK(map));
17291
17292         if (map == VM_MAP_NULL)
17293                 return(KERN_INVALID_TASK);
17294
17295         if (size == 0)
17296                 return(KERN_SUCCESS);
17297
17298         amount_left = size;
17299
17300         while (amount_left > 0) {
17301                 vm_object_size_t        flush_size;
17302                 vm_object_t             object;
17303
17304                 vm_map_lock(map);
17305                 if (!vm_map_lookup_entry(map,
17306                                          address,
17307                                          &entry)) {
17308
17309                         vm_map_size_t   skip;
17310
17311                         /*
17312                          * hole in the address map.
17313                          */
17314                         had_hole = TRUE;
17315
17316                         if (sync_flags & VM_SYNC_KILLPAGES) {
17317                                 /*
17318                                  * For VM_SYNC_KILLPAGES, there should be
17319                                  * no holes in the range, since we couldn't
17320                                  * prevent someone else from allocating in
17321                                  * that hole and we wouldn't want to "kill"
17322                                  * their pages.
17323                                  */
17324                                 vm_map_unlock(map);
17325                                 break;
17326                         }
17327
17328                         /*
17329                          * Check for empty map.
17330                          */
17331                         if (entry == vm_map_to_entry(map) &&
17332                             entry->vme_next == entry) {
17333                                 vm_map_unlock(map);
17334                                 break;
17335                         }
17336                         /*
17337                          * Check that we don't wrap and that
17338                          * we have at least one real map entry.
17339                          */
17340                         if ((map->hdr.nentries == 0) ||
17341                             (entry->vme_next->vme_start < address)) {
17342                                 vm_map_unlock(map);
17343                                 break;
17344                         }
17345                         /*
17346                          * Move up to the next entry if needed
17347                          */
17348                         skip = (entry->vme_next->vme_start - address);
17349                         if (skip >= amount_left)
17350                                 amount_left = 0;
17351                         else
17352                                 amount_left -= skip;
17353                         address = entry->vme_next->vme_start;
17354                         vm_map_unlock(map);
17355                         continue;
17356                 }
17357
17358                 offset = address - entry->vme_start;
17359                 pmap_offset = address;
17360
17361                 /*
17362                  * do we have more to flush than is contained in this
17363                  * entry ?
17364                  */
17365                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17366                         flush_size = entry->vme_end -
17367                                 (entry->vme_start + offset);
17368                 } else {
17369                         flush_size = amount_left;
17370                 }
17371                 amount_left -= flush_size;
17372                 address += flush_size;
17373
17374                 if (entry->is_sub_map == TRUE) {
17375                         vm_map_t        local_map;
17376                         vm_map_offset_t local_offset;
17377
17378                         local_map = VME_SUBMAP(entry);
17379                         local_offset = VME_OFFSET(entry);
17380                         vm_map_unlock(map);
17381                         if (vm_map_msync(
17382                                     local_map,
17383                                     local_offset,
17384                                     flush_size,
17385                                     sync_flags) == KERN_INVALID_ADDRESS) {
17386                                 had_hole = TRUE;
17387                         }
17388                         continue;
17389                 }
17390                 object = VME_OBJECT(entry);
17391
17392                 /*
17393                  * We can't sync this object if the object has not been
17394                  * created yet
17395                  */
17396                 if (object == VM_OBJECT_NULL) {
17397                         vm_map_unlock(map);
17398                         continue;
17399                 }
17400                 offset += VME_OFFSET(entry);
17401
17402                 vm_object_lock(object);
17403
17404                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17405                         int kill_pages = 0;
17406                         boolean_t reusable_pages = FALSE;
17407
17408                         if (sync_flags & VM_SYNC_KILLPAGES) {
17409                                 if (((object->ref_count == 1) ||
17410                                      ((object->copy_strategy !=
17411                                        MEMORY_OBJECT_COPY_SYMMETRIC) &&
17412                                       (object->copy == VM_OBJECT_NULL))) &&
17413                                     (object->shadow == VM_OBJECT_NULL)) {
17414                                         if (object->ref_count != 1) {
17415                                                 vm_page_stats_reusable.free_shared++;
17416                                         }
17417                                         kill_pages = 1;
17418                                 } else {
17419                                         kill_pages = -1;
17420                                 }
17421                         }
17422                         if (kill_pages != -1)
17423                                 vm_object_deactivate_pages(
17424                                         object,
17425                                         offset,
17426                                         (vm_object_size_t) flush_size,
17427                                         kill_pages,
17428                                         reusable_pages,
17429                                         map->pmap,
17430                                         pmap_offset);
17431                         vm_object_unlock(object);
17432                         vm_map_unlock(map);
17433                         continue;
17434                 }
17435                 /*
17436                  * We can't sync this object if there isn't a pager.
17437                  * Don't bother to sync internal objects, since there can't
17438                  * be any "permanent" storage for these objects anyway.
17439                  */
17440                 if ((object->pager == MEMORY_OBJECT_NULL) ||
17441                     (object->internal) || (object->private)) {
17442                         vm_object_unlock(object);
17443                         vm_map_unlock(map);
17444                         continue;
17445                 }
17446                 /*
17447                  * keep reference on the object until syncing is done
17448                  */
17449                 vm_object_reference_locked(object);
17450                 vm_object_unlock(object);
17451
17452                 vm_map_unlock(map);
17453
17454                 do_sync_req = vm_object_sync(object,
17455                                              offset,
17456                                              flush_size,
17457                                              sync_flags & VM_SYNC_INVALIDATE,
17458                                              ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17459                                               (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17460                                              sync_flags & VM_SYNC_SYNCHRONOUS);
17461
17462                 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17463                         /*
17464                          * clear out the clustering and read-ahead hints
17465                          */
17466                         vm_object_lock(object);
17467
17468                         object->pages_created = 0;
17469                         object->pages_used = 0;
17470                         object->sequential = 0;
17471                         object->last_alloc = 0;
17472
17473                         vm_object_unlock(object);
17474                 }
17475                 vm_object_deallocate(object);
17476         } /* while */
17477
17478         /* for proper msync() behaviour */
17479         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
17480                 return(KERN_INVALID_ADDRESS);
17481
17482         return(KERN_SUCCESS);
17483 }/* vm_msync */
17484
17485 /*
17486  *      Routine:        convert_port_entry_to_map
17487  *      Purpose:
17488  *              Convert from a port specifying an entry or a task
17489  *              to a map. Doesn't consume the port ref; produces a map ref,
17490  *              which may be null.  Unlike convert_port_to_map, the
17491  *              port may be task or a named entry backed.
17492  *      Conditions:
17493  *              Nothing locked.
17494  */
17495
17496
17497 vm_map_t
17498 convert_port_entry_to_map(
17499         ipc_port_t      port)
17500 {
17501         vm_map_t map;
17502         vm_named_entry_t        named_entry;
17503         uint32_t        try_failed_count = 0;
17504
17505         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17506                 while(TRUE) {
17507                         ip_lock(port);
17508                         if(ip_active(port) && (ip_kotype(port)
17509                                                == IKOT_NAMED_ENTRY)) {
17510                                 named_entry =
17511                                         (vm_named_entry_t)port->ip_kobject;
17512                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17513                                         ip_unlock(port);
17514
17515                                         try_failed_count++;
17516                                         mutex_pause(try_failed_count);
17517                                         continue;
17518                                 }
17519                                 named_entry->ref_count++;
17520                                 lck_mtx_unlock(&(named_entry)->Lock);
17521                                 ip_unlock(port);
17522                                 if ((named_entry->is_sub_map) &&
17523                                     (named_entry->protection
17524                                      & VM_PROT_WRITE)) {
17525                                         map = named_entry->backing.map;
17526                                 } else {
17527                                         mach_destroy_memory_entry(port);
17528                                         return VM_MAP_NULL;
17529                                 }
17530                                 vm_map_reference_swap(map);
17531                                 mach_destroy_memory_entry(port);
17532                                 break;
17533                         }
17534                         else
17535                                 return VM_MAP_NULL;
17536                 }
17537         }
17538         else
17539                 map = convert_port_to_map(port);
17540
17541         return map;
17542 }
17543
17544 /*
17545  *      Routine:        convert_port_entry_to_object
17546  *      Purpose:
17547  *              Convert from a port specifying a named entry to an
17548  *              object. Doesn't consume the port ref; produces a map ref,
17549  *              which may be null.
17550  *      Conditions:
17551  *              Nothing locked.
17552  */
17553
17554
17555 vm_object_t
17556 convert_port_entry_to_object(
17557         ipc_port_t      port)
17558 {
17559         vm_object_t             object = VM_OBJECT_NULL;
17560         vm_named_entry_t        named_entry;
17561         uint32_t                try_failed_count = 0;
17562
17563         if (IP_VALID(port) &&
17564             (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17565         try_again:
17566                 ip_lock(port);
17567                 if (ip_active(port) &&
17568                     (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17569                         named_entry = (vm_named_entry_t)port->ip_kobject;
17570                         if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17571                                 ip_unlock(port);
17572                                 try_failed_count++;
17573                                 mutex_pause(try_failed_count);
17574                                 goto try_again;
17575                         }
17576                         named_entry->ref_count++;
17577                         lck_mtx_unlock(&(named_entry)->Lock);
17578                         ip_unlock(port);
17579                         if (!(named_entry->is_sub_map) &&
17580                             !(named_entry->is_copy) &&
17581                             (named_entry->protection & VM_PROT_WRITE)) {
17582                                 object = named_entry->backing.object;
17583                                 vm_object_reference(object);
17584                         }
17585                         mach_destroy_memory_entry(port);
17586                 }
17587         }
17588
17589         return object;
17590 }
17591
17592 /*
17593  * Export routines to other components for the things we access locally through
17594  * macros.
17595  */
17596 #undef current_map
17597 vm_map_t
17598 current_map(void)
17599 {
17600         return (current_map_fast());
17601 }
17602
17603 /*
17604  *      vm_map_reference:
17605  *
17606  *      Most code internal to the osfmk will go through a
17607  *      macro defining this.  This is always here for the
17608  *      use of other kernel components.
17609  */
17610 #undef vm_map_reference
17611 void
17612 vm_map_reference(
17613         vm_map_t        map)
17614 {
17615         if (map == VM_MAP_NULL)
17616                 return;
17617
17618         lck_mtx_lock(&map->s_lock);
17619 #if     TASK_SWAPPER
17620         assert(map->res_count > 0);
17621         assert(map->map_refcnt >= map->res_count);
17622         map->res_count++;
17623 #endif
17624         map->map_refcnt++;
17625         lck_mtx_unlock(&map->s_lock);
17626 }
17627
17628 /*
17629  *      vm_map_deallocate:
17630  *
17631  *      Removes a reference from the specified map,
17632  *      destroying it if no references remain.
17633  *      The map should not be locked.
17634  */
17635 void
17636 vm_map_deallocate(
17637         vm_map_t        map)
17638 {
17639         unsigned int            ref;
17640
17641         if (map == VM_MAP_NULL)
17642                 return;
17643
17644         lck_mtx_lock(&map->s_lock);
17645         ref = --map->map_refcnt;
17646         if (ref > 0) {
17647                 vm_map_res_deallocate(map);
17648                 lck_mtx_unlock(&map->s_lock);
17649                 return;
17650         }
17651         assert(map->map_refcnt == 0);
17652         lck_mtx_unlock(&map->s_lock);
17653
17654 #if     TASK_SWAPPER
17655         /*
17656          * The map residence count isn't decremented here because
17657          * the vm_map_delete below will traverse the entire map,
17658          * deleting entries, and the residence counts on objects
17659          * and sharing maps will go away then.
17660          */
17661 #endif
17662
17663         vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17664 }
17665
17666
17667 void
17668 vm_map_disable_NX(vm_map_t map)
17669 {
17670         if (map == NULL)
17671                 return;
17672         if (map->pmap == NULL)
17673                 return;
17674
17675         pmap_disable_NX(map->pmap);
17676 }
17677
17678 void
17679 vm_map_disallow_data_exec(vm_map_t map)
17680 {
17681     if (map == NULL)
17682         return;
17683
17684     map->map_disallow_data_exec = TRUE;
17685 }
17686
17687 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17688  * more descriptive.
17689  */
17690 void
17691 vm_map_set_32bit(vm_map_t map)
17692 {
17693 #if defined(__arm__) || defined(__arm64__)
17694         map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17695 #else
17696         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17697 #endif
17698 }
17699
17700
17701 void
17702 vm_map_set_64bit(vm_map_t map)
17703 {
17704 #if defined(__arm__) || defined(__arm64__)
17705         map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
17706 #else
17707         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
17708 #endif
17709 }
17710
17711 /*
17712  * Expand the maximum size of an existing map to the maximum supported.
17713  */
17714 void
17715 vm_map_set_jumbo(vm_map_t map)
17716 {
17717 #if defined (__arm64__)
17718         vm_map_set_max_addr(map, ~0);
17719 #else /* arm64 */
17720         (void) map;
17721 #endif
17722 }
17723
17724 /*
17725  * Expand the maximum size of an existing map.
17726  */
17727 void
17728 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
17729 {
17730 #if defined(__arm64__)
17731         vm_map_offset_t max_supported_offset = 0;
17732         vm_map_offset_t old_max_offset = map->max_offset;
17733         max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
17734
17735         new_max_offset = trunc_page(new_max_offset);
17736
17737         /* The address space cannot be shrunk using this routine. */
17738         if (old_max_offset >= new_max_offset) {
17739                 return;
17740         }
17741
17742         if (max_supported_offset < new_max_offset) {
17743                 new_max_offset = max_supported_offset;
17744         }
17745
17746         map->max_offset = new_max_offset;
17747
17748         if (map->holes_list->prev->vme_end == old_max_offset) {
17749                 /*
17750                  * There is already a hole at the end of the map; simply make it bigger.
17751                  */
17752                 map->holes_list->prev->vme_end = map->max_offset;
17753         } else {
17754                 /*
17755                  * There is no hole at the end, so we need to create a new hole
17756                  * for the new empty space we're creating.
17757                  */
17758                 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
17759                 new_hole->start = old_max_offset;
17760                 new_hole->end = map->max_offset;
17761                 new_hole->prev = map->holes_list->prev;
17762                 new_hole->next = (struct vm_map_entry *)map->holes_list;
17763                 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
17764                 map->holes_list->prev = (struct vm_map_entry *)new_hole;
17765         }
17766 #else
17767         (void)map;
17768         (void)new_max_offset;
17769 #endif
17770 }
17771
17772 vm_map_offset_t
17773 vm_compute_max_offset(boolean_t is64)
17774 {
17775 #if defined(__arm__) || defined(__arm64__)
17776         return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
17777 #else
17778         return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
17779 #endif
17780 }
17781
17782 void
17783 vm_map_get_max_aslr_slide_section(
17784                 vm_map_t                map __unused,
17785                 int64_t                 *max_sections,
17786                 int64_t                 *section_size)
17787 {
17788 #if defined(__arm64__)
17789         *max_sections = 3;
17790         *section_size = ARM_TT_TWIG_SIZE;
17791 #else
17792         *max_sections = 1;
17793         *section_size = 0;
17794 #endif
17795 }
17796
17797 uint64_t
17798 vm_map_get_max_aslr_slide_pages(vm_map_t map)
17799 {
17800 #if defined(__arm64__)
17801         /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
17802          * limited embedded address space; this is also meant to minimize pmap
17803          * memory usage on 16KB page systems.
17804          */
17805         return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
17806 #else
17807         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17808 #endif
17809 }
17810
17811 uint64_t
17812 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
17813 {
17814 #if defined(__arm64__)
17815         /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
17816          * of independent entropy on 16KB page systems.
17817          */
17818         return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
17819 #else
17820         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17821 #endif
17822 }
17823
17824 #ifndef __arm__
17825 boolean_t
17826 vm_map_is_64bit(
17827                 vm_map_t map)
17828 {
17829         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
17830 }
17831 #endif
17832
17833 boolean_t
17834 vm_map_has_hard_pagezero(
17835                 vm_map_t        map,
17836                 vm_map_offset_t pagezero_size)
17837 {
17838         /*
17839          * XXX FBDP
17840          * We should lock the VM map (for read) here but we can get away
17841          * with it for now because there can't really be any race condition:
17842          * the VM map's min_offset is changed only when the VM map is created
17843          * and when the zero page is established (when the binary gets loaded),
17844          * and this routine gets called only when the task terminates and the
17845          * VM map is being torn down, and when a new map is created via
17846          * load_machfile()/execve().
17847          */
17848         return (map->min_offset >= pagezero_size);
17849 }
17850
17851 /*
17852  * Raise a VM map's maximun offset.
17853  */
17854 kern_return_t
17855 vm_map_raise_max_offset(
17856         vm_map_t        map,
17857         vm_map_offset_t new_max_offset)
17858 {
17859         kern_return_t   ret;
17860
17861         vm_map_lock(map);
17862         ret = KERN_INVALID_ADDRESS;
17863
17864         if (new_max_offset >= map->max_offset) {
17865                 if (!vm_map_is_64bit(map)) {
17866                         if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
17867                                 map->max_offset = new_max_offset;
17868                                 ret = KERN_SUCCESS;
17869                         }
17870                 } else {
17871                         if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
17872                                 map->max_offset = new_max_offset;
17873                                 ret = KERN_SUCCESS;
17874                         }
17875                 }
17876         }
17877
17878         vm_map_unlock(map);
17879         return ret;
17880 }
17881
17882
17883 /*
17884  * Raise a VM map's minimum offset.
17885  * To strictly enforce "page zero" reservation.
17886  */
17887 kern_return_t
17888 vm_map_raise_min_offset(
17889         vm_map_t        map,
17890         vm_map_offset_t new_min_offset)
17891 {
17892         vm_map_entry_t  first_entry;
17893
17894         new_min_offset = vm_map_round_page(new_min_offset,
17895                                            VM_MAP_PAGE_MASK(map));
17896
17897         vm_map_lock(map);
17898
17899         if (new_min_offset < map->min_offset) {
17900                 /*
17901                  * Can't move min_offset backwards, as that would expose
17902                  * a part of the address space that was previously, and for
17903                  * possibly good reasons, inaccessible.
17904                  */
17905                 vm_map_unlock(map);
17906                 return KERN_INVALID_ADDRESS;
17907         }
17908         if (new_min_offset >= map->max_offset) {
17909                 /* can't go beyond the end of the address space */
17910                 vm_map_unlock(map);
17911                 return KERN_INVALID_ADDRESS;
17912         }
17913
17914         first_entry = vm_map_first_entry(map);
17915         if (first_entry != vm_map_to_entry(map) &&
17916             first_entry->vme_start < new_min_offset) {
17917                 /*
17918                  * Some memory was already allocated below the new
17919                  * minimun offset.  It's too late to change it now...
17920                  */
17921                 vm_map_unlock(map);
17922                 return KERN_NO_SPACE;
17923         }
17924
17925         map->min_offset = new_min_offset;
17926
17927         assert(map->holes_list);
17928         map->holes_list->start = new_min_offset;
17929         assert(new_min_offset < map->holes_list->end);
17930
17931         vm_map_unlock(map);
17932
17933         return KERN_SUCCESS;
17934 }
17935
17936 /*
17937  * Set the limit on the maximum amount of user wired memory allowed for this map.
17938  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
17939  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
17940  * don't have to reach over to the BSD data structures.
17941  */
17942
17943 void
17944 vm_map_set_user_wire_limit(vm_map_t     map,
17945                            vm_size_t    limit)
17946 {
17947         map->user_wire_limit = limit;
17948 }
17949
17950
17951 void vm_map_switch_protect(vm_map_t     map,
17952                            boolean_t    val)
17953 {
17954         vm_map_lock(map);
17955         map->switch_protect=val;
17956         vm_map_unlock(map);
17957 }
17958
17959 /*
17960  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
17961  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
17962  * bump both counters.
17963  */
17964 void
17965 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
17966 {
17967         pmap_t pmap = vm_map_pmap(map);
17968
17969         ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17970         ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17971 }
17972
17973 void
17974 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
17975 {
17976         pmap_t pmap = vm_map_pmap(map);
17977
17978         ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17979         ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17980 }
17981
17982 /* Add (generate) code signature for memory range */
17983 #if CONFIG_DYNAMIC_CODE_SIGNING
17984 kern_return_t vm_map_sign(vm_map_t map,
17985                  vm_map_offset_t start,
17986                  vm_map_offset_t end)
17987 {
17988         vm_map_entry_t entry;
17989         vm_page_t m;
17990         vm_object_t object;
17991
17992         /*
17993          * Vet all the input parameters and current type and state of the
17994          * underlaying object.  Return with an error if anything is amiss.
17995          */
17996         if (map == VM_MAP_NULL)
17997                 return(KERN_INVALID_ARGUMENT);
17998
17999         vm_map_lock_read(map);
18000
18001         if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18002                 /*
18003                  * Must pass a valid non-submap address.
18004                  */
18005                 vm_map_unlock_read(map);
18006                 return(KERN_INVALID_ADDRESS);
18007         }
18008
18009         if((entry->vme_start > start) || (entry->vme_end < end)) {
18010                 /*
18011                  * Map entry doesn't cover the requested range. Not handling
18012                  * this situation currently.
18013                  */
18014                 vm_map_unlock_read(map);
18015                 return(KERN_INVALID_ARGUMENT);
18016         }
18017
18018         object = VME_OBJECT(entry);
18019         if (object == VM_OBJECT_NULL) {
18020                 /*
18021                  * Object must already be present or we can't sign.
18022                  */
18023                 vm_map_unlock_read(map);
18024                 return KERN_INVALID_ARGUMENT;
18025         }
18026
18027         vm_object_lock(object);
18028         vm_map_unlock_read(map);
18029
18030         while(start < end) {
18031                 uint32_t refmod;
18032
18033                 m = vm_page_lookup(object,
18034                                    start - entry->vme_start + VME_OFFSET(entry));
18035                 if (m==VM_PAGE_NULL) {
18036                         /* shoud we try to fault a page here? we can probably
18037                          * demand it exists and is locked for this request */
18038                         vm_object_unlock(object);
18039                         return KERN_FAILURE;
18040                 }
18041                 /* deal with special page status */
18042                 if (m->vmp_busy ||
18043                     (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18044                         vm_object_unlock(object);
18045                         return KERN_FAILURE;
18046                 }
18047
18048                 /* Page is OK... now "validate" it */
18049                 /* This is the place where we'll call out to create a code
18050                  * directory, later */
18051                 m->vmp_cs_validated = TRUE;
18052
18053                 /* The page is now "clean" for codesigning purposes. That means
18054                  * we don't consider it as modified (wpmapped) anymore. But
18055                  * we'll disconnect the page so we note any future modification
18056                  * attempts. */
18057                 m->vmp_wpmapped = FALSE;
18058                 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18059
18060                 /* Pull the dirty status from the pmap, since we cleared the
18061                  * wpmapped bit */
18062                 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18063                         SET_PAGE_DIRTY(m, FALSE);
18064                 }
18065
18066                 /* On to the next page */
18067                 start += PAGE_SIZE;
18068         }
18069         vm_object_unlock(object);
18070
18071         return KERN_SUCCESS;
18072 }
18073 #endif
18074
18075 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18076 {
18077         vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
18078         vm_map_entry_t next_entry;
18079         kern_return_t   kr = KERN_SUCCESS;
18080         vm_map_t        zap_map;
18081
18082         vm_map_lock(map);
18083
18084         /*
18085          * We use a "zap_map" to avoid having to unlock
18086          * the "map" in vm_map_delete().
18087          */
18088         zap_map = vm_map_create(PMAP_NULL,
18089                                 map->min_offset,
18090                                 map->max_offset,
18091                                 map->hdr.entries_pageable);
18092
18093         if (zap_map == VM_MAP_NULL) {
18094                 return KERN_RESOURCE_SHORTAGE;
18095         }
18096
18097         vm_map_set_page_shift(zap_map,
18098                               VM_MAP_PAGE_SHIFT(map));
18099         vm_map_disable_hole_optimization(zap_map);
18100
18101         for (entry = vm_map_first_entry(map);
18102              entry != vm_map_to_entry(map);
18103              entry = next_entry) {
18104                 next_entry = entry->vme_next;
18105
18106                 if (VME_OBJECT(entry) &&
18107                     !entry->is_sub_map &&
18108                     (VME_OBJECT(entry)->internal == TRUE) &&
18109                     (VME_OBJECT(entry)->ref_count == 1)) {
18110
18111                         *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18112                         *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18113
18114                         (void)vm_map_delete(map,
18115                                             entry->vme_start,
18116                                             entry->vme_end,
18117                                             VM_MAP_REMOVE_SAVE_ENTRIES,
18118                                             zap_map);
18119                 }
18120         }
18121
18122         vm_map_unlock(map);
18123
18124         /*
18125          * Get rid of the "zap_maps" and all the map entries that
18126          * they may still contain.
18127          */
18128         if (zap_map != VM_MAP_NULL) {
18129                 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18130                 zap_map = VM_MAP_NULL;
18131         }
18132
18133         return kr;
18134 }
18135
18136
18137 #if DEVELOPMENT || DEBUG
18138
18139 int
18140 vm_map_disconnect_page_mappings(
18141         vm_map_t map,
18142         boolean_t do_unnest)
18143 {
18144         vm_map_entry_t entry;
18145         int     page_count = 0;
18146
18147         if (do_unnest == TRUE) {
18148 #ifndef NO_NESTED_PMAP
18149                 vm_map_lock(map);
18150
18151                 for (entry = vm_map_first_entry(map);
18152                      entry != vm_map_to_entry(map);
18153                      entry = entry->vme_next) {
18154
18155                         if (entry->is_sub_map && entry->use_pmap) {
18156                                 /*
18157                                  * Make sure the range between the start of this entry and
18158                                  * the end of this entry is no longer nested, so that
18159                                  * we will only remove mappings from the pmap in use by this
18160                                  * this task
18161                                  */
18162                                 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18163                         }
18164                 }
18165                 vm_map_unlock(map);
18166 #endif
18167         }
18168         vm_map_lock_read(map);
18169
18170         page_count = map->pmap->stats.resident_count;
18171
18172         for (entry = vm_map_first_entry(map);
18173              entry != vm_map_to_entry(map);
18174              entry = entry->vme_next) {
18175
18176                 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18177                                            (VME_OBJECT(entry)->phys_contiguous))) {
18178                         continue;
18179                 }
18180                 if (entry->is_sub_map)
18181                         assert(!entry->use_pmap);
18182
18183                 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18184         }
18185         vm_map_unlock_read(map);
18186
18187         return page_count;
18188 }
18189
18190 #endif
18191
18192
18193 #if CONFIG_FREEZE
18194
18195
18196 int c_freezer_swapout_page_count;
18197 int c_freezer_compression_count = 0;
18198 AbsoluteTime c_freezer_last_yield_ts = 0;
18199
18200 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18201 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18202
18203 kern_return_t
18204 vm_map_freeze(
18205                 vm_map_t map,
18206                 unsigned int *purgeable_count,
18207                 unsigned int *wired_count,
18208                 unsigned int *clean_count,
18209                 unsigned int *dirty_count,
18210                 __unused unsigned int dirty_budget,
18211                 unsigned int *shared_count,
18212                 int          *freezer_error_code,
18213                 boolean_t    eval_only)
18214 {
18215         vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
18216         kern_return_t   kr = KERN_SUCCESS;
18217         boolean_t       evaluation_phase = TRUE;
18218         vm_object_t     cur_shared_object = NULL;
18219         int             cur_shared_obj_ref_cnt = 0;
18220         unsigned int    dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18221
18222         *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18223
18224         /*
18225          * We need the exclusive lock here so that we can
18226          * block any page faults or lookups while we are
18227          * in the middle of freezing this vm map.
18228          */
18229         vm_map_lock(map);
18230
18231         assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18232
18233         if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18234                 if (vm_compressor_low_on_space()) {
18235                         *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18236                 }
18237
18238                 if (vm_swap_low_on_space()) {
18239                         *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18240                 }
18241
18242                 kr = KERN_NO_SPACE;
18243                 goto done;
18244         }
18245
18246         if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18247                 /*
18248                  * In-memory compressor backing the freezer. No disk.
18249                  * So no need to do the evaluation phase.
18250                  */
18251                 evaluation_phase = FALSE;
18252
18253                 if (eval_only == TRUE) {
18254                         /*
18255                          * We don't support 'eval_only' mode
18256                          * in this non-swap config.
18257                          */
18258                         *freezer_error_code = FREEZER_ERROR_GENERIC;
18259                         kr = KERN_INVALID_ARGUMENT;
18260                         goto done;
18261                 }
18262
18263                 c_freezer_compression_count = 0;
18264                 clock_get_uptime(&c_freezer_last_yield_ts);
18265         }
18266 again:
18267
18268         for (entry2 = vm_map_first_entry(map);
18269              entry2 != vm_map_to_entry(map);
18270              entry2 = entry2->vme_next) {
18271
18272                 vm_object_t     src_object = VME_OBJECT(entry2);
18273
18274                 if (src_object &&
18275                     !entry2->is_sub_map &&
18276                     !src_object->phys_contiguous) {
18277                         /* If eligible, scan the entry, moving eligible pages over to our parent object */
18278
18279                         if (src_object->internal == TRUE) {
18280
18281                                 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18282                                         /*
18283                                          * Pages belonging to this object could be swapped to disk.
18284                                          * Make sure it's not a shared object because we could end
18285                                          * up just bringing it back in again.
18286                                          *
18287                                          * We try to optimize somewhat by checking for objects that are mapped
18288                                          * more than once within our own map. But we don't do full searches,
18289                                          * we just look at the entries following our current entry.
18290                                          */
18291                                         if (src_object->ref_count > 1) {
18292                                                 if (src_object != cur_shared_object) {
18293                                                         obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18294                                                         dirty_shared_count += obj_pages_snapshot;
18295
18296                                                         cur_shared_object = src_object;
18297                                                         cur_shared_obj_ref_cnt = 1;
18298                                                         continue;
18299                                                 } else {
18300                                                         cur_shared_obj_ref_cnt++;
18301                                                         if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18302                                                                 /*
18303                                                                  * Fall through to below and treat this object as private.
18304                                                                  * So deduct its pages from our shared total and add it to the
18305                                                                  * private total.
18306                                                                  */
18307
18308                                                                 dirty_shared_count -= obj_pages_snapshot;
18309                                                                 dirty_private_count += obj_pages_snapshot;
18310                                                         } else {
18311                                                                 continue;
18312                                                         }
18313                                                 }
18314                                         }
18315
18316
18317                                         if (src_object->ref_count == 1) {
18318                                                 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18319                                         }
18320
18321                                         if (evaluation_phase == TRUE) {
18322
18323                                                 continue;
18324                                         }
18325                                 }
18326
18327                                 vm_object_compressed_freezer_pageout(src_object);
18328
18329                                 *wired_count += src_object->wired_page_count;
18330
18331                                 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18332                                         if (vm_compressor_low_on_space()) {
18333                                                 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18334                                         }
18335
18336                                         if (vm_swap_low_on_space()) {
18337                                                 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18338                                         }
18339
18340                                         kr = KERN_NO_SPACE;
18341                                         break;
18342                                 }
18343                         }
18344                 }
18345         }
18346
18347         if (evaluation_phase) {
18348
18349                 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18350
18351                 if (dirty_shared_count > shared_pages_threshold) {
18352                         *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18353                         kr = KERN_FAILURE;
18354                         goto done;
18355                 }
18356
18357                 if (dirty_shared_count &&
18358                    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18359                         *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18360                         kr = KERN_FAILURE;
18361                         goto done;
18362                 }
18363
18364                 evaluation_phase = FALSE;
18365                 dirty_shared_count = dirty_private_count = 0;
18366
18367                 c_freezer_compression_count = 0;
18368                 clock_get_uptime(&c_freezer_last_yield_ts);
18369
18370                 if (eval_only) {
18371                         kr = KERN_SUCCESS;
18372                         goto done;
18373                 }
18374
18375                 goto again;
18376
18377         } else {
18378
18379                 kr = KERN_SUCCESS;
18380                 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18381         }
18382
18383 done:
18384         vm_map_unlock(map);
18385
18386         if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18387                 vm_object_compressed_freezer_done();
18388
18389                 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18390                         /*
18391                          * reset the counter tracking the # of swapped compressed pages
18392                          * because we are now done with this freeze session and task.
18393                          */
18394
18395                         *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18396                         c_freezer_swapout_page_count = 0;
18397                 }
18398         }
18399         return kr;
18400 }
18401
18402 #endif
18403
18404 /*
18405  * vm_map_entry_should_cow_for_true_share:
18406  *
18407  * Determines if the map entry should be clipped and setup for copy-on-write
18408  * to avoid applying "true_share" to a large VM object when only a subset is
18409  * targeted.
18410  *
18411  * For now, we target only the map entries created for the Objective C
18412  * Garbage Collector, which initially have the following properties:
18413  *      - alias == VM_MEMORY_MALLOC
18414  *      - wired_count == 0
18415  *      - !needs_copy
18416  * and a VM object with:
18417  *      - internal
18418  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18419  *      - !true_share
18420  *      - vo_size == ANON_CHUNK_SIZE
18421  *
18422  * Only non-kernel map entries.
18423  */
18424 boolean_t
18425 vm_map_entry_should_cow_for_true_share(
18426         vm_map_entry_t  entry)
18427 {
18428         vm_object_t     object;
18429
18430         if (entry->is_sub_map) {
18431                 /* entry does not point at a VM object */
18432                 return FALSE;
18433         }
18434
18435         if (entry->needs_copy) {
18436                 /* already set for copy_on_write: done! */
18437                 return FALSE;
18438         }
18439
18440         if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18441             VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18442                 /* not a malloc heap or Obj-C Garbage Collector heap */
18443                 return FALSE;
18444         }
18445
18446         if (entry->wired_count) {
18447                 /* wired: can't change the map entry... */
18448                 vm_counters.should_cow_but_wired++;
18449                 return FALSE;
18450         }
18451
18452         object = VME_OBJECT(entry);
18453
18454         if (object == VM_OBJECT_NULL) {
18455                 /* no object yet... */
18456                 return FALSE;
18457         }
18458
18459         if (!object->internal) {
18460                 /* not an internal object */
18461                 return FALSE;
18462         }
18463
18464         if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18465                 /* not the default copy strategy */
18466                 return FALSE;
18467         }
18468
18469         if (object->true_share) {
18470                 /* already true_share: too late to avoid it */
18471                 return FALSE;
18472         }
18473
18474         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18475             object->vo_size != ANON_CHUNK_SIZE) {
18476                 /* ... not an object created for the ObjC Garbage Collector */
18477                 return FALSE;
18478         }
18479
18480         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18481             object->vo_size != 2048 * 4096) {
18482                 /* ... not a "MALLOC_SMALL" heap */
18483                 return FALSE;
18484         }
18485
18486         /*
18487          * All the criteria match: we have a large object being targeted for "true_share".
18488          * To limit the adverse side-effects linked with "true_share", tell the caller to
18489          * try and avoid setting up the entire object for "true_share" by clipping the
18490          * targeted range and setting it up for copy-on-write.
18491          */
18492         return TRUE;
18493 }
18494
18495 vm_map_offset_t
18496 vm_map_round_page_mask(
18497         vm_map_offset_t offset,
18498         vm_map_offset_t mask)
18499 {
18500         return VM_MAP_ROUND_PAGE(offset, mask);
18501 }
18502
18503 vm_map_offset_t
18504 vm_map_trunc_page_mask(
18505         vm_map_offset_t offset,
18506         vm_map_offset_t mask)
18507 {
18508         return VM_MAP_TRUNC_PAGE(offset, mask);
18509 }
18510
18511 boolean_t
18512 vm_map_page_aligned(
18513         vm_map_offset_t offset,
18514         vm_map_offset_t mask)
18515 {
18516         return ((offset) & mask) == 0;
18517 }
18518
18519 int
18520 vm_map_page_shift(
18521         vm_map_t map)
18522 {
18523         return VM_MAP_PAGE_SHIFT(map);
18524 }
18525
18526 int
18527 vm_map_page_size(
18528         vm_map_t map)
18529 {
18530         return VM_MAP_PAGE_SIZE(map);
18531 }
18532
18533 vm_map_offset_t
18534 vm_map_page_mask(
18535         vm_map_t map)
18536 {
18537         return VM_MAP_PAGE_MASK(map);
18538 }
18539
18540 kern_return_t
18541 vm_map_set_page_shift(
18542         vm_map_t        map,
18543         int             pageshift)
18544 {
18545         if (map->hdr.nentries != 0) {
18546                 /* too late to change page size */
18547                 return KERN_FAILURE;
18548         }
18549
18550         map->hdr.page_shift = pageshift;
18551
18552         return KERN_SUCCESS;
18553 }
18554
18555 kern_return_t
18556 vm_map_query_volatile(
18557         vm_map_t        map,
18558         mach_vm_size_t  *volatile_virtual_size_p,
18559         mach_vm_size_t  *volatile_resident_size_p,
18560         mach_vm_size_t  *volatile_compressed_size_p,
18561         mach_vm_size_t  *volatile_pmap_size_p,
18562         mach_vm_size_t  *volatile_compressed_pmap_size_p)
18563 {
18564         mach_vm_size_t  volatile_virtual_size;
18565         mach_vm_size_t  volatile_resident_count;
18566         mach_vm_size_t  volatile_compressed_count;
18567         mach_vm_size_t  volatile_pmap_count;
18568         mach_vm_size_t  volatile_compressed_pmap_count;
18569         mach_vm_size_t  resident_count;
18570         vm_map_entry_t  entry;
18571         vm_object_t     object;
18572
18573         /* map should be locked by caller */
18574
18575         volatile_virtual_size = 0;
18576         volatile_resident_count = 0;
18577         volatile_compressed_count = 0;
18578         volatile_pmap_count = 0;
18579         volatile_compressed_pmap_count = 0;
18580
18581         for (entry = vm_map_first_entry(map);
18582              entry != vm_map_to_entry(map);
18583              entry = entry->vme_next) {
18584                 mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
18585
18586                 if (entry->is_sub_map) {
18587                         continue;
18588                 }
18589                 if (! (entry->protection & VM_PROT_WRITE)) {
18590                         continue;
18591                 }
18592                 object = VME_OBJECT(entry);
18593                 if (object == VM_OBJECT_NULL) {
18594                         continue;
18595                 }
18596                 if (object->purgable != VM_PURGABLE_VOLATILE &&
18597                     object->purgable != VM_PURGABLE_EMPTY) {
18598                         continue;
18599                 }
18600                 if (VME_OFFSET(entry)) {
18601                         /*
18602                          * If the map entry has been split and the object now
18603                          * appears several times in the VM map, we don't want
18604                          * to count the object's resident_page_count more than
18605                          * once.  We count it only for the first one, starting
18606                          * at offset 0 and ignore the other VM map entries.
18607                          */
18608                         continue;
18609                 }
18610                 resident_count = object->resident_page_count;
18611                 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18612                         resident_count = 0;
18613                 } else {
18614                         resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18615                 }
18616
18617                 volatile_virtual_size += entry->vme_end - entry->vme_start;
18618                 volatile_resident_count += resident_count;
18619                 if (object->pager) {
18620                         volatile_compressed_count +=
18621                                 vm_compressor_pager_get_count(object->pager);
18622                 }
18623                 pmap_compressed_bytes = 0;
18624                 pmap_resident_bytes =
18625                         pmap_query_resident(map->pmap,
18626                                             entry->vme_start,
18627                                             entry->vme_end,
18628                                             &pmap_compressed_bytes);
18629                 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18630                 volatile_compressed_pmap_count += (pmap_compressed_bytes
18631                                                    / PAGE_SIZE);
18632         }
18633
18634         /* map is still locked on return */
18635
18636         *volatile_virtual_size_p = volatile_virtual_size;
18637         *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
18638         *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
18639         *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
18640         *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
18641
18642         return KERN_SUCCESS;
18643 }
18644
18645 void
18646 vm_map_sizes(vm_map_t map,
18647                 vm_map_size_t * psize,
18648                 vm_map_size_t * pfree,
18649                 vm_map_size_t * plargest_free)
18650 {
18651     vm_map_entry_t  entry;
18652     vm_map_offset_t prev;
18653     vm_map_size_t   free, total_free, largest_free;
18654     boolean_t       end;
18655
18656     if (!map)
18657     {
18658         *psize = *pfree = *plargest_free = 0;
18659         return;
18660     }
18661     total_free = largest_free = 0;
18662
18663     vm_map_lock_read(map);
18664     if (psize) *psize = map->max_offset - map->min_offset;
18665
18666     prev = map->min_offset;
18667     for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
18668     {
18669         end = (entry == vm_map_to_entry(map));
18670
18671         if (end) free = entry->vme_end   - prev;
18672         else     free = entry->vme_start - prev;
18673
18674         total_free += free;
18675         if (free > largest_free) largest_free = free;
18676
18677         if (end) break;
18678         prev = entry->vme_end;
18679     }
18680     vm_map_unlock_read(map);
18681     if (pfree)         *pfree = total_free;
18682     if (plargest_free) *plargest_free = largest_free;
18683 }
18684
18685 #if VM_SCAN_FOR_SHADOW_CHAIN
18686 int vm_map_shadow_max(vm_map_t map);
18687 int vm_map_shadow_max(
18688         vm_map_t map)
18689 {
18690         int             shadows, shadows_max;
18691         vm_map_entry_t  entry;
18692         vm_object_t     object, next_object;
18693
18694         if (map == NULL)
18695                 return 0;
18696
18697         shadows_max = 0;
18698
18699         vm_map_lock_read(map);
18700
18701         for (entry = vm_map_first_entry(map);
18702              entry != vm_map_to_entry(map);
18703              entry = entry->vme_next) {
18704                 if (entry->is_sub_map) {
18705                         continue;
18706                 }
18707                 object = VME_OBJECT(entry);
18708                 if (object == NULL) {
18709                         continue;
18710                 }
18711                 vm_object_lock_shared(object);
18712                 for (shadows = 0;
18713                      object->shadow != NULL;
18714                      shadows++, object = next_object) {
18715                         next_object = object->shadow;
18716                         vm_object_lock_shared(next_object);
18717                         vm_object_unlock(object);
18718                 }
18719                 vm_object_unlock(object);
18720                 if (shadows > shadows_max) {
18721                         shadows_max = shadows;
18722                 }
18723         }
18724
18725         vm_map_unlock_read(map);
18726
18727         return shadows_max;
18728 }
18729 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
18730
18731 void vm_commit_pagezero_status(vm_map_t lmap) {
18732         pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
18733 }
18734
18735 #if __x86_64__
18736 void
18737 vm_map_set_high_start(
18738         vm_map_t        map,
18739         vm_map_offset_t high_start)
18740 {
18741         map->vmmap_high_start = high_start;
18742 }
18743 #endif /* __x86_64__ */
18744
18745 #if PMAP_CS
18746 kern_return_t
18747 vm_map_entry_cs_associate(
18748         vm_map_t                map,
18749         vm_map_entry_t          entry,
18750         vm_map_kernel_flags_t   vmk_flags)
18751 {
18752         vm_object_t cs_object, cs_shadow;
18753         vm_object_offset_t cs_offset;
18754         void *cs_blobs;
18755         struct vnode *cs_vnode;
18756         kern_return_t cs_ret;
18757
18758         if (map->pmap == NULL ||
18759             entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
18760             VME_OBJECT(entry) == VM_OBJECT_NULL ||
18761             ! (entry->protection & VM_PROT_EXECUTE)) {
18762                 return KERN_SUCCESS;
18763         }
18764
18765         vm_map_lock_assert_exclusive(map);
18766
18767         if (entry->used_for_jit) {
18768                 cs_ret = pmap_cs_associate(map->pmap,
18769                                            PMAP_CS_ASSOCIATE_JIT,
18770                                            entry->vme_start,
18771                                            entry->vme_end - entry->vme_start);
18772                 goto done;
18773         }
18774
18775         if (vmk_flags.vmkf_remap_prot_copy) {
18776                 cs_ret = pmap_cs_associate(map->pmap,
18777                                            PMAP_CS_ASSOCIATE_COW,
18778                                            entry->vme_start,
18779                                            entry->vme_end - entry->vme_start);
18780                 goto done;
18781         }
18782
18783         vm_object_lock_shared(VME_OBJECT(entry));
18784         cs_offset = VME_OFFSET(entry);
18785         for (cs_object = VME_OBJECT(entry);
18786              (cs_object != VM_OBJECT_NULL &&
18787               !cs_object->code_signed);
18788              cs_object = cs_shadow) {
18789                 cs_shadow = cs_object->shadow;
18790                 if (cs_shadow != VM_OBJECT_NULL) {
18791                         cs_offset += cs_object->vo_shadow_offset;
18792                         vm_object_lock_shared(cs_shadow);
18793                 }
18794                 vm_object_unlock(cs_object);
18795         }
18796         if (cs_object == VM_OBJECT_NULL) {
18797                 return KERN_SUCCESS;
18798         }
18799
18800         cs_offset += cs_object->paging_offset;
18801         cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
18802         cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
18803                                           &cs_blobs);
18804         assert(cs_ret == KERN_SUCCESS);
18805         cs_ret = cs_associate_blob_with_mapping(map->pmap,
18806                                                 entry->vme_start,
18807                                                 (entry->vme_end -
18808                                                  entry->vme_start),
18809                                                 cs_offset,
18810                                                 cs_blobs);
18811         vm_object_unlock(cs_object);
18812         cs_object = VM_OBJECT_NULL;
18813
18814         done:
18815         if (cs_ret == KERN_SUCCESS) {
18816                 DTRACE_VM2(vm_map_entry_cs_associate_success,
18817                            vm_map_offset_t, entry->vme_start,
18818                            vm_map_offset_t, entry->vme_end);
18819                 if (vm_map_executable_immutable) {
18820                         /*
18821                          * Prevent this executable
18822                          * mapping from being unmapped
18823                          * or modified.
18824                          */
18825                         entry->permanent = TRUE;
18826                 }
18827                 /*
18828                  * pmap says it will validate the
18829                  * code-signing validity of pages
18830                  * faulted in via this mapping, so
18831                  * this map entry should be marked so
18832                  * that vm_fault() bypasses code-signing
18833                  * validation for faults coming through
18834                  * this mapping.
18835                  */
18836                 entry->pmap_cs_associated = TRUE;
18837         } else if (cs_ret == KERN_NOT_SUPPORTED) {
18838                 /*
18839                  * pmap won't check the code-signing
18840                  * validity of pages faulted in via
18841                  * this mapping, so VM should keep
18842                  * doing it.
18843                  */
18844                 DTRACE_VM3(vm_map_entry_cs_associate_off,
18845                            vm_map_offset_t, entry->vme_start,
18846                            vm_map_offset_t, entry->vme_end,
18847                            int, cs_ret);
18848         } else {
18849                 /*
18850                  * A real error: do not allow
18851                  * execution in this mapping.
18852                  */
18853                 DTRACE_VM3(vm_map_entry_cs_associate_failure,
18854                            vm_map_offset_t, entry->vme_start,
18855                            vm_map_offset_t, entry->vme_end,
18856                            int, cs_ret);
18857                 entry->protection &= ~VM_PROT_EXECUTE;
18858                 entry->max_protection &= ~VM_PROT_EXECUTE;
18859         }
18860
18861         return cs_ret;
18862 }
18863 #endif /* PMAP_CS */
18864
18865 /*
18866  * FORKED CORPSE FOOTPRINT
18867  *
18868  * A forked corpse gets a copy of the original VM map but its pmap is mostly
18869  * empty since it never ran and never got to fault in any pages.
18870  * Collecting footprint info (via "sysctl vm.self_region_footprint") for
18871  * a forked corpse would therefore return very little information.
18872  *
18873  * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
18874  * to vm_map_fork() to collect footprint information from the original VM map
18875  * and its pmap, and store it in the forked corpse's VM map.  That information
18876  * is stored in place of the VM map's "hole list" since we'll never need to
18877  * lookup for holes in the corpse's map.
18878  *
18879  * The corpse's footprint info looks like this:
18880  *
18881  * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
18882  * as follows:
18883  *                     +---------------------------------------+
18884  *            header-> | cf_size                               |
18885  *                     +-------------------+-------------------+
18886  *                     | cf_last_region    | cf_last_zeroes    |
18887  *                     +-------------------+-------------------+
18888  *           region1-> | cfr_vaddr                             |
18889  *                     +-------------------+-------------------+
18890  *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
18891  *                     +---------------------------------------+
18892  *                     | d4 | d5 | ...                         |
18893  *                     +---------------------------------------+
18894  *                     | ...                                   |
18895  *                     +-------------------+-------------------+
18896  *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
18897  *                     +-------------------+-------------------+
18898  *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
18899  *                     +---------------------------------------+
18900  *                     | d0 | d1 ...                           |
18901  *                     +---------------------------------------+
18902  *                       ...
18903  *                     +---------------------------------------+
18904  *       last region-> | cfr_vaddr                             |
18905  *                     +---------------------------------------+
18906  *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
18907  *                     +---------------------------------------+
18908  *                       ...
18909  *                     +---------------------------------------+
18910  *                     | dx | dy | dz | na | na | na | na | na |
18911  *                     +---------------------------------------+
18912  *
18913  * where:
18914  *      cf_size:        total size of the buffer (rounded to page size)
18915  *      cf_last_region: offset in the buffer of the last "region" sub-header
18916  *      cf_last_zeroes: number of trailing "zero" dispositions at the end
18917  *                      of last region
18918  *      cfr_vaddr:      virtual address of the start of the covered "region"
18919  *      cfr_num_pages:  number of pages in the covered "region"
18920  *      d*:             disposition of the page at that virtual address
18921  * Regions in the buffer are word-aligned.
18922  *
18923  * We estimate the size of the buffer based on the number of memory regions
18924  * and the virtual size of the address space.  While copying each memory region
18925  * during vm_map_fork(), we also collect the footprint info for that region
18926  * and store it in the buffer, packing it as much as possible (coalescing
18927  * contiguous memory regions to avoid having too many region headers and
18928  * avoiding long streaks of "zero" page dispositions by splitting footprint
18929  * "regions", so the number of regions in the footprint buffer might not match
18930  * the number of memory regions in the address space.
18931  *
18932  * We also have to copy the original task's "nonvolatile" ledgers since that's
18933  * part of the footprint and will need to be reported to any tool asking for
18934  * the footprint information of the forked corpse.
18935  */
18936
18937 uint64_t vm_map_corpse_footprint_count = 0;
18938 uint64_t vm_map_corpse_footprint_size_avg = 0;
18939 uint64_t vm_map_corpse_footprint_size_max = 0;
18940 uint64_t vm_map_corpse_footprint_full = 0;
18941 uint64_t vm_map_corpse_footprint_no_buf = 0;
18942
18943 /*
18944  * vm_map_corpse_footprint_new_region:
18945  *      closes the current footprint "region" and creates a new one
18946  *
18947  * Returns NULL if there's not enough space in the buffer for a new region.
18948  */
18949 static struct vm_map_corpse_footprint_region *
18950 vm_map_corpse_footprint_new_region(
18951         struct vm_map_corpse_footprint_header *footprint_header)
18952 {
18953         uintptr_t       footprint_edge;
18954         uint32_t        new_region_offset;
18955         struct vm_map_corpse_footprint_region *footprint_region;
18956         struct vm_map_corpse_footprint_region *new_footprint_region;
18957
18958         footprint_edge = ((uintptr_t)footprint_header +
18959                           footprint_header->cf_size);
18960         footprint_region = ((struct vm_map_corpse_footprint_region *)
18961                             ((char *)footprint_header +
18962                              footprint_header->cf_last_region));
18963         assert((uintptr_t)footprint_region + sizeof (*footprint_region) <=
18964                footprint_edge);
18965
18966         /* get rid of trailing zeroes in the last region */
18967         assert(footprint_region->cfr_num_pages >=
18968                footprint_header->cf_last_zeroes);
18969         footprint_region->cfr_num_pages -=
18970                         footprint_header->cf_last_zeroes;
18971         footprint_header->cf_last_zeroes = 0;
18972
18973         /* reuse this region if it's now empty */
18974         if (footprint_region->cfr_num_pages == 0) {
18975                 return footprint_region;
18976         }
18977
18978         /* compute offset of new region */
18979         new_region_offset = footprint_header->cf_last_region;
18980         new_region_offset += sizeof (*footprint_region);
18981         new_region_offset += footprint_region->cfr_num_pages;
18982         new_region_offset = roundup(new_region_offset, sizeof (int));
18983
18984         /* check if we're going over the edge */
18985         if (((uintptr_t)footprint_header +
18986              new_region_offset +
18987              sizeof (*footprint_region)) >=
18988             footprint_edge) {
18989                 /* over the edge: no new region */
18990                 return NULL;
18991         }
18992
18993         /* adjust offset of last region in header */
18994         footprint_header->cf_last_region = new_region_offset;
18995
18996         new_footprint_region = (struct vm_map_corpse_footprint_region *)
18997                 ((char *)footprint_header +
18998                  footprint_header->cf_last_region);
18999         new_footprint_region->cfr_vaddr = 0;
19000         new_footprint_region->cfr_num_pages = 0;
19001         /* caller needs to initialize new region */
19002
19003         return new_footprint_region;
19004 }
19005
19006 /*
19007  * vm_map_corpse_footprint_collect:
19008  *      collect footprint information for "old_entry" in "old_map" and
19009  *      stores it in "new_map"'s vmmap_footprint_info.
19010  */
19011 kern_return_t
19012 vm_map_corpse_footprint_collect(
19013         vm_map_t        old_map,
19014         vm_map_entry_t  old_entry,
19015         vm_map_t        new_map)
19016 {
19017         vm_map_offset_t va;
19018         int             disp;
19019         kern_return_t   kr;
19020         struct vm_map_corpse_footprint_header *footprint_header;
19021         struct vm_map_corpse_footprint_region *footprint_region;
19022         struct vm_map_corpse_footprint_region *new_footprint_region;
19023         unsigned char   *next_disp_p;
19024         uintptr_t       footprint_edge;
19025         uint32_t        num_pages_tmp;
19026
19027         va = old_entry->vme_start;
19028
19029         vm_map_lock_assert_exclusive(old_map);
19030         vm_map_lock_assert_exclusive(new_map);
19031
19032         assert(new_map->has_corpse_footprint);
19033         assert(!old_map->has_corpse_footprint);
19034         if (!new_map->has_corpse_footprint ||
19035             old_map->has_corpse_footprint) {
19036                 /*
19037                  * This can only transfer footprint info from a
19038                  * map with a live pmap to a map with a corpse footprint.
19039                  */
19040                 return KERN_NOT_SUPPORTED;
19041         }
19042
19043         if (new_map->vmmap_corpse_footprint == NULL) {
19044                 vm_offset_t     buf;
19045                 vm_size_t       buf_size;
19046
19047                 buf = 0;
19048                 buf_size = (sizeof (*footprint_header) +
19049                             (old_map->hdr.nentries
19050                              *
19051                              (sizeof (*footprint_region) +
19052                               + 3)) /* potential alignment for each region */
19053                             +
19054                             ((old_map->size / PAGE_SIZE)
19055                              *
19056                              sizeof (char))); /* disposition for each page */
19057 //              printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19058                 buf_size = round_page(buf_size);
19059
19060                 /* limit buffer to 1 page to validate overflow detection */
19061 //              buf_size = PAGE_SIZE;
19062
19063                 /* limit size to a somewhat sane amount */
19064 #if CONFIG_EMBEDDED
19065 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
19066 #else /* CONFIG_EMBEDDED */
19067 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
19068 #endif /* CONFIG_EMBEDDED */
19069                 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19070                         buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19071                 }
19072
19073                 /*
19074                  * Allocate the pageable buffer (with a trailing guard page).
19075                  * It will be zero-filled on demand.
19076                  */
19077                 kr = kernel_memory_allocate(kernel_map,
19078                                             &buf,
19079                                             (buf_size
19080                                              + PAGE_SIZE), /* trailing guard page */
19081                                             0, /* mask */
19082                                             KMA_PAGEABLE | KMA_GUARD_LAST,
19083                                             VM_KERN_MEMORY_DIAG);
19084                 if (kr != KERN_SUCCESS) {
19085                         vm_map_corpse_footprint_no_buf++;
19086                         return kr;
19087                 }
19088
19089                 /* initialize header and 1st region */
19090                 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19091                 new_map->vmmap_corpse_footprint = footprint_header;
19092
19093                 footprint_header->cf_size = buf_size;
19094                 footprint_header->cf_last_region =
19095                         sizeof (*footprint_header);
19096                 footprint_header->cf_last_zeroes = 0;
19097
19098                 footprint_region = (struct vm_map_corpse_footprint_region *)
19099                         ((char *)footprint_header +
19100                          footprint_header->cf_last_region);
19101                 footprint_region->cfr_vaddr = 0;
19102                 footprint_region->cfr_num_pages = 0;
19103         } else {
19104                 /* retrieve header and last region */
19105                 footprint_header = (struct vm_map_corpse_footprint_header *)
19106                         new_map->vmmap_corpse_footprint;
19107                 footprint_region = (struct vm_map_corpse_footprint_region *)
19108                         ((char *)footprint_header +
19109                          footprint_header->cf_last_region);
19110         }
19111         footprint_edge = ((uintptr_t)footprint_header +
19112                           footprint_header->cf_size);
19113
19114         if ((footprint_region->cfr_vaddr +
19115              (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19116               PAGE_SIZE))
19117             != old_entry->vme_start) {
19118                 uint64_t num_pages_delta;
19119                 uint32_t region_offset_delta;
19120
19121                 /*
19122                  * Not the next contiguous virtual address:
19123                  * start a new region or store "zero" dispositions for
19124                  * the missing pages?
19125                  */
19126                 /* size of gap in actual page dispositions */
19127                 num_pages_delta = (((old_entry->vme_start -
19128                                      footprint_region->cfr_vaddr) / PAGE_SIZE)
19129                                    - footprint_region->cfr_num_pages);
19130                 /* size of gap as a new footprint region header */
19131                 region_offset_delta =
19132                         (sizeof (*footprint_region) +
19133                          roundup((footprint_region->cfr_num_pages -
19134                                   footprint_header->cf_last_zeroes),
19135                                  sizeof (int)) -
19136                          (footprint_region->cfr_num_pages -
19137                           footprint_header->cf_last_zeroes));
19138 //              printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19139                 if (region_offset_delta < num_pages_delta ||
19140                     os_add3_overflow(footprint_region->cfr_num_pages,
19141                                      (uint32_t) num_pages_delta,
19142                                      1,
19143                                      &num_pages_tmp)) {
19144                         /*
19145                          * Storing data for this gap would take more space
19146                          * than inserting a new footprint region header:
19147                          * let's start a new region and save space. If it's a
19148                          * tie, let's avoid using a new region, since that
19149                          * would require more region hops to find the right
19150                          * range during lookups.
19151                          *
19152                          * If the current region's cfr_num_pages would overflow
19153                          * if we added "zero" page dispositions for the gap,
19154                          * no choice but to start a new region.
19155                          */
19156 //                      printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19157                         new_footprint_region =
19158                                 vm_map_corpse_footprint_new_region(footprint_header);
19159                         /* check that we're not going over the edge */
19160                         if (new_footprint_region == NULL) {
19161                                 goto over_the_edge;
19162                         }
19163                         footprint_region = new_footprint_region;
19164                         /* initialize new region as empty */
19165                         footprint_region->cfr_vaddr = old_entry->vme_start;
19166                         footprint_region->cfr_num_pages = 0;
19167                 } else {
19168                         /*
19169                          * Store "zero" page dispositions for the missing
19170                          * pages.
19171                          */
19172 //                      printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19173                         for (; num_pages_delta > 0; num_pages_delta--) {
19174                                 next_disp_p =
19175                                         ((unsigned char *) footprint_region +
19176                                          sizeof (*footprint_region) +
19177                                          footprint_region->cfr_num_pages);
19178                                 /* check that we're not going over the edge */
19179                                 if ((uintptr_t)next_disp_p >= footprint_edge) {
19180                                         goto over_the_edge;
19181                                 }
19182                                 /* store "zero" disposition for this gap page */
19183                                 footprint_region->cfr_num_pages++;
19184                                 *next_disp_p = (unsigned char) 0;
19185                                 footprint_header->cf_last_zeroes++;
19186                         }
19187                 }
19188         }
19189
19190         for (va = old_entry->vme_start;
19191              va < old_entry->vme_end;
19192              va += PAGE_SIZE) {
19193                 vm_object_t     object;
19194
19195                 object = VME_OBJECT(old_entry);
19196                 if (!old_entry->is_sub_map &&
19197                     old_entry->iokit_acct &&
19198                     object != VM_OBJECT_NULL &&
19199                     object->internal &&
19200                     object->purgable == VM_PURGABLE_DENY) {
19201                         /*
19202                          * Non-purgeable IOKit memory: phys_footprint
19203                          * includes the entire virtual mapping.
19204                          * Since the forked corpse's VM map entry will not
19205                          * have "iokit_acct", pretend that this page's
19206                          * disposition is "present & internal", so that it
19207                          * shows up in the forked corpse's footprint.
19208                          */
19209                         disp = (PMAP_QUERY_PAGE_PRESENT |
19210                                 PMAP_QUERY_PAGE_INTERNAL);
19211                 } else {
19212                         disp = 0;
19213                         pmap_query_page_info(old_map->pmap,
19214                                              va,
19215                                              &disp);
19216                 }
19217
19218 //              if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19219
19220                 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19221                         /*
19222                          * Ignore "zero" dispositions at start of
19223                          * region: just move start of region.
19224                          */
19225                         footprint_region->cfr_vaddr += PAGE_SIZE;
19226                         continue;
19227                 }
19228
19229                 /* would region's cfr_num_pages overflow? */
19230                 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19231                                     &num_pages_tmp)) {
19232                         /* overflow: create a new region */
19233                         new_footprint_region =
19234                                 vm_map_corpse_footprint_new_region(
19235                                         footprint_header);
19236                         if (new_footprint_region == NULL) {
19237                                 goto over_the_edge;
19238                         }
19239                         footprint_region = new_footprint_region;
19240                         footprint_region->cfr_vaddr = va;
19241                         footprint_region->cfr_num_pages = 0;
19242                 }
19243
19244                 next_disp_p = ((unsigned char *)footprint_region +
19245                                sizeof (*footprint_region) +
19246                                footprint_region->cfr_num_pages);
19247                 /* check that we're not going over the edge */
19248                 if ((uintptr_t)next_disp_p >= footprint_edge) {
19249                         goto over_the_edge;
19250                 }
19251                 /* store this dispostion */
19252                 *next_disp_p = (unsigned char) disp;
19253                 footprint_region->cfr_num_pages++;
19254
19255                 if (disp != 0) {
19256                         /* non-zero disp: break the current zero streak */
19257                         footprint_header->cf_last_zeroes = 0;
19258                         /* done */
19259                         continue;
19260                 }
19261
19262                 /* zero disp: add to the current streak of zeroes */
19263                 footprint_header->cf_last_zeroes++;
19264                 if ((footprint_header->cf_last_zeroes +
19265                      roundup((footprint_region->cfr_num_pages -
19266                               footprint_header->cf_last_zeroes) &
19267                              (sizeof (int) - 1),
19268                              sizeof (int))) <
19269                     (sizeof (*footprint_header))) {
19270                         /*
19271                          * There are not enough trailing "zero" dispositions
19272                          * (+ the extra padding we would need for the previous
19273                          * region); creating a new region would not save space
19274                          * at this point, so let's keep this "zero" disposition
19275                          * in this region and reconsider later.
19276                          */
19277                         continue;
19278                 }
19279                 /*
19280                  * Create a new region to avoid having too many consecutive
19281                  * "zero" dispositions.
19282                  */
19283                 new_footprint_region =
19284                         vm_map_corpse_footprint_new_region(footprint_header);
19285                 if (new_footprint_region == NULL) {
19286                         goto over_the_edge;
19287                 }
19288                 footprint_region = new_footprint_region;
19289                 /* initialize the new region as empty ... */
19290                 footprint_region->cfr_num_pages = 0;
19291                 /* ... and skip this "zero" disp */
19292                 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19293         }
19294
19295         return KERN_SUCCESS;
19296
19297 over_the_edge:
19298 //      printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19299         vm_map_corpse_footprint_full++;
19300         return KERN_RESOURCE_SHORTAGE;
19301 }
19302
19303 /*
19304  * vm_map_corpse_footprint_collect_done:
19305  *      completes the footprint collection by getting rid of any remaining
19306  *      trailing "zero" dispositions and trimming the unused part of the
19307  *      kernel buffer
19308  */
19309 void
19310 vm_map_corpse_footprint_collect_done(
19311         vm_map_t        new_map)
19312 {
19313         struct vm_map_corpse_footprint_header *footprint_header;
19314         struct vm_map_corpse_footprint_region *footprint_region;
19315         vm_size_t       buf_size, actual_size;
19316         kern_return_t   kr;
19317
19318         assert(new_map->has_corpse_footprint);
19319         if (!new_map->has_corpse_footprint ||
19320             new_map->vmmap_corpse_footprint == NULL) {
19321                 return;
19322         }
19323
19324         footprint_header = (struct vm_map_corpse_footprint_header *)
19325                 new_map->vmmap_corpse_footprint;
19326         buf_size = footprint_header->cf_size;
19327
19328         footprint_region = (struct vm_map_corpse_footprint_region *)
19329                 ((char *)footprint_header +
19330                  footprint_header->cf_last_region);
19331
19332         /* get rid of trailing zeroes in last region */
19333         assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19334         footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19335         footprint_header->cf_last_zeroes = 0;
19336
19337         actual_size = (vm_size_t)(footprint_header->cf_last_region +
19338                                   sizeof (*footprint_region) +
19339                                   footprint_region->cfr_num_pages);
19340
19341 //      printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19342         vm_map_corpse_footprint_size_avg =
19343                 (((vm_map_corpse_footprint_size_avg *
19344                    vm_map_corpse_footprint_count) +
19345                   actual_size) /
19346                  (vm_map_corpse_footprint_count + 1));
19347         vm_map_corpse_footprint_count++;
19348         if (actual_size > vm_map_corpse_footprint_size_max) {
19349                 vm_map_corpse_footprint_size_max = actual_size;
19350         }
19351
19352         actual_size = round_page(actual_size);
19353         if (buf_size > actual_size) {
19354                 kr = vm_deallocate(kernel_map,
19355                                    ((vm_address_t)footprint_header +
19356                                     actual_size +
19357                                     PAGE_SIZE), /* trailing guard page */
19358                                    (buf_size - actual_size));
19359                 assertf(kr == KERN_SUCCESS,
19360                         "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19361                         footprint_header,
19362                         (uint64_t) buf_size,
19363                         (uint64_t) actual_size,
19364                         kr);
19365                 kr = vm_protect(kernel_map,
19366                                 ((vm_address_t)footprint_header +
19367                                  actual_size),
19368                                 PAGE_SIZE,
19369                                 FALSE, /* set_maximum */
19370                                 VM_PROT_NONE);
19371                 assertf(kr == KERN_SUCCESS,
19372                         "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19373                         footprint_header,
19374                         (uint64_t) buf_size,
19375                         (uint64_t) actual_size,
19376                         kr);
19377         }
19378
19379         footprint_header->cf_size = actual_size;
19380 }
19381
19382 /*
19383  * vm_map_corpse_footprint_query_page_info:
19384  *      retrieves the disposition of the page at virtual address "vaddr"
19385  *      in the forked corpse's VM map
19386  *
19387  * This is the equivalent of pmap_query_page_info() for a forked corpse.
19388  */
19389 kern_return_t
19390 vm_map_corpse_footprint_query_page_info(
19391         vm_map_t        map,
19392         vm_map_offset_t va,
19393         int             *disp)
19394 {
19395         struct vm_map_corpse_footprint_header *footprint_header;
19396         struct vm_map_corpse_footprint_region *footprint_region;
19397         uint32_t        footprint_region_offset;
19398         vm_map_offset_t region_start, region_end;
19399         int             disp_idx;
19400         kern_return_t   kr;
19401
19402         if (!map->has_corpse_footprint) {
19403                 *disp = 0;
19404                 kr = KERN_INVALID_ARGUMENT;
19405                 goto done;
19406         }
19407
19408         footprint_header = map->vmmap_corpse_footprint;
19409         if (footprint_header == NULL) {
19410                 *disp = 0;
19411 //              if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19412                 kr = KERN_INVALID_ARGUMENT;
19413                 goto done;
19414         }
19415
19416         /* start looking at the hint ("cf_hint_region") */
19417         footprint_region_offset = footprint_header->cf_hint_region;
19418
19419 lookup_again:
19420         if (footprint_region_offset < sizeof (*footprint_header)) {
19421                 /* hint too low: start from 1st region */
19422                 footprint_region_offset = sizeof (*footprint_header);
19423         }
19424         if (footprint_region_offset >= footprint_header->cf_last_region) {
19425                 /* hint too high: re-start from 1st region */
19426                 footprint_region_offset = sizeof (*footprint_header);
19427         }
19428         footprint_region = (struct vm_map_corpse_footprint_region *)
19429                 ((char *)footprint_header + footprint_region_offset);
19430         region_start = footprint_region->cfr_vaddr;
19431         region_end = (region_start +
19432                       ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19433                        PAGE_SIZE));
19434         if (va < region_start &&
19435             footprint_region_offset != sizeof (*footprint_header)) {
19436                 /* our range starts before the hint region */
19437
19438                 /* reset the hint (in a racy way...) */
19439                 footprint_header->cf_hint_region = sizeof (*footprint_header);
19440                 /* lookup "va" again from 1st region */
19441                 footprint_region_offset = sizeof (*footprint_header);
19442                 goto lookup_again;
19443         }
19444
19445         while (va >= region_end) {
19446                 if (footprint_region_offset >= footprint_header->cf_last_region) {
19447                         break;
19448                 }
19449                 /* skip the region's header */
19450                 footprint_region_offset += sizeof (*footprint_region);
19451                 /* skip the region's page dispositions */
19452                 footprint_region_offset += footprint_region->cfr_num_pages;
19453                 /* align to next word boundary */
19454                 footprint_region_offset =
19455                         roundup(footprint_region_offset,
19456                                 sizeof (int));
19457                 footprint_region = (struct vm_map_corpse_footprint_region *)
19458                         ((char *)footprint_header + footprint_region_offset);
19459                 region_start = footprint_region->cfr_vaddr;
19460                 region_end = (region_start +
19461                               ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19462                                PAGE_SIZE));
19463         }
19464         if (va < region_start || va >= region_end) {
19465                 /* page not found */
19466                 *disp = 0;
19467 //              if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19468                 kr = KERN_SUCCESS;
19469                 goto done;
19470         }
19471
19472         /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19473         footprint_header->cf_hint_region = footprint_region_offset;
19474
19475         /* get page disposition for "va" in this region */
19476         disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19477         *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19478
19479         kr = KERN_SUCCESS;
19480 done:
19481 //      if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19482         /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19483         DTRACE_VM4(footprint_query_page_info,
19484                    vm_map_t, map,
19485                    vm_map_offset_t, va,
19486                    int, *disp,
19487                    kern_return_t, kr);
19488
19489         return kr;
19490 }
19491
19492
19493 static void
19494 vm_map_corpse_footprint_destroy(
19495         vm_map_t        map)
19496 {
19497         if (map->has_corpse_footprint &&
19498             map->vmmap_corpse_footprint != 0) {
19499                 struct vm_map_corpse_footprint_header *footprint_header;
19500                 vm_size_t buf_size;
19501                 kern_return_t kr;
19502
19503                 footprint_header = map->vmmap_corpse_footprint;
19504                 buf_size = footprint_header->cf_size;
19505                 kr = vm_deallocate(kernel_map,
19506                                    (vm_offset_t) map->vmmap_corpse_footprint,
19507                                    ((vm_size_t) buf_size
19508                                     + PAGE_SIZE)); /* trailing guard page */
19509                 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19510                 map->vmmap_corpse_footprint = 0;
19511                 map->has_corpse_footprint = FALSE;
19512         }
19513 }
19514
19515 /*
19516  * vm_map_copy_footprint_ledgers:
19517  *      copies any ledger that's relevant to the memory footprint of "old_task"
19518  *      into the forked corpse's task ("new_task")
19519  */
19520 void
19521 vm_map_copy_footprint_ledgers(
19522         task_t  old_task,
19523         task_t  new_task)
19524 {
19525         vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19526         vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19527         vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19528     vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19529     vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19530     vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19531     vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19532     vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19533     vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19534     vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19535     vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19536     vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19537 }
19538
19539 /*
19540  * vm_map_copy_ledger:
19541  *      copy a single ledger from "old_task" to "new_task"
19542  */
19543 void
19544 vm_map_copy_ledger(
19545         task_t  old_task,
19546         task_t  new_task,
19547         int     ledger_entry)
19548 {
19549         ledger_amount_t old_balance, new_balance, delta;
19550
19551         assert(new_task->map->has_corpse_footprint);
19552         if (!new_task->map->has_corpse_footprint)
19553                 return;
19554
19555         /* turn off sanity checks for the ledger we're about to mess with */
19556         ledger_disable_panic_on_negative(new_task->ledger,
19557                                          ledger_entry);
19558
19559         /* adjust "new_task" to match "old_task" */
19560         ledger_get_balance(old_task->ledger,
19561                            ledger_entry,
19562                            &old_balance);
19563         ledger_get_balance(new_task->ledger,
19564                            ledger_entry,
19565                            &new_balance);
19566         if (new_balance == old_balance) {
19567                 /* new == old: done */
19568         } else if (new_balance > old_balance) {
19569                 /* new > old ==> new -= new - old */
19570                 delta = new_balance - old_balance;
19571                 ledger_debit(new_task->ledger,
19572                              ledger_entry,
19573                              delta);
19574         } else {
19575                 /* new < old ==> new += old - new */
19576                 delta = old_balance - new_balance;
19577                 ledger_credit(new_task->ledger,
19578                               ledger_entry,
19579                               delta);
19580         }
19581 }