osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68
  69 #include <vm/vm_options.h>
  70
  71 #include <libkern/OSAtomic.h>
  72
  73 #include <mach/kern_return.h>
  74 #include <mach/port.h>
  75 #include <mach/vm_attributes.h>
  76 #include <mach/vm_param.h>
  77 #include <mach/vm_behavior.h>
  78 #include <mach/vm_statistics.h>
  79 #include <mach/memory_object.h>
  80 #include <mach/mach_vm.h>
  81 #include <machine/cpu_capabilities.h>
  82 #include <mach/sdt.h>
  83
  84 #include <kern/assert.h>
  85 #include <kern/backtrace.h>
  86 #include <kern/counters.h>
  87 #include <kern/kalloc.h>
  88 #include <kern/zalloc.h>
  89
  90 #include <vm/cpm.h>
  91 #include <vm/vm_compressor_pager.h>
  92 #include <vm/vm_init.h>
  93 #include <vm/vm_fault.h>
  94 #include <vm/vm_map.h>
  95 #include <vm/vm_object.h>
  96 #include <vm/vm_page.h>
  97 #include <vm/vm_pageout.h>
  98 #include <vm/vm_kern.h>
  99 #include <ipc/ipc_port.h>
 100 #include <kern/sched_prim.h>
 101 #include <kern/misc_protos.h>
 102 #include <kern/xpr.h>
 103
 104 #include <mach/vm_map_server.h>
 105 #include <mach/mach_host_server.h>
 106 #include <vm/vm_protos.h>
 107 #include <vm/vm_purgeable_internal.h>
 108
 109 #include <vm/vm_protos.h>
 110 #include <vm/vm_shared_region.h>
 111 #include <vm/vm_map_store.h>
 112
 113 #include <san/kasan.h>
 114
  115 #if __arm64__
      /*
       * 4K binary-compatibility switches, defined outside this file.
       * In the part of the file visible here they are only read by
       * vm_map_apple_protected(), which (among other conditions) requires
       * both to be non-zero before it asks for a JIT mapping of an entry
       * that was marked "used_for_jit".
       */
  116 extern int fourk_binary_compatibility_unsafe;
  117 extern int fourk_binary_compatibility_allow_wx;
  118 #endif /* __arm64__ */
      /*
       * proc_selfpid() supplies the pid printed in the FOURK_COMPAT log
       * message of vm_map_apple_protected().
       * NOTE(review): proc_name_address() has no caller in this part of
       * the file.
       */
  119 extern int proc_selfpid(void);
  120 extern char *proc_name_address(void *p);
  121
  122 #if VM_MAP_DEBUG_APPLE_PROTECT
      /* When non-zero, vm_map_apple_protected() printf()s every range it remaps. */
  123 int vm_map_debug_apple_protect = 0;
  124 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
  125 #if VM_MAP_DEBUG_FOURK
      /* NOTE(review): debug switch; its readers are not in this part of the file. */
  126 int vm_map_debug_fourk = 0;
  127 #endif /* VM_MAP_DEBUG_FOURK */
  128
      /*
       * Both default to 0 (off).
       * NOTE(review): the code that consults these two switches is not in
       * this part of the file; presumably they control enforcement and
       * logging for immutable executable mappings -- confirm at the users.
       */
  129 int vm_map_executable_immutable = 0;
  130 int vm_map_executable_immutable_no_log = 0;
  131
  132 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
 133 /* Internal prototypes
 134  */
 135
 136 static void vm_map_simplify_range(
 137         vm_map_t        map,
 138         vm_map_offset_t start,
 139         vm_map_offset_t end);   /* forward */
 140
 141 static boolean_t        vm_map_range_check(
 142         vm_map_t        map,
 143         vm_map_offset_t start,
 144         vm_map_offset_t end,
 145         vm_map_entry_t  *entry);
 146
 147 static vm_map_entry_t   _vm_map_entry_create(
 148         struct vm_map_header    *map_header, boolean_t map_locked);
 149
 150 static void             _vm_map_entry_dispose(
 151         struct vm_map_header    *map_header,
 152         vm_map_entry_t          entry);
 153
 154 static void             vm_map_pmap_enter(
 155         vm_map_t                map,
 156         vm_map_offset_t         addr,
 157         vm_map_offset_t         end_addr,
 158         vm_object_t             object,
 159         vm_object_offset_t      offset,
 160         vm_prot_t               protection);
 161
 162 static void             _vm_map_clip_end(
 163         struct vm_map_header    *map_header,
 164         vm_map_entry_t          entry,
 165         vm_map_offset_t         end);
 166
 167 static void             _vm_map_clip_start(
 168         struct vm_map_header    *map_header,
 169         vm_map_entry_t          entry,
 170         vm_map_offset_t         start);
 171
 172 static void             vm_map_entry_delete(
 173         vm_map_t        map,
 174         vm_map_entry_t  entry);
 175
 176 static kern_return_t    vm_map_delete(
 177         vm_map_t        map,
 178         vm_map_offset_t start,
 179         vm_map_offset_t end,
 180         int             flags,
 181         vm_map_t        zap_map);
 182
 183 static kern_return_t    vm_map_copy_overwrite_unaligned(
 184         vm_map_t        dst_map,
 185         vm_map_entry_t  entry,
 186         vm_map_copy_t   copy,
 187         vm_map_address_t start,
 188         boolean_t       discard_on_success);
 189
 190 static kern_return_t    vm_map_copy_overwrite_aligned(
 191         vm_map_t        dst_map,
 192         vm_map_entry_t  tmp_entry,
 193         vm_map_copy_t   copy,
 194         vm_map_offset_t start,
 195         pmap_t          pmap);
 196
 197 static kern_return_t    vm_map_copyin_kernel_buffer(
 198         vm_map_t        src_map,
 199         vm_map_address_t src_addr,
 200         vm_map_size_t   len,
 201         boolean_t       src_destroy,
 202         vm_map_copy_t   *copy_result);  /* OUT */
 203
 204 static kern_return_t    vm_map_copyout_kernel_buffer(
 205         vm_map_t        map,
 206         vm_map_address_t *addr, /* IN/OUT */
 207         vm_map_copy_t   copy,
 208         vm_map_size_t   copy_size,
 209         boolean_t       overwrite,
 210         boolean_t       consume_on_success);
 211
 212 static void             vm_map_fork_share(
 213         vm_map_t        old_map,
 214         vm_map_entry_t  old_entry,
 215         vm_map_t        new_map);
 216
 217 static boolean_t        vm_map_fork_copy(
 218         vm_map_t        old_map,
 219         vm_map_entry_t  *old_entry_p,
 220         vm_map_t        new_map,
 221         int             vm_map_copyin_flags);
 222
 223 static kern_return_t    vm_map_wire_nested(
 224         vm_map_t                   map,
 225         vm_map_offset_t            start,
 226         vm_map_offset_t            end,
 227         vm_prot_t                  caller_prot,
 228         vm_tag_t                   tag,
 229         boolean_t                  user_wire,
 230         pmap_t                     map_pmap,
 231         vm_map_offset_t            pmap_addr,
 232         ppnum_t                    *physpage_p);
 233
 234 static kern_return_t    vm_map_unwire_nested(
 235         vm_map_t                   map,
 236         vm_map_offset_t            start,
 237         vm_map_offset_t            end,
 238         boolean_t                  user_wire,
 239         pmap_t                     map_pmap,
 240         vm_map_offset_t            pmap_addr);
 241
 242 static kern_return_t    vm_map_overwrite_submap_recurse(
 243         vm_map_t                   dst_map,
 244         vm_map_offset_t            dst_addr,
 245         vm_map_size_t              dst_size);
 246
 247 static kern_return_t    vm_map_copy_overwrite_nested(
 248         vm_map_t                   dst_map,
 249         vm_map_offset_t            dst_addr,
 250         vm_map_copy_t              copy,
 251         boolean_t                  interruptible,
 252         pmap_t                     pmap,
 253         boolean_t                  discard_on_success);
 254
 255 static kern_return_t    vm_map_remap_extract(
 256         vm_map_t                map,
 257         vm_map_offset_t         addr,
 258         vm_map_size_t           size,
 259         boolean_t               copy,
 260         struct vm_map_header    *map_header,
 261         vm_prot_t               *cur_protection,
 262         vm_prot_t               *max_protection,
 263         vm_inherit_t            inheritance,
 264         boolean_t               pageable,
 265         boolean_t               same_map,
 266         vm_map_kernel_flags_t   vmk_flags);
 267
 268 static kern_return_t    vm_map_remap_range_allocate(
 269         vm_map_t                map,
 270         vm_map_address_t        *address,
 271         vm_map_size_t           size,
 272         vm_map_offset_t         mask,
 273         int                     flags,
 274         vm_map_kernel_flags_t   vmk_flags,
 275         vm_tag_t                tag,
 276         vm_map_entry_t          *map_entry);
 277
 278 static void             vm_map_region_look_for_page(
 279         vm_map_t                   map,
 280         vm_map_offset_t            va,
 281         vm_object_t                object,
 282         vm_object_offset_t         offset,
 283         int                        max_refcnt,
 284         int                        depth,
 285         vm_region_extended_info_t  extended,
 286         mach_msg_type_number_t count);
 287
 288 static int              vm_map_region_count_obj_refs(
 289         vm_map_entry_t             entry,
 290         vm_object_t                object);
 291
 292
 293 static kern_return_t    vm_map_willneed(
 294         vm_map_t        map,
 295         vm_map_offset_t start,
 296         vm_map_offset_t end);
 297
 298 static kern_return_t    vm_map_reuse_pages(
 299         vm_map_t        map,
 300         vm_map_offset_t start,
 301         vm_map_offset_t end);
 302
 303 static kern_return_t    vm_map_reusable_pages(
 304         vm_map_t        map,
 305         vm_map_offset_t start,
 306         vm_map_offset_t end);
 307
 308 static kern_return_t    vm_map_can_reuse(
 309         vm_map_t        map,
 310         vm_map_offset_t start,
 311         vm_map_offset_t end);
 312
 313 #if MACH_ASSERT
 314 static kern_return_t    vm_map_pageout(
 315         vm_map_t        map,
 316         vm_map_offset_t start,
 317         vm_map_offset_t end);
 318 #endif /* MACH_ASSERT */
 319
 320 pid_t find_largest_process_vm_map_entries(void);
 321
  322 /*
  323  * Macros to copy a vm_map_entry. We must be careful to correctly
  324  * manage the wired page count. vm_map_entry_copy() creates a new
  325  * map entry to the same memory - the wired count in the new entry
  326  * must be set to zero. vm_map_entry_copy_full() creates a new
  327  * entry that is identical to the old entry.  This preserves the
  328  * wire count; it's used for map splitting and zone changing in
  329  * vm_map_copyout.
       *
       * Details of the macro bodies below:
       *
       *  - Both macros perform a whole-structure assignment
       *    (*NEW = *OLD), but first save NEW's own from_reserved_zone
       *    value and restore it afterwards.  So even the "full" copy is
       *    identical to OLD in every field except from_reserved_zone,
       *    which keeps describing NEW itself.
       *
       *  - vm_map_entry_copy() then resets per-mapping state in NEW:
       *    is_shared, needs_wakeup, in_transition, permanent,
       *    used_for_jit, vme_resilient_codesign, vme_resilient_media and
       *    vme_atomic become FALSE; wired_count and user_wired_count
       *    become 0.
       *
       *  - If the copied entry has iokit_acct set, vm_map_entry_copy()
       *    asserts that use_pmap is clear, then clears iokit_acct and
       *    sets use_pmap in NEW.
       *
       *  - NEW and OLD are expanded several times, so the arguments must
       *    be free of side effects.
       *
       *  - MACRO_BEGIN / MACRO_END are defined elsewhere.
       *    NOTE(review): each macro declares a local (_vmec_reserved /
       *    _vmecf_reserved), so those wrappers must open a new scope --
       *    presumably do { ... } while (0); confirm.
  330  */
  331
  332 #define vm_map_entry_copy(NEW,OLD)      \
  333 MACRO_BEGIN                             \
  334 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
  335         *(NEW) = *(OLD);                \
  336         (NEW)->is_shared = FALSE;       \
  337         (NEW)->needs_wakeup = FALSE;    \
  338         (NEW)->in_transition = FALSE;   \
  339         (NEW)->wired_count = 0;         \
  340         (NEW)->user_wired_count = 0;    \
  341         (NEW)->permanent = FALSE;       \
  342         (NEW)->used_for_jit = FALSE;    \
  343         (NEW)->from_reserved_zone = _vmec_reserved;     \
  344         if ((NEW)->iokit_acct) {                        \
  345              assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
  346              (NEW)->iokit_acct = FALSE;                 \
  347              (NEW)->use_pmap = TRUE;                    \
  348         }                                               \
  349         (NEW)->vme_resilient_codesign = FALSE; \
  350         (NEW)->vme_resilient_media = FALSE;     \
  351         (NEW)->vme_atomic = FALSE;      \
  352 MACRO_END
  353
  354 #define vm_map_entry_copy_full(NEW,OLD)                 \
  355 MACRO_BEGIN                                             \
  356 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
  357 (*(NEW) = *(OLD));                                      \
  358 (NEW)->from_reserved_zone = _vmecf_reserved;                    \
  359 MACRO_END
 360
 361 /*
 362  *      Decide if we want to allow processes to execute from their data or stack areas.
 363  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 364  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 365  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 366  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 367  *      specific pmap files since the default behavior varies according to architecture.  The
 368  *      main reason it varies is because of the need to provide binary compatibility with old
 369  *      applications that were written before these restrictions came into being.  In the old
 370  *      days, an app could execute anything it could read, but this has slowly been tightened
 371  *      up over time.  The default behavior is:
 372  *
 373  *      32-bit PPC apps         may execute from both stack and data areas
  374  *      32-bit Intel apps       may execute from data areas but not stack
 375  *      64-bit PPC/Intel apps   may not execute from either data or stack
 376  *
 377  *      An application on any architecture may override these defaults by explicitly
 378  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 379  *      system call.  This code here just determines what happens when an app tries to
 380  *      execute from a page that lacks execute permission.
 381  *
 382  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 383  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 384  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 385  *      execution from data areas for a particular binary even if the arch normally permits it. As
 386  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 387  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 388  *      are not all NX-safe.
 389  */
 390
  391 extern int allow_data_exec, allow_stack_exec;
  392
      /*
       *      override_nx:
       *
       *      Decide whether execution is to be allowed from a region of
       *      "map" whose user tag is "user_tag".
       *
       *      - Maps that use kernel_pmap always get FALSE.
       *      - The ABI bit tested is VM_ABI_64 when vm_map_is_64bit(map)
       *        is true and VM_ABI_32 otherwise.
       *      - For VM_MEMORY_STACK the result is allow_stack_exec masked
       *        with that ABI bit.  The masked value itself is returned, so
       *        callers should test for non-zero rather than compare with
       *        TRUE.  map_disallow_data_exec is not consulted on this path.
       *      - For every other tag the result is 1 only when
       *        allow_data_exec has the ABI bit set and the map's
       *        map_disallow_data_exec flag is FALSE; otherwise 0.
       *
       *      NOTE(review): the trailing comment on the signature says
       *      "map unused on arm", yet this body dereferences "map"
       *      unconditionally -- confirm whether that remark is stale.
       */
  393 int
  394 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
  395 {
  396         int current_abi;
  397
              /* never override NX for a map backed by the kernel pmap */
  398         if (map->pmap == kernel_pmap) return FALSE;
  399
  400         /*
  401          * Determine if the app is running in 32 or 64 bit mode.
  402          */
  403
  404         if (vm_map_is_64bit(map))
  405                 current_abi = VM_ABI_64;
  406         else
  407                 current_abi = VM_ABI_32;
  408
  409         /*
  410          * Determine if we should allow the execution based on whether it's a
  411          * stack or data area and the current architecture.
  412          */
  413
              /* stack: raw masked bits are returned (non-zero means "allow") */
  414         if (user_tag == VM_MEMORY_STACK)
  415                 return allow_stack_exec & current_abi;
  416
              /* data: the per-map opt-out can veto the system-wide setting */
  417         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
  418 }
 419
 420
 421 /*
 422  *      Virtual memory maps provide for the mapping, protection,
 423  *      and sharing of virtual memory objects.  In addition,
 424  *      this module provides for an efficient virtual copy of
 425  *      memory from one map to another.
 426  *
 427  *      Synchronization is required prior to most operations.
 428  *
 429  *      Maps consist of an ordered doubly-linked list of simple
 430  *      entries; a single hint is used to speed up lookups.
 431  *
 432  *      Sharing maps have been deleted from this version of Mach.
 433  *      All shared objects are now mapped directly into the respective
 434  *      maps.  This requires a change in the copy on write strategy;
 435  *      the asymmetric (delayed) strategy is used for shared temporary
 436  *      objects instead of the symmetric (shadow) strategy.  All maps
 437  *      are now "top level" maps (either task map, kernel map or submap
 438  *      of the kernel map).
 439  *
  440  *      Since portions of maps are specified by start/end addresses,
 441  *      which may not align with existing map entries, all
 442  *      routines merely "clip" entries to these start/end values.
 443  *      [That is, an entry is split into two, bordering at a
 444  *      start or end value.]  Note that these clippings may not
 445  *      always be necessary (as the two resulting entries are then
 446  *      not changed); however, the clipping is done for convenience.
 447  *      No attempt is currently made to "glue back together" two
 448  *      abutting entries.
 449  *
 450  *      The symmetric (shadow) copy strategy implements virtual copy
 451  *      by copying VM object references from one map to
 452  *      another, and then marking both regions as copy-on-write.
 453  *      It is important to note that only one writeable reference
 454  *      to a VM object region exists in any map when this strategy
 455  *      is used -- this means that shadow object creation can be
  456  *      delayed until a write operation occurs.  The asymmetric (delayed)
 457  *      strategy allows multiple maps to have writeable references to
 458  *      the same region of a vm object, and hence cannot delay creating
 459  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 460  *      Copying of permanent objects is completely different; see
 461  *      vm_object_copy_strategically() in vm_object.c.
 462  */
  463
      /*
       *      Allocation zones used by this module.  vm_map_entry_zone and
       *      vm_map_holes_zone are not declared static, so they have
       *      external linkage; the other three are private to this file.
       *      NOTE(review): the zones are created outside this part of the
       *      file (presumably in vm_map_init) -- confirm there.
       */
  464 static zone_t   vm_map_zone;                            /* zone for vm_map structures */
  465 zone_t                  vm_map_entry_zone;                      /* zone for vm_map_entry structures */
  466 static zone_t   vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking allocations */
  467 static zone_t   vm_map_copy_zone;                       /* zone for vm_map_copy structures */
  468 zone_t                  vm_map_holes_zone;                      /* zone for vm map holes (vm_map_links) structures */
  469
  470
  471 /*
  472  *      Placeholder object for submap operations.  This object is dropped
  473  *      into the range by a call to vm_map_find, and removed when
  474  *      vm_map_submap creates the submap.
  475  */
  476
  477 vm_object_t     vm_submap_object;
  478
      /*
       *      Three pointer/size pairs, private to this file.
       *      NOTE(review): none of them is referenced in this part of the
       *      file; judging by the names they are presumably bootstrap
       *      memory for the map, kernel-entry and holes zones -- verify
       *      against the initialization code.
       */
  479 static void             *map_data;
  480 static vm_size_t        map_data_size;
  481 static void             *kentry_data;
  482 static vm_size_t        kentry_data_size;
  483 static void             *map_holes_data;
  484 static vm_size_t        map_holes_data_size;
  485
      /*
       *      NO_COALESCE_LIMIT is 0 when CONFIG_EMBEDDED is set and
       *      128KB - 1 otherwise.
       *      NOTE(review): its consumer is not in this part of the file;
       *      presumably a size bound used when deciding whether to
       *      coalesce map entries -- confirm at the point of use.
       */
  486 #if CONFIG_EMBEDDED
  487 #define         NO_COALESCE_LIMIT  0
  488 #else
  489 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
  490 #endif
  491
  492 /* Skip acquiring locks if we're in the midst of a kernel core dump */
  493 unsigned int not_in_kdp = 1;
 494
  495 unsigned int vm_map_set_cache_attr_count = 0;
  496
      /*
       *      vm_map_set_cache_attr:
       *
       *      Set the set_cache_attr flag on the VM object that backs the
       *      mapping of address "va" in "map".
       *
       *      The map is held with vm_map_lock_read() for the whole call;
       *      the object is locked only around the flag update.
       *
       *      Returns KERN_INVALID_ARGUMENT when "va" is not mapped, when
       *      the entry found is a submap, or when the entry has no VM
       *      object.  Returns KERN_SUCCESS otherwise.
       *
       *      vm_map_set_cache_attr_count (defined just above) is bumped
       *      once per successful call.
       *      NOTE(review): the increment is a plain "++" performed while
       *      only the map read lock is held; if that lock is shared
       *      between readers, concurrent callers can lose counts.
       *      Harmless if the counter is purely informational -- confirm.
       */
  497 kern_return_t
  498 vm_map_set_cache_attr(
  499         vm_map_t        map,
  500         vm_map_offset_t va)
  501 {
  502         vm_map_entry_t  map_entry;
  503         vm_object_t     object;
  504         kern_return_t   kr = KERN_SUCCESS;
  505
  506         vm_map_lock_read(map);
  507
  508         if (!vm_map_lookup_entry(map, va, &map_entry) ||
  509             map_entry->is_sub_map) {
  510                 /*
  511                  * that memory is not properly mapped
  512                  */
  513                 kr = KERN_INVALID_ARGUMENT;
  514                 goto done;
  515         }
  516         object = VME_OBJECT(map_entry);
  517
  518         if (object == VM_OBJECT_NULL) {
  519                 /*
  520                  * there should be a VM object here at this point
  521                  */
  522                 kr = KERN_INVALID_ARGUMENT;
  523                 goto done;
  524         }
              /* the flag lives in the object, so take the object lock to change it */
  525         vm_object_lock(object);
  526         object->set_cache_attr = TRUE;
  527         vm_object_unlock(object);
  528
              /* statistics only: counts calls that reached the flag update */
  529         vm_map_set_cache_attr_count++;
      /* single exit: every path, success or failure, drops the map read lock here */
  530 done:
  531         vm_map_unlock_read(map);
  532
  533         return kr;
  534 }
 535
 536
  537 #if CONFIG_CODE_DECRYPTION
  538 /*
  539  * vm_map_apple_protected:
  540  * This remaps the requested part of the object with an object backed by
  541  * the decrypting pager.
  542  * crypt_info contains entry points and session data for the crypt module.
  543  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
  544  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
       *
       * Parameters:
       *   map        map whose mappings are replaced.  It is locked with
       *              vm_map_lock() once per entry and unlocked again
       *              before the pager is set up.
       *   start/end  range to cover.  It is widened to start_aligned /
       *              end_aligned (PAGE_MASK_64 first, then the map's own
       *              page mask).  On non-arm64 builds the caller's range
       *              is asserted to be aligned already.
       *   crypto_backing_offset
       *              offset passed to the pager; a value of
       *              (vm_object_offset_t)-1 is replaced by the offset of
       *              the entry being processed (VME_OFFSET).
       *   crypt_info handed unchanged to apple_protect_pager_setup().
       *
       * Returns:
       *   KERN_SUCCESS           the loop ran to completion (also when it
       *                          never executed, i.e. start_aligned >= end).
       *   KERN_INVALID_ARGUMENT  an address in the range is unmapped,
       *                          belongs to a submap, has no VM object, or
       *                          its entry lacks VM_PROT_EXECUTE.
       *   KERN_FAILURE           apple_protect_pager_setup() returned NULL.
       *
       * Entries processed before a failure keep their new mapping; nothing
       * in this function undoes earlier iterations.
  545  */
  546 kern_return_t
  547 vm_map_apple_protected(
  548         vm_map_t                map,
  549         vm_map_offset_t         start,
  550         vm_map_offset_t         end,
  551         vm_object_offset_t      crypto_backing_offset,
  552         struct pager_crypt_info *crypt_info)
  553 {
  554         boolean_t       map_locked;
  555         kern_return_t   kr;
  556         vm_map_entry_t  map_entry;
  557         struct vm_map_entry tmp_entry;
  558         memory_object_t unprotected_mem_obj;
  559         vm_object_t     protected_object;
  560         vm_map_offset_t map_addr;
  561         vm_map_offset_t start_aligned, end_aligned;
  562         vm_object_offset_t      crypto_start, crypto_end;
  563         int             vm_flags;
  564         vm_map_kernel_flags_t vmk_flags;
  565
  566         vm_flags = 0;
  567         vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
  568
              /* map_locked tells the "done" exit path whether it must unlock */
  569         map_locked = FALSE;
  570         unprotected_mem_obj = MEMORY_OBJECT_NULL;
  571
              /* align to the kernel page size first, then to the map's page size */
  572         start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
  573         end_aligned = vm_map_round_page(end, PAGE_MASK_64);
  574         start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
  575         end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
  576
  577 #if __arm64__
  578         /*
  579          * "start" and "end" might be 4K-aligned but not 16K-aligned,
  580          * so we might have to loop and establish up to 3 mappings:
  581          *
  582          * + the first 16K-page, which might overlap with the previous
  583          *   4K-aligned mapping,
  584          * + the center,
  585          * + the last 16K-page, which might overlap with the next
  586          *   4K-aligned mapping.
  587          * Each of these mapping might be backed by a vnode pager (if
  588          * properly page-aligned) or a "fourk_pager", itself backed by a
  589          * vnode pager (if 4K-aligned but not page-aligned).
  590          */
  591 #else /* __arm64__ */
  592         assert(start_aligned == start);
  593         assert(end_aligned == end);
  594 #endif /* __arm64__ */
  595
              /* NOTE(review): redundant -- the for-initializer assigns the same value. */
  596         map_addr = start_aligned;
              /*
               * One pass per map entry.  Each pass replaces the mapping of
               * [tmp_entry.vme_start, tmp_entry.vme_end) and the next pass
               * resumes at tmp_entry.vme_end.  The loop bound is the caller's
               * "end", not end_aligned.
               */
  597         for (map_addr = start_aligned;
  598              map_addr < end;
  599              map_addr = tmp_entry.vme_end) {
  600                 vm_map_lock(map);
  601                 map_locked = TRUE;
  602
  603                 /* lookup the protected VM object */
  604                 if (!vm_map_lookup_entry(map,
  605                                          map_addr,
  606                                          &map_entry) ||
  607                     map_entry->is_sub_map ||
  608                     VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
  609                     !(map_entry->protection & VM_PROT_EXECUTE)) {
  610                         /* that memory is not properly mapped */
  611                         kr = KERN_INVALID_ARGUMENT;
  612                         goto done;
  613                 }
  614
  615                 /* get the protected object to be decrypted */
  616                 protected_object = VME_OBJECT(map_entry);
                      /*
                       * NOTE(review): this test cannot succeed; the same
                       * VM_OBJECT_NULL check is already part of the condition
                       * above and the map has stayed locked since.
                       */
  617                 if (protected_object == VM_OBJECT_NULL) {
  618                         /* there should be a VM object here at this point */
  619                         kr = KERN_INVALID_ARGUMENT;
  620                         goto done;
  621                 }
  622                 /* ensure protected object stays alive while map is unlocked */
  623                 vm_object_reference(protected_object);
  624
  625                 /* limit the map entry to the area we want to cover */
                      /* (clipped against the bounds of the whole request) */
  626                 vm_map_clip_start(map, map_entry, start_aligned);
  627                 vm_map_clip_end(map, map_entry, end_aligned);
  628
                      /*
                       * Snapshot the entry by value: everything below runs with
                       * the map unlocked and only looks at this private copy.
                       */
  629                 tmp_entry = *map_entry;
  630                 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
  631                 vm_map_unlock(map);
  632                 map_locked = FALSE;
  633
  634                 /*
  635                  * This map entry might be only partially encrypted
  636                  * (if not fully "page-aligned").
  637                  */
                      /*
                       * crypto_start / crypto_end are offsets from the start of
                       * this entry that delimit the part lying inside the
                       * caller's [start, end).
                       */
  638                 crypto_start = 0;
  639                 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
  640                 if (tmp_entry.vme_start < start) {
  641                         if (tmp_entry.vme_start != start_aligned) {
                                      /*
                                       * NOTE(review): dead store.  No branch leaves the
                                       * loop with this value; kr is overwritten by the
                                       * KERN_FAILURE path or by the result of
                                       * vm_map_enter_mem_object() below.  Confirm
                                       * whether an early exit was intended here.
                                       */
  642                                 kr = KERN_INVALID_ADDRESS;
  643                         }
  644                         crypto_start += (start - tmp_entry.vme_start);
  645                 }
  646                 if (tmp_entry.vme_end > end) {
  647                         if (tmp_entry.vme_end != end_aligned) {
                                      /* NOTE(review): dead store, same as above. */
  648                                 kr = KERN_INVALID_ADDRESS;
  649                         }
  650                         crypto_end -= (tmp_entry.vme_end - end);
  651                 }
  652
  653                 /*
  654                  * This "extra backing offset" is needed to get the decryption
  655                  * routine to use the right key.  It adjusts for the possibly
  656                  * relative offset of an interposed "4K" pager...
  657                  */
  658                 if (crypto_backing_offset == (vm_object_offset_t) -1) {
  659                         crypto_backing_offset = VME_OFFSET(&tmp_entry);
  660                 }
  661
  662                 /*
  663                  * Lookup (and create if necessary) the protected memory object
  664                  * matching that VM object.
  665                  * If successful, this also grabs a reference on the memory object,
  666                  * to guarantee that it doesn't go away before we get a chance to map
  667                  * it.
  668                  */
  669                 unprotected_mem_obj = apple_protect_pager_setup(
  670                         protected_object,
  671                         VME_OFFSET(&tmp_entry),
  672                         crypto_backing_offset,
  673                         crypt_info,
  674                         crypto_start,
  675                         crypto_end);
  676
  677                 /* release extra ref on protected object */
  678                 vm_object_deallocate(protected_object);
  679
  680                 if (unprotected_mem_obj == NULL) {
  681                         kr = KERN_FAILURE;
  682                         goto done;
  683                 }
  684
  685                 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
  686                 /* can overwrite an immutable mapping */
  687                 vmk_flags.vmkf_overwrite_immutable = TRUE;
  688 #if __arm64__
                      /*
                       * NOTE(review): vmk_flags is initialized once, before the
                       * loop, so vmkf_map_jit stays set for every later entry
                       * once it has been set here -- confirm that is intended.
                       */
  689                 if (tmp_entry.used_for_jit &&
  690                     (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
  691                      PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
  692                     fourk_binary_compatibility_unsafe &&
  693                     fourk_binary_compatibility_allow_wx) {
  694                         printf("** FOURK_COMPAT [%d]: "
  695                                "allowing write+execute at 0x%llx\n",
  696                                proc_selfpid(), tmp_entry.vme_start);
  697                         vmk_flags.vmkf_map_jit = TRUE;
  698                 }
  699 #endif /* __arm64__ */
  700
  701                 /* map this memory object in place of the current one */
                      /*
                       * Same address, size, protections and inheritance as the
                       * entry captured in tmp_entry; offset 0 into the new pager.
                       */
  702                 map_addr = tmp_entry.vme_start;
  703                 kr = vm_map_enter_mem_object(map,
  704                                              &map_addr,
  705                                              (tmp_entry.vme_end -
  706                                               tmp_entry.vme_start),
  707                                              (mach_vm_offset_t) 0,
  708                                              vm_flags,
  709                                              vmk_flags,
  710                                              VM_KERN_MEMORY_NONE,
  711                                              (ipc_port_t) unprotected_mem_obj,
  712                                              0,
  713                                              TRUE,
  714                                              tmp_entry.protection,
  715                                              tmp_entry.max_protection,
  716                                              tmp_entry.inheritance);
                      /*
                       * NOTE(review): a failed mapping is only caught by these
                       * assertions.  If assertf() compiles to nothing
                       * (presumably on builds without MACH_ASSERT -- confirm),
                       * the error in kr is dropped: the loop carries on and the
                       * function finally returns KERN_SUCCESS.
                       */
  717                 assertf(kr == KERN_SUCCESS,
  718                         "kr = 0x%x\n", kr);
  719                 assertf(map_addr == tmp_entry.vme_start,
  720                         "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
  721                         (uint64_t)map_addr,
  722                         (uint64_t) tmp_entry.vme_start,
  723                         &tmp_entry);
  724
  725 #if VM_MAP_DEBUG_APPLE_PROTECT
                      /*
                       * protected_object is printed as a pointer value only; the
                       * reference taken on it above has already been released.
                       */
  726                 if (vm_map_debug_apple_protect) {
  727                         printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
  728                                " backing:[object:%p,offset:0x%llx,"
  729                                "crypto_backing_offset:0x%llx,"
  730                                "crypto_start:0x%llx,crypto_end:0x%llx]\n",
  731                                map,
  732                                (uint64_t) map_addr,
  733                                (uint64_t) (map_addr + (tmp_entry.vme_end -
  734                                                        tmp_entry.vme_start)),
  735                                unprotected_mem_obj,
  736                                protected_object,
  737                                VME_OFFSET(&tmp_entry),
  738                                crypto_backing_offset,
  739                                crypto_start,
  740                                crypto_end);
  741                 }
  742 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
  743
  744                 /*
  745                  * Release the reference obtained by
  746                  * apple_protect_pager_setup().
  747                  * The mapping (if it succeeded) is now holding a reference on
  748                  * the memory object.
  749                  */
  750                 memory_object_deallocate(unprotected_mem_obj);
  751                 unprotected_mem_obj = MEMORY_OBJECT_NULL;
  752
  753                 /* continue with next map entry */
                      /*
                       * Advance the backing offset by the size of the entry just
                       * handled, minus the leading part (crypto_start) that lay
                       * before the caller's "start".
                       */
  754                 crypto_backing_offset += (tmp_entry.vme_end -
  755                                           tmp_entry.vme_start);
  756                 crypto_backing_offset -= crypto_start;
  757         }
              /* also the first assignment to kr when the loop body never ran */
  758         kr = KERN_SUCCESS;
  759
      /* common exit: the error paths can arrive here with the map still locked */
  760 done:
  761         if (map_locked) {
  762                 vm_map_unlock(map);
  763         }
  764         return kr;
  765 }
  766 #endif  /* CONFIG_CODE_DECRYPTION */
 767
 768
/*
 * Lock group and lock attributes used for vm_map locks.
 * All four are set up in vm_map_init().
 */
lck_grp_t               vm_map_lck_grp;         /* lock group, initialized with the name "vm_map" */
lck_grp_attr_t  vm_map_lck_grp_attr;    /* default attributes used to initialize vm_map_lck_grp */
lck_attr_t              vm_map_lck_attr;        /* default lock attributes (used for the map's s_lock) */
lck_attr_t              vm_map_lck_rw_attr;     /* default lock attributes with the debug setting cleared */
 773
 774
 775 /*
 776  *      vm_map_init:
 777  *
 778  *      Initialize the vm_map module.  Must be called before
 779  *      any other vm_map routines.
 780  *
 781  *      Map and entry structures are allocated from zones -- we must
 782  *      initialize those zones.
 783  *
 784  *      There are three zones of interest:
 785  *
 786  *      vm_map_zone:            used to allocate maps.
 787  *      vm_map_entry_zone:      used to allocate map entries.
 788  *      vm_map_entry_reserved_zone:     fallback zone for kernel map entries
 789  *
 790  *      The kernel allocates map entries from a special zone that is initially
 791  *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 792  *      the kernel to allocate more memory to a entry zone when it became
 793  *      empty since the very act of allocating memory implies the creation
 794  *      of a new entry.
 795  */
 796 void
 797 vm_map_init(
 798         void)
 799 {
 800         vm_size_t entry_zone_alloc_size;
 801         const char *mez_name = "VM map entries";
 802
 803         vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
 804                             PAGE_SIZE, "maps");
 805         zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 806 #if     defined(__LP64__)
 807         entry_zone_alloc_size = PAGE_SIZE * 5;
 808 #else
 809         entry_zone_alloc_size = PAGE_SIZE * 6;
 810 #endif
 811         vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 812                                   1024*1024, entry_zone_alloc_size,
 813                                   mez_name);
 814         zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
 815         zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
 816         zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
 817
 818         vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 819                                    kentry_data_size * 64, kentry_data_size,
 820                                    "Reserved VM map entries");
 821         zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
 822         /* Don't quarantine because we always need elements available */
 823         zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
 824
 825         vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
 826                                  16*1024, PAGE_SIZE, "VM map copies");
 827         zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 828
 829         vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
 830                                  16*1024, PAGE_SIZE, "VM map holes");
 831         zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
 832
 833         /*
 834          *      Cram the map and kentry zones with initial data.
 835          *      Set reserved_zone non-collectible to aid zone_gc().
 836          */
 837         zone_change(vm_map_zone, Z_COLLECT, FALSE);
 838         zone_change(vm_map_zone, Z_FOREIGN, TRUE);
 839         zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
 840
 841         zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
 842         zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
 843         zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
 844         zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
 845         zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 846         zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 847         zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 848
 849         zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
 850         zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
 851         zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
 852         zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
 853         zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
 854         zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
 855
 856         /*
 857          * Add the stolen memory to zones, adjust zone size and stolen counts.
 858          * zcram only up to the maximum number of pages for each zone chunk.
 859          */
 860         zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
 861
 862         const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
 863         for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
 864                 zcram(vm_map_entry_reserved_zone,
 865                                 (vm_offset_t)kentry_data + off,
 866                                 MIN(kentry_data_size - off, stride));
 867         }
 868         for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
 869                 zcram(vm_map_holes_zone,
 870                                 (vm_offset_t)map_holes_data + off,
 871                                 MIN(map_holes_data_size - off, stride));
 872         }
 873
 874         VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
 875
 876         lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
 877         lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
 878         lck_attr_setdefault(&vm_map_lck_attr);
 879
 880         lck_attr_setdefault(&vm_map_lck_rw_attr);
 881         lck_attr_cleardebug(&vm_map_lck_rw_attr);
 882
 883 #if VM_MAP_DEBUG_APPLE_PROTECT
 884         PE_parse_boot_argn("vm_map_debug_apple_protect",
 885                            &vm_map_debug_apple_protect,
 886                            sizeof(vm_map_debug_apple_protect));
 887 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 888 #if VM_MAP_DEBUG_APPLE_FOURK
 889         PE_parse_boot_argn("vm_map_debug_fourk",
 890                            &vm_map_debug_fourk,
 891                            sizeof(vm_map_debug_fourk));
 892 #endif /* VM_MAP_DEBUG_FOURK */
 893         PE_parse_boot_argn("vm_map_executable_immutable",
 894                            &vm_map_executable_immutable,
 895                            sizeof(vm_map_executable_immutable));
 896         PE_parse_boot_argn("vm_map_executable_immutable_no_log",
 897                            &vm_map_executable_immutable_no_log,
 898                            sizeof(vm_map_executable_immutable_no_log));
 899 }
 900
 901 void
 902 vm_map_steal_memory(
 903         void)
 904 {
 905         uint32_t kentry_initial_pages;
 906
 907         map_data_size = round_page(10 * sizeof(struct _vm_map));
 908         map_data = pmap_steal_memory(map_data_size);
 909
 910         /*
 911          * kentry_initial_pages corresponds to the number of kernel map entries
 912          * required during bootstrap until the asynchronous replenishment
 913          * scheme is activated and/or entries are available from the general
 914          * map entry pool.
 915          */
 916 #if     defined(__LP64__)
 917         kentry_initial_pages = 10;
 918 #else
 919         kentry_initial_pages = 6;
 920 #endif
 921
 922 #if CONFIG_GZALLOC
 923         /* If using the guard allocator, reserve more memory for the kernel
 924          * reserved map entry pool.
 925         */
 926         if (gzalloc_enabled())
 927                 kentry_initial_pages *= 1024;
 928 #endif
 929
 930         kentry_data_size = kentry_initial_pages * PAGE_SIZE;
 931         kentry_data = pmap_steal_memory(kentry_data_size);
 932
 933         map_holes_data_size = kentry_data_size;
 934         map_holes_data = pmap_steal_memory(map_holes_data_size);
 935 }
 936
 937 boolean_t vm_map_supports_hole_optimization = FALSE;
 938
 939 void
 940 vm_kernel_reserved_entry_init(void) {
 941         zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
 942
 943         /*
 944          * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
 945          */
 946         zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
 947         vm_map_supports_hole_optimization = TRUE;
 948 }
 949
 950 void
 951 vm_map_disable_hole_optimization(vm_map_t map)
 952 {
 953         vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
 954
 955         if (map->holelistenabled) {
 956
 957                 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
 958
 959                 while (hole_entry != NULL) {
 960
 961                         next_hole_entry = hole_entry->vme_next;
 962
 963                         hole_entry->vme_next = NULL;
 964                         hole_entry->vme_prev = NULL;
 965                         zfree(vm_map_holes_zone, hole_entry);
 966
 967                         if (next_hole_entry == head_entry) {
 968                                 hole_entry = NULL;
 969                         } else {
 970                                 hole_entry = next_hole_entry;
 971                         }
 972                 }
 973
 974                 map->holes_list = NULL;
 975                 map->holelistenabled = FALSE;
 976
 977                 map->first_free = vm_map_first_entry(map);
 978                 SAVE_HINT_HOLE_WRITE(map, NULL);
 979         }
 980 }
 981
 982 boolean_t
 983 vm_kernel_map_is_kernel(vm_map_t map) {
 984         return (map->pmap == kernel_pmap);
 985 }
 986
 987 /*
 988  *      vm_map_create:
 989  *
 990  *      Creates and returns a new empty VM map with
 991  *      the given physical map structure, and having
 992  *      the given lower and upper address bounds.
 993  */
 994
 995 vm_map_t
 996 vm_map_create(
 997         pmap_t                  pmap,
 998         vm_map_offset_t min,
 999         vm_map_offset_t max,
1000         boolean_t               pageable)
1001 {
1002         static int              color_seed = 0;
1003         vm_map_t        result;
1004         struct vm_map_links     *hole_entry = NULL;
1005
1006         result = (vm_map_t) zalloc(vm_map_zone);
1007         if (result == VM_MAP_NULL)
1008                 panic("vm_map_create");
1009
1010         vm_map_first_entry(result) = vm_map_to_entry(result);
1011         vm_map_last_entry(result)  = vm_map_to_entry(result);
1012         result->hdr.nentries = 0;
1013         result->hdr.entries_pageable = pageable;
1014
1015         vm_map_store_init( &(result->hdr) );
1016
1017         result->hdr.page_shift = PAGE_SHIFT;
1018
1019         result->size = 0;
1020         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
1021         result->user_wire_size  = 0;
1022 #if __x86_64__
1023         result->vmmap_high_start = 0;
1024 #endif /* __x86_64__ */
1025         result->ref_count = 1;
1026 #if     TASK_SWAPPER
1027         result->res_count = 1;
1028         result->sw_state = MAP_SW_IN;
1029 #endif  /* TASK_SWAPPER */
1030         result->pmap = pmap;
1031         result->min_offset = min;
1032         result->max_offset = max;
1033         result->wiring_required = FALSE;
1034         result->no_zero_fill = FALSE;
1035         result->mapped_in_other_pmaps = FALSE;
1036         result->wait_for_space = FALSE;
1037         result->switch_protect = FALSE;
1038         result->disable_vmentry_reuse = FALSE;
1039         result->map_disallow_data_exec = FALSE;
1040         result->is_nested_map = FALSE;
1041         result->map_disallow_new_exec = FALSE;
1042         result->highest_entry_end = 0;
1043         result->first_free = vm_map_to_entry(result);
1044         result->hint = vm_map_to_entry(result);
1045         result->color_rr = (color_seed++) & vm_color_mask;
1046         result->jit_entry_exists = FALSE;
1047
1048         if (vm_map_supports_hole_optimization) {
1049                 hole_entry = zalloc(vm_map_holes_zone);
1050
1051                 hole_entry->start = min;
1052 #if defined(__arm__) || defined(__arm64__)
1053                 hole_entry->end = result->max_offset;
1054 #else
1055                 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1056 #endif
1057                 result->holes_list = result->hole_hint = hole_entry;
1058                 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
1059                 result->holelistenabled = TRUE;
1060
1061         } else {
1062
1063                 result->holelistenabled = FALSE;
1064         }
1065
1066         vm_map_lock_init(result);
1067         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1068
1069         return(result);
1070 }
1071
/*
 *      vm_map_entry_create:    [ internal use only ]
 *
 *      Allocates a VM map entry for insertion in the
 *      given map (or map copy).  The caller is responsible for
 *      filling in the entry; the allocator only records which zone
 *      the entry came from.
 *
 *      Both macros forward to _vm_map_entry_create(), passing the
 *      header embedded in the map (hdr) or in the map copy (cpy_hdr).
 */
#define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked)                                      \
        _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
/*
 * Counters adjusted by _vm_map_entry_create() / _vm_map_entry_dispose()
 * for headers whose entries are not pageable: entries taken from the
 * reserved zone and from the regular entry zone, respectively.
 */
unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1083
1084 static vm_map_entry_t
1085 _vm_map_entry_create(
1086         struct vm_map_header    *map_header, boolean_t __unused map_locked)
1087 {
1088         zone_t  zone;
1089         vm_map_entry_t  entry;
1090
1091         zone = vm_map_entry_zone;
1092
1093         assert(map_header->entries_pageable ? !map_locked : TRUE);
1094
1095         if (map_header->entries_pageable) {
1096                 entry = (vm_map_entry_t) zalloc(zone);
1097         }
1098         else {
1099                 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1100
1101                 if (entry == VM_MAP_ENTRY_NULL) {
1102                         zone = vm_map_entry_reserved_zone;
1103                         entry = (vm_map_entry_t) zalloc(zone);
1104                         OSAddAtomic(1, &reserved_zalloc_count);
1105                 } else
1106                         OSAddAtomic(1, &nonreserved_zalloc_count);
1107         }
1108
1109         if (entry == VM_MAP_ENTRY_NULL)
1110                 panic("vm_map_entry_create");
1111         entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1112
1113         vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1114 #if     MAP_ENTRY_CREATION_DEBUG
1115         entry->vme_creation_maphdr = map_header;
1116         backtrace(&entry->vme_creation_bt[0],
1117                   (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1118 #endif
1119         return(entry);
1120 }
1121
/*
 *      vm_map_entry_dispose:   [ internal use only ]
 *
 *      Inverse of vm_map_entry_create.
 *
 *      write map lock held so no need to
 *      do anything special to ensure correctness
 *      of the stores
 *
 *      Both macros forward to _vm_map_entry_dispose(), passing the
 *      header embedded in the map (hdr) or in the map copy (cpy_hdr).
 */
#define vm_map_entry_dispose(map, entry)                        \
        _vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 * The first parameter is the map copy.  It was previously declared as
 * "map" while the expansion referenced "copy", so the macro ignored its
 * argument and silently used whatever variable named "copy" was in
 * scope at the call site.
 */
#define vm_map_copy_entry_dispose(copy, entry) \
        _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1136
1137 static void
1138 _vm_map_entry_dispose(
1139         struct vm_map_header    *map_header,
1140         vm_map_entry_t          entry)
1141 {
1142         zone_t          zone;
1143
1144         if (map_header->entries_pageable || !(entry->from_reserved_zone))
1145                 zone = vm_map_entry_zone;
1146         else
1147                 zone = vm_map_entry_reserved_zone;
1148
1149         if (!map_header->entries_pageable) {
1150                 if (zone == vm_map_entry_zone)
1151                         OSAddAtomic(-1, &nonreserved_zalloc_count);
1152                 else
1153                         OSAddAtomic(-1, &reserved_zalloc_count);
1154         }
1155
1156         zfree(zone, entry);
1157 }
1158
#if MACH_ASSERT
/* File-local switch; FALSE by default, so the check below is skipped. */
static boolean_t first_free_check = FALSE;

/*
 * Validates the map's first_free hint by delegating to
 * first_free_is_valid_store().  Returns TRUE without checking
 * anything while first_free_check is FALSE.
 */
boolean_t
first_free_is_valid(
        vm_map_t        map)
{
        return first_free_check ? first_free_is_valid_store( map ) : TRUE;
}
#endif /* MACH_ASSERT */
1171
1172
/*
 * Link / unlink an entry in a map copy.  Both macros forward to
 * _vm_map_store_entry_link() / _vm_map_store_entry_unlink(), passing
 * the header embedded in the copy (cpy_hdr).
 */
#define vm_map_copy_entry_link(copy, after_where, entry)                \
        _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)                           \
        _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1178
1179 #if     MACH_ASSERT && TASK_SWAPPER
1180 /*
1181  *      vm_map_res_reference:
1182  *
1183  *      Adds another valid residence count to the given map.
1184  *
1185  *      Map is locked so this function can be called from
1186  *      vm_map_swapin.
1187  *
1188  */
1189 void vm_map_res_reference(vm_map_t map)
1190 {
1191         /* assert map is locked */
1192         assert(map->res_count >= 0);
1193         assert(map->ref_count >= map->res_count);
1194         if (map->res_count == 0) {
1195                 lck_mtx_unlock(&map->s_lock);
1196                 vm_map_lock(map);
1197                 vm_map_swapin(map);
1198                 lck_mtx_lock(&map->s_lock);
1199                 ++map->res_count;
1200                 vm_map_unlock(map);
1201         } else
1202                 ++map->res_count;
1203 }
1204
1205 /*
1206  *      vm_map_reference_swap:
1207  *
1208  *      Adds valid reference and residence counts to the given map.
1209  *
1210  *      The map may not be in memory (i.e. zero residence count).
1211  *
1212  */
1213 void vm_map_reference_swap(vm_map_t map)
1214 {
1215         assert(map != VM_MAP_NULL);
1216         lck_mtx_lock(&map->s_lock);
1217         assert(map->res_count >= 0);
1218         assert(map->ref_count >= map->res_count);
1219         map->ref_count++;
1220         vm_map_res_reference(map);
1221         lck_mtx_unlock(&map->s_lock);
1222 }
1223
1224 /*
1225  *      vm_map_res_deallocate:
1226  *
1227  *      Decrement residence count on a map; possibly causing swapout.
1228  *
1229  *      The map must be in memory (i.e. non-zero residence count).
1230  *
1231  *      The map is locked, so this function is callable from vm_map_deallocate.
1232  *
1233  */
1234 void vm_map_res_deallocate(vm_map_t map)
1235 {
1236         assert(map->res_count > 0);
1237         if (--map->res_count == 0) {
1238                 lck_mtx_unlock(&map->s_lock);
1239                 vm_map_lock(map);
1240                 vm_map_swapout(map);
1241                 vm_map_unlock(map);
1242                 lck_mtx_lock(&map->s_lock);
1243         }
1244         assert(map->ref_count >= map->res_count);
1245 }
1246 #endif  /* MACH_ASSERT && TASK_SWAPPER */
1247
1248 /*
1249  *      vm_map_destroy:
1250  *
1251  *      Actually destroy a map.
1252  */
1253 void
1254 vm_map_destroy(
1255         vm_map_t        map,
1256         int             flags)
1257 {
1258         vm_map_lock(map);
1259
1260         /* final cleanup: no need to unnest shared region */
1261         flags |= VM_MAP_REMOVE_NO_UNNESTING;
1262         /* final cleanup: ok to remove immutable mappings */
1263         flags |= VM_MAP_REMOVE_IMMUTABLE;
1264
1265         /* clean up regular map entries */
1266         (void) vm_map_delete(map, map->min_offset, map->max_offset,
1267                              flags, VM_MAP_NULL);
1268         /* clean up leftover special mappings (commpage, etc...) */
1269 #if     !defined(__arm__) && !defined(__arm64__)
1270         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1271                              flags, VM_MAP_NULL);
1272 #endif /* !__arm__ && !__arm64__ */
1273
1274         vm_map_disable_hole_optimization(map);
1275         vm_map_unlock(map);
1276
1277         assert(map->hdr.nentries == 0);
1278
1279         if(map->pmap)
1280                 pmap_destroy(map->pmap);
1281
1282         if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1283                 /*
1284                  * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1285                  * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1286                  * structure or kalloc'ed via lck_mtx_init.
1287                  * An example is s_lock_ext within struct _vm_map.
1288                  *
1289                  * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1290                  * can add another tag to detect embedded vs alloc'ed indirect external
1291                  * mutexes but that'll be additional checks in the lock path and require
1292                  * updating dependencies for the old vs new tag.
1293                  *
1294                  * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1295                  * just when lock debugging is ON, we choose to forego explicitly destroying
1296                  * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1297                  * count on vm_map_lck_grp, which has no serious side-effect.
1298                  */
1299         } else {
1300                 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1301                 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1302         }
1303
1304         zfree(vm_map_zone, map);
1305 }
1306
1307 /*
1308  * Returns pid of the task with the largest number of VM map entries.
1309  * Used in the zone-map-exhaustion jetsam path.
1310  */
1311 pid_t
1312 find_largest_process_vm_map_entries(void)
1313 {
1314         pid_t victim_pid = -1;
1315         int max_vm_map_entries = 0;
1316         task_t task = TASK_NULL;
1317         queue_head_t *task_list = &tasks;
1318
1319         lck_mtx_lock(&tasks_threads_lock);
1320         queue_iterate(task_list, task, task_t, tasks) {
1321                 if (task == kernel_task || !task->active)
1322                         continue;
1323
1324                 vm_map_t task_map = task->map;
1325                 if (task_map != VM_MAP_NULL) {
1326                         int task_vm_map_entries = task_map->hdr.nentries;
1327                         if (task_vm_map_entries > max_vm_map_entries) {
1328                                 max_vm_map_entries = task_vm_map_entries;
1329                                 victim_pid = pid_from_task(task);
1330                         }
1331                 }
1332         }
1333         lck_mtx_unlock(&tasks_threads_lock);
1334
1335         printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1336         return victim_pid;
1337 }
1338
1339 #if     TASK_SWAPPER
1340 /*
1341  * vm_map_swapin/vm_map_swapout
1342  *
1343  * Swap a map in and out, either referencing or releasing its resources.
1344  * These functions are internal use only; however, they must be exported
1345  * because they may be called from macros, which are exported.
1346  *
1347  * In the case of swapout, there could be races on the residence count,
1348  * so if the residence count is up, we return, assuming that a
1349  * vm_map_deallocate() call in the near future will bring us back.
1350  *
1351  * Locking:
1352  *      -- We use the map write lock for synchronization among races.
1353  *      -- The map write lock, and not the simple s_lock, protects the
1354  *         swap state of the map.
1355  *      -- If a map entry is a share map, then we hold both locks, in
1356  *         hierarchical order.
1357  *
1358  * Synchronization Notes:
1359  *      1) If a vm_map_swapin() call happens while swapout in progress, it
1360  *      will block on the map lock and proceed when swapout is through.
1361  *      2) A vm_map_reference() call at this time is illegal, and will
1362  *      cause a panic.  vm_map_reference() is only allowed on resident
1363  *      maps, since it refuses to block.
1364  *      3) A vm_map_swapin() call during a swapin will block, and
 *      proceed when the first swapin is done, turning into a nop.
1366  *      This is the reason the res_count is not incremented until
1367  *      after the swapin is complete.
1368  *      4) There is a timing hole after the checks of the res_count, before
1369  *      the map lock is taken, during which a swapin may get the lock
1370  *      before a swapout about to happen.  If this happens, the swapin
1371  *      will detect the state and increment the reference count, causing
1372  *      the swapout to be a nop, thereby delaying it until a later
1373  *      vm_map_deallocate.  If the swapout gets the lock first, then
1374  *      the swapin will simply block until the swapout is done, and
1375  *      then proceed.
1376  *
1377  * Because vm_map_swapin() is potentially an expensive operation, it
1378  * should be used with caution.
1379  *
1380  * Invariants:
1381  *      1) A map with a residence count of zero is either swapped, or
1382  *         being swapped.
1383  *      2) A map with a non-zero residence count is either resident,
1384  *         or being swapped in.
1385  */
1386
1387 int vm_map_swap_enable = 1;
1388
1389 void vm_map_swapin (vm_map_t map)
1390 {
1391         vm_map_entry_t entry;
1392
1393         if (!vm_map_swap_enable)        /* debug */
1394                 return;
1395
1396         /*
1397          * Map is locked
1398          * First deal with various races.
1399          */
1400         if (map->sw_state == MAP_SW_IN)
1401                 /*
1402                  * we raced with swapout and won.  Returning will incr.
1403                  * the res_count, turning the swapout into a nop.
1404                  */
1405                 return;
1406
1407         /*
1408          * The residence count must be zero.  If we raced with another
1409          * swapin, the state would have been IN; if we raced with a
1410          * swapout (after another competing swapin), we must have lost
1411          * the race to get here (see above comment), in which case
1412          * res_count is still 0.
1413          */
1414         assert(map->res_count == 0);
1415
1416         /*
1417          * There are no intermediate states of a map going out or
1418          * coming in, since the map is locked during the transition.
1419          */
1420         assert(map->sw_state == MAP_SW_OUT);
1421
1422         /*
1423          * We now operate upon each map entry.  If the entry is a sub-
1424          * or share-map, we call vm_map_res_reference upon it.
1425          * If the entry is an object, we call vm_object_res_reference
1426          * (this may iterate through the shadow chain).
1427          * Note that we hold the map locked the entire time,
1428          * even if we get back here via a recursive call in
1429          * vm_map_res_reference.
1430          */
1431         entry = vm_map_first_entry(map);
1432
1433         while (entry != vm_map_to_entry(map)) {
1434                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1435                         if (entry->is_sub_map) {
1436                                 vm_map_t lmap = VME_SUBMAP(entry);
1437                                 lck_mtx_lock(&lmap->s_lock);
1438                                 vm_map_res_reference(lmap);
1439                                 lck_mtx_unlock(&lmap->s_lock);
1440                         } else {
1441                                 vm_object_t object = VME_OBEJCT(entry);
1442                                 vm_object_lock(object);
1443                                 /*
1444                                  * This call may iterate through the
1445                                  * shadow chain.
1446                                  */
1447                                 vm_object_res_reference(object);
1448                                 vm_object_unlock(object);
1449                         }
1450                 }
1451                 entry = entry->vme_next;
1452         }
1453         assert(map->sw_state == MAP_SW_OUT);
1454         map->sw_state = MAP_SW_IN;
1455 }
1456
1457 void vm_map_swapout(vm_map_t map)
1458 {
1459         vm_map_entry_t entry;
1460
1461         /*
1462          * Map is locked
1463          * First deal with various races.
1464          * If we raced with a swapin and lost, the residence count
1465          * will have been incremented to 1, and we simply return.
1466          */
1467         lck_mtx_lock(&map->s_lock);
1468         if (map->res_count != 0) {
1469                 lck_mtx_unlock(&map->s_lock);
1470                 return;
1471         }
1472         lck_mtx_unlock(&map->s_lock);
1473
1474         /*
1475          * There are no intermediate states of a map going out or
1476          * coming in, since the map is locked during the transition.
1477          */
1478         assert(map->sw_state == MAP_SW_IN);
1479
1480         if (!vm_map_swap_enable)
1481                 return;
1482
1483         /*
1484          * We now operate upon each map entry.  If the entry is a sub-
1485          * or share-map, we call vm_map_res_deallocate upon it.
1486          * If the entry is an object, we call vm_object_res_deallocate
1487          * (this may iterate through the shadow chain).
1488          * Note that we hold the map locked the entire time,
1489          * even if we get back here via a recursive call in
1490          * vm_map_res_deallocate.
1491          */
1492         entry = vm_map_first_entry(map);
1493
1494         while (entry != vm_map_to_entry(map)) {
1495                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1496                         if (entry->is_sub_map) {
1497                                 vm_map_t lmap = VME_SUBMAP(entry);
1498                                 lck_mtx_lock(&lmap->s_lock);
1499                                 vm_map_res_deallocate(lmap);
1500                                 lck_mtx_unlock(&lmap->s_lock);
1501                         } else {
1502                                 vm_object_t object = VME_OBJECT(entry);
1503                                 vm_object_lock(object);
1504                                 /*
1505                                  * This call may take a long time,
1506                                  * since it could actively push
1507                                  * out pages (if we implement it
1508                                  * that way).
1509                                  */
1510                                 vm_object_res_deallocate(object);
1511                                 vm_object_unlock(object);
1512                         }
1513                 }
1514                 entry = entry->vme_next;
1515         }
1516         assert(map->sw_state == MAP_SW_IN);
1517         map->sw_state = MAP_SW_OUT;
1518 }
1519
1520 #endif  /* TASK_SWAPPER */
1521
1522 /*
1523  *      vm_map_lookup_entry:    [ internal use only ]
1524  *
1525  *      Calls into the vm map store layer to find the map
1526  *      entry containing (or immediately preceding) the
1527  *      specified address in the given map; the entry is returned
1528  *      in the "entry" parameter.  The boolean
1529  *      result indicates whether the address is
1530  *      actually contained in the map.
1531  */
1532 boolean_t
1533 vm_map_lookup_entry(
1534         vm_map_t                map,
1535         vm_map_offset_t address,
1536         vm_map_entry_t          *entry)         /* OUT */
1537 {
1538         return ( vm_map_store_lookup_entry( map, address, entry ));
1539 }
1540
1541 /*
1542  *      Routine:        vm_map_find_space
1543  *      Purpose:
1544  *              Allocate a range in the specified virtual address map,
1545  *              returning the entry allocated for that range.
1546  *              Used by kmem_alloc, etc.
1547  *
1548  *              The map must be NOT be locked. It will be returned locked
1549  *              on KERN_SUCCESS, unlocked on failure.
1550  *
1551  *              If an entry is allocated, the object/offset fields
1552  *              are initialized to zero.
1553  */
1554 kern_return_t
1555 vm_map_find_space(
1556         vm_map_t        map,
1557         vm_map_offset_t         *address,       /* OUT */
1558         vm_map_size_t           size,
1559         vm_map_offset_t         mask,
1560         int                     flags __unused,
1561         vm_map_kernel_flags_t   vmk_flags,
1562         vm_tag_t                tag,
1563         vm_map_entry_t          *o_entry)       /* OUT */
1564 {
1565         vm_map_entry_t                  entry, new_entry;
1566         vm_map_offset_t start;
1567         vm_map_offset_t end;
1568         vm_map_entry_t                  hole_entry;
1569
1570         if (size == 0) {
1571                 *address = 0;
1572                 return KERN_INVALID_ARGUMENT;
1573         }
1574
1575         if (vmk_flags.vmkf_guard_after) {
1576                 /* account for the back guard page in the size */
1577                 size += VM_MAP_PAGE_SIZE(map);
1578         }
1579
1580         new_entry = vm_map_entry_create(map, FALSE);
1581
1582         /*
1583          *      Look for the first possible address; if there's already
1584          *      something at this address, we have to start after it.
1585          */
1586
1587         vm_map_lock(map);
1588
1589         if( map->disable_vmentry_reuse == TRUE) {
1590                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1591         } else {
1592                 if (map->holelistenabled) {
1593                         hole_entry = (vm_map_entry_t)map->holes_list;
1594
1595                         if (hole_entry == NULL) {
1596                                 /*
1597                                  * No more space in the map?
1598                                  */
1599                                 vm_map_entry_dispose(map, new_entry);
1600                                 vm_map_unlock(map);
1601                                 return(KERN_NO_SPACE);
1602                         }
1603
1604                         entry = hole_entry;
1605                         start = entry->vme_start;
1606                 } else {
1607                         assert(first_free_is_valid(map));
1608                         if ((entry = map->first_free) == vm_map_to_entry(map))
1609                                 start = map->min_offset;
1610                         else
1611                                 start = entry->vme_end;
1612                 }
1613         }
1614
1615         /*
1616          *      In any case, the "entry" always precedes
1617          *      the proposed new region throughout the loop:
1618          */
1619
1620         while (TRUE) {
1621                 vm_map_entry_t  next;
1622
1623                 /*
1624                  *      Find the end of the proposed new region.
1625                  *      Be sure we didn't go beyond the end, or
1626                  *      wrap around the address.
1627                  */
1628
1629                 if (vmk_flags.vmkf_guard_before) {
1630                         /* reserve space for the front guard page */
1631                         start += VM_MAP_PAGE_SIZE(map);
1632                 }
1633                 end = ((start + mask) & ~mask);
1634
1635                 if (end < start) {
1636                         vm_map_entry_dispose(map, new_entry);
1637                         vm_map_unlock(map);
1638                         return(KERN_NO_SPACE);
1639                 }
1640                 start = end;
1641                 end += size;
1642
1643                 if ((end > map->max_offset) || (end < start)) {
1644                         vm_map_entry_dispose(map, new_entry);
1645                         vm_map_unlock(map);
1646                         return(KERN_NO_SPACE);
1647                 }
1648
1649                 next = entry->vme_next;
1650
1651                 if (map->holelistenabled) {
1652                         if (entry->vme_end >= end)
1653                                 break;
1654                 } else {
1655                         /*
1656                          *      If there are no more entries, we must win.
1657                          *
1658                          *      OR
1659                          *
1660                          *      If there is another entry, it must be
1661                          *      after the end of the potential new region.
1662                          */
1663
1664                         if (next == vm_map_to_entry(map))
1665                                 break;
1666
1667                         if (next->vme_start >= end)
1668                                 break;
1669                 }
1670
1671                 /*
1672                  *      Didn't fit -- move to the next entry.
1673                  */
1674
1675                 entry = next;
1676
1677                 if (map->holelistenabled) {
1678                         if (entry == (vm_map_entry_t) map->holes_list) {
1679                                 /*
1680                                  * Wrapped around
1681                                  */
1682                                 vm_map_entry_dispose(map, new_entry);
1683                                 vm_map_unlock(map);
1684                                 return(KERN_NO_SPACE);
1685                         }
1686                         start = entry->vme_start;
1687                 } else {
1688                         start = entry->vme_end;
1689                 }
1690         }
1691
1692         if (map->holelistenabled) {
1693                 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1694                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1695                 }
1696         }
1697
1698         /*
1699          *      At this point,
1700          *              "start" and "end" should define the endpoints of the
1701          *                      available new range, and
1702          *              "entry" should refer to the region before the new
1703          *                      range, and
1704          *
1705          *              the map should be locked.
1706          */
1707
1708         if (vmk_flags.vmkf_guard_before) {
1709                 /* go back for the front guard page */
1710                 start -= VM_MAP_PAGE_SIZE(map);
1711         }
1712         *address = start;
1713
1714         assert(start < end);
1715         new_entry->vme_start = start;
1716         new_entry->vme_end = end;
1717         assert(page_aligned(new_entry->vme_start));
1718         assert(page_aligned(new_entry->vme_end));
1719         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1720                                    VM_MAP_PAGE_MASK(map)));
1721         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1722                                    VM_MAP_PAGE_MASK(map)));
1723
1724         new_entry->is_shared = FALSE;
1725         new_entry->is_sub_map = FALSE;
1726         new_entry->use_pmap = TRUE;
1727         VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1728         VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1729
1730         new_entry->needs_copy = FALSE;
1731
1732         new_entry->inheritance = VM_INHERIT_DEFAULT;
1733         new_entry->protection = VM_PROT_DEFAULT;
1734         new_entry->max_protection = VM_PROT_ALL;
1735         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1736         new_entry->wired_count = 0;
1737         new_entry->user_wired_count = 0;
1738
1739         new_entry->in_transition = FALSE;
1740         new_entry->needs_wakeup = FALSE;
1741         new_entry->no_cache = FALSE;
1742         new_entry->permanent = FALSE;
1743         new_entry->superpage_size = FALSE;
1744         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1745                 new_entry->map_aligned = TRUE;
1746         } else {
1747                 new_entry->map_aligned = FALSE;
1748         }
1749
1750         new_entry->used_for_jit = FALSE;
1751         new_entry->zero_wired_pages = FALSE;
1752         new_entry->iokit_acct = FALSE;
1753         new_entry->vme_resilient_codesign = FALSE;
1754         new_entry->vme_resilient_media = FALSE;
1755         if (vmk_flags.vmkf_atomic_entry)
1756                 new_entry->vme_atomic = TRUE;
1757         else
1758                 new_entry->vme_atomic = FALSE;
1759
1760         VME_ALIAS_SET(new_entry, tag);
1761
1762         /*
1763          *      Insert the new entry into the list
1764          */
1765
1766         vm_map_store_entry_link(map, entry, new_entry);
1767
1768         map->size += size;
1769
1770         /*
1771          *      Update the lookup hint
1772          */
1773         SAVE_HINT_MAP_WRITE(map, new_entry);
1774
1775         *o_entry = new_entry;
1776         return(KERN_SUCCESS);
1777 }
1778
1779 int vm_map_pmap_enter_print = FALSE;
1780 int vm_map_pmap_enter_enable = FALSE;
1781
1782 /*
1783  *      Routine:        vm_map_pmap_enter [internal only]
1784  *
1785  *      Description:
1786  *              Force pages from the specified object to be entered into
1787  *              the pmap at the specified address if they are present.
1788  *              As soon as a page not found in the object the scan ends.
1789  *
1790  *      Returns:
1791  *              Nothing.
1792  *
1793  *      In/out conditions:
1794  *              The source map should not be locked on entry.
1795  */
1796 __unused static void
1797 vm_map_pmap_enter(
1798         vm_map_t                map,
1799         vm_map_offset_t         addr,
1800         vm_map_offset_t         end_addr,
1801         vm_object_t             object,
1802         vm_object_offset_t      offset,
1803         vm_prot_t               protection)
1804 {
1805         int                     type_of_fault;
1806         kern_return_t           kr;
1807
1808         if(map->pmap == 0)
1809                 return;
1810
1811         while (addr < end_addr) {
1812                 vm_page_t       m;
1813
1814
1815                 /*
1816                  * TODO:
1817                  * From vm_map_enter(), we come into this function without the map
1818                  * lock held or the object lock held.
1819                  * We haven't taken a reference on the object either.
1820                  * We should do a proper lookup on the map to make sure
1821                  * that things are sane before we go locking objects that
1822                  * could have been deallocated from under us.
1823                  */
1824
1825                 vm_object_lock(object);
1826
1827                 m = vm_page_lookup(object, offset);
1828
1829                 if (m == VM_PAGE_NULL || m->busy || m->fictitious ||
1830                     (m->unusual && ( m->error || m->restart || m->absent))) {
1831                         vm_object_unlock(object);
1832                         return;
1833                 }
1834
1835                 if (vm_map_pmap_enter_print) {
1836                         printf("vm_map_pmap_enter:");
1837                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1838                                map, (unsigned long long)addr, object, (unsigned long long)offset);
1839                 }
1840                 type_of_fault = DBG_CACHE_HIT_FAULT;
1841                 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1842                                                     VM_PAGE_WIRED(m),
1843                                                     FALSE, /* change_wiring */
1844                                                     VM_KERN_MEMORY_NONE, /* tag - not wiring */
1845                                                     FALSE, /* no_cache */
1846                                                     FALSE, /* cs_bypass */
1847                                                     0,     /* XXX need user tag / alias? */
1848                                                     0,     /* pmap_options */
1849                                                     NULL,  /* need_retry */
1850                                                     &type_of_fault);
1851
1852                 vm_object_unlock(object);
1853
1854                 offset += PAGE_SIZE_64;
1855                 addr += PAGE_SIZE;
1856         }
1857 }
1858
1859 boolean_t vm_map_pmap_is_empty(
1860         vm_map_t        map,
1861         vm_map_offset_t start,
1862         vm_map_offset_t end);
1863 boolean_t vm_map_pmap_is_empty(
1864         vm_map_t        map,
1865         vm_map_offset_t start,
1866         vm_map_offset_t end)
1867 {
1868 #ifdef MACHINE_PMAP_IS_EMPTY
1869         return pmap_is_empty(map->pmap, start, end);
1870 #else   /* MACHINE_PMAP_IS_EMPTY */
1871         vm_map_offset_t offset;
1872         ppnum_t         phys_page;
1873
1874         if (map->pmap == NULL) {
1875                 return TRUE;
1876         }
1877
1878         for (offset = start;
1879              offset < end;
1880              offset += PAGE_SIZE) {
1881                 phys_page = pmap_find_phys(map->pmap, offset);
1882                 if (phys_page) {
1883                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1884                                 "page %d at 0x%llx\n",
1885                                 map, (long long)start, (long long)end,
1886                                 phys_page, (long long)offset);
1887                         return FALSE;
1888                 }
1889         }
1890         return TRUE;
1891 #endif  /* MACHINE_PMAP_IS_EMPTY */
1892 }
1893
1894 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1895 kern_return_t
1896 vm_map_random_address_for_size(
1897         vm_map_t        map,
1898         vm_map_offset_t *address,
1899         vm_map_size_t   size)
1900 {
1901         kern_return_t   kr = KERN_SUCCESS;
1902         int             tries = 0;
1903         vm_map_offset_t random_addr = 0;
1904         vm_map_offset_t hole_end;
1905
1906         vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
1907         vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
1908         vm_map_size_t   vm_hole_size = 0;
1909         vm_map_size_t   addr_space_size;
1910
1911         addr_space_size = vm_map_max(map) - vm_map_min(map);
1912
1913         assert(page_aligned(size));
1914
1915         while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1916                 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1917                 random_addr = vm_map_trunc_page(
1918                         vm_map_min(map) +(random_addr % addr_space_size),
1919                         VM_MAP_PAGE_MASK(map));
1920
1921                 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1922                         if (prev_entry == vm_map_to_entry(map)) {
1923                                 next_entry = vm_map_first_entry(map);
1924                         } else {
1925                                 next_entry = prev_entry->vme_next;
1926                         }
1927                         if (next_entry == vm_map_to_entry(map)) {
1928                                 hole_end = vm_map_max(map);
1929                         } else {
1930                                 hole_end = next_entry->vme_start;
1931                         }
1932                         vm_hole_size = hole_end - random_addr;
1933                         if (vm_hole_size >= size) {
1934                                 *address = random_addr;
1935                                 break;
1936                         }
1937                 }
1938                 tries++;
1939         }
1940
1941         if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1942                 kr = KERN_NO_SPACE;
1943         }
1944         return kr;
1945 }
1946
1947 /*
1948  *      Routine:        vm_map_enter
1949  *
1950  *      Description:
1951  *              Allocate a range in the specified virtual address map.
1952  *              The resulting range will refer to memory defined by
1953  *              the given memory object and offset into that object.
1954  *
1955  *              Arguments are as defined in the vm_map call.
1956  */
1957 int _map_enter_debug = 0;
1958 static unsigned int vm_map_enter_restore_successes = 0;
1959 static unsigned int vm_map_enter_restore_failures = 0;
1960 kern_return_t
1961 vm_map_enter(
1962         vm_map_t                map,
1963         vm_map_offset_t         *address,       /* IN/OUT */
1964         vm_map_size_t           size,
1965         vm_map_offset_t         mask,
1966         int                     flags,
1967         vm_map_kernel_flags_t   vmk_flags,
1968         vm_tag_t                alias,
1969         vm_object_t             object,
1970         vm_object_offset_t      offset,
1971         boolean_t               needs_copy,
1972         vm_prot_t               cur_protection,
1973         vm_prot_t               max_protection,
1974         vm_inherit_t            inheritance)
1975 {
1976         vm_map_entry_t          entry, new_entry;
1977         vm_map_offset_t         start, tmp_start, tmp_offset;
1978         vm_map_offset_t         end, tmp_end;
1979         vm_map_offset_t         tmp2_start, tmp2_end;
1980         vm_map_offset_t         step;
1981         kern_return_t           result = KERN_SUCCESS;
1982         vm_map_t                zap_old_map = VM_MAP_NULL;
1983         vm_map_t                zap_new_map = VM_MAP_NULL;
1984         boolean_t               map_locked = FALSE;
1985         boolean_t               pmap_empty = TRUE;
1986         boolean_t               new_mapping_established = FALSE;
1987         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
1988         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1989         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1990         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1991         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1992         boolean_t               is_submap = vmk_flags.vmkf_submap;
1993         boolean_t               permanent = vmk_flags.vmkf_permanent;
1994         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
1995         boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
1996         boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1997         boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1998         boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
1999         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2000         vm_tag_t                user_alias;
2001         vm_map_offset_t         effective_min_offset, effective_max_offset;
2002         kern_return_t           kr;
2003         boolean_t               clear_map_aligned = FALSE;
2004         vm_map_entry_t          hole_entry;
2005         vm_map_size_t           chunk_size = 0;
2006
2007         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2008
2009         if (flags & VM_FLAGS_4GB_CHUNK) {
2010 #if defined(__LP64__)
2011                 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2012 #else /* __LP64__ */
2013                 chunk_size = ANON_CHUNK_SIZE;
2014 #endif /* __LP64__ */
2015         } else {
2016                 chunk_size = ANON_CHUNK_SIZE;
2017         }
2018
2019         if (superpage_size) {
2020                 switch (superpage_size) {
2021                         /*
2022                          * Note that the current implementation only supports
2023                          * a single size for superpages, SUPERPAGE_SIZE, per
2024                          * architecture. As soon as more sizes are supposed
2025                          * to be supported, SUPERPAGE_SIZE has to be replaced
2026                          * with a lookup of the size depending on superpage_size.
2027                          */
2028 #ifdef __x86_64__
2029                         case SUPERPAGE_SIZE_ANY:
2030                                 /* handle it like 2 MB and round up to page size */
2031                                 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
2032                         case SUPERPAGE_SIZE_2MB:
2033                                 break;
2034 #endif
2035                         default:
2036                                 return KERN_INVALID_ARGUMENT;
2037                 }
2038                 mask = SUPERPAGE_SIZE-1;
2039                 if (size & (SUPERPAGE_SIZE-1))
2040                         return KERN_INVALID_ARGUMENT;
2041                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
2042         }
2043
2044
2045 #if CONFIG_EMBEDDED
2046         if (cur_protection & VM_PROT_WRITE){
2047                 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
2048                         printf("EMBEDDED: %s: curprot cannot be write+execute. "
2049                                "turning off execute\n",
2050                                __FUNCTION__);
2051                         cur_protection &= ~VM_PROT_EXECUTE;
2052                 }
2053         }
2054 #endif /* CONFIG_EMBEDDED */
2055
2056         /*
2057          * If the task has requested executable lockdown,
2058          * deny any new executable mapping.
2059          */
2060         if (map->map_disallow_new_exec == TRUE) {
2061                 if (cur_protection & VM_PROT_EXECUTE) {
2062                         return KERN_PROTECTION_FAILURE;
2063                 }
2064         }
2065
2066         if (resilient_codesign || resilient_media) {
2067                 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2068                     (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2069                         return KERN_PROTECTION_FAILURE;
2070                 }
2071         }
2072
2073         if (is_submap) {
2074                 if (purgable) {
2075                         /* submaps can not be purgeable */
2076                         return KERN_INVALID_ARGUMENT;
2077                 }
2078                 if (object == VM_OBJECT_NULL) {
2079                         /* submaps can not be created lazily */
2080                         return KERN_INVALID_ARGUMENT;
2081                 }
2082         }
2083         if (vmk_flags.vmkf_already) {
2084                 /*
2085                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
2086                  * is already present.  For it to be meaningful, the requested
2087                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2088                  * we shouldn't try and remove what was mapped there first
2089                  * (!VM_FLAGS_OVERWRITE).
2090                  */
2091                 if ((flags & VM_FLAGS_ANYWHERE) ||
2092                     (flags & VM_FLAGS_OVERWRITE)) {
2093                         return KERN_INVALID_ARGUMENT;
2094                 }
2095         }
2096
2097         effective_min_offset = map->min_offset;
2098
2099         if (vmk_flags.vmkf_beyond_max) {
2100                 /*
2101                  * Allow an insertion beyond the map's max offset.
2102                  */
2103 #if     !defined(__arm__) && !defined(__arm64__)
2104                 if (vm_map_is_64bit(map))
2105                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2106                 else
2107 #endif  /* __arm__ */
2108                         effective_max_offset = 0x00000000FFFFF000ULL;
2109         } else {
2110                 effective_max_offset = map->max_offset;
2111         }
2112
2113         if (size == 0 ||
2114             (offset & PAGE_MASK_64) != 0) {
2115                 *address = 0;
2116                 return KERN_INVALID_ARGUMENT;
2117         }
2118
2119         if (map->pmap == kernel_pmap) {
2120                 user_alias = VM_KERN_MEMORY_NONE;
2121         } else {
2122                 user_alias = alias;
2123         }
2124
2125 #define RETURN(value)   { result = value; goto BailOut; }
2126
2127         assert(page_aligned(*address));
2128         assert(page_aligned(size));
2129
2130         if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2131                 /*
2132                  * In most cases, the caller rounds the size up to the
2133                  * map's page size.
2134                  * If we get a size that is explicitly not map-aligned here,
2135                  * we'll have to respect the caller's wish and mark the
2136                  * mapping as "not map-aligned" to avoid tripping the
2137                  * map alignment checks later.
2138                  */
2139                 clear_map_aligned = TRUE;
2140         }
2141         if (!anywhere &&
2142             !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2143                 /*
2144                  * We've been asked to map at a fixed address and that
2145                  * address is not aligned to the map's specific alignment.
2146                  * The caller should know what it's doing (i.e. most likely
2147                  * mapping some fragmented copy map, transferring memory from
2148                  * a VM map with a different alignment), so clear map_aligned
2149                  * for this new VM map entry and proceed.
2150                  */
2151                 clear_map_aligned = TRUE;
2152         }
2153
2154         /*
2155          * Only zero-fill objects are allowed to be purgable.
2156          * LP64todo - limit purgable objects to 32-bits for now
2157          */
2158         if (purgable &&
2159             (offset != 0 ||
2160              (object != VM_OBJECT_NULL &&
2161               (object->vo_size != size ||
2162                object->purgable == VM_PURGABLE_DENY))
2163              || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
2164                 return KERN_INVALID_ARGUMENT;
2165
2166         if (!anywhere && overwrite) {
2167                 /*
2168                  * Create a temporary VM map to hold the old mappings in the
2169                  * affected area while we create the new one.
2170                  * This avoids releasing the VM map lock in
2171                  * vm_map_entry_delete() and allows atomicity
2172                  * when we want to replace some mappings with a new one.
2173                  * It also allows us to restore the old VM mappings if the
2174                  * new mapping fails.
2175                  */
2176                 zap_old_map = vm_map_create(PMAP_NULL,
2177                                             *address,
2178                                             *address + size,
2179                                             map->hdr.entries_pageable);
2180                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2181                 vm_map_disable_hole_optimization(zap_old_map);
2182         }
2183
2184 StartAgain: ;
2185
2186         start = *address;
2187
2188         if (anywhere) {
2189                 vm_map_lock(map);
2190                 map_locked = TRUE;
2191
2192                 if (entry_for_jit) {
2193                         if (map->jit_entry_exists) {
2194                                 result = KERN_INVALID_ARGUMENT;
2195                                 goto BailOut;
2196                         }
2197                         random_address = TRUE;
2198                 }
2199
2200                 if (random_address) {
2201                         /*
2202                          * Get a random start address.
2203                          */
2204                         result = vm_map_random_address_for_size(map, address, size);
2205                         if (result != KERN_SUCCESS) {
2206                                 goto BailOut;
2207                         }
2208                         start = *address;
2209                 }
2210 #if __x86_64__
2211                 else if ((start == 0 || start == vm_map_min(map)) &&
2212                          !map->disable_vmentry_reuse &&
2213                          map->vmmap_high_start != 0) {
2214                         start = map->vmmap_high_start;
2215                 }
2216 #endif /* __x86_64__ */
2217
2218
2219                 /*
2220                  *      Calculate the first possible address.
2221                  */
2222
2223                 if (start < effective_min_offset)
2224                         start = effective_min_offset;
2225                 if (start > effective_max_offset)
2226                         RETURN(KERN_NO_SPACE);
2227
2228                 /*
2229                  *      Look for the first possible address;
2230                  *      if there's already something at this
2231                  *      address, we have to start after it.
2232                  */
2233
2234                 if( map->disable_vmentry_reuse == TRUE) {
2235                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
2236                 } else {
2237
2238                         if (map->holelistenabled) {
2239                                 hole_entry = (vm_map_entry_t)map->holes_list;
2240
2241                                 if (hole_entry == NULL) {
2242                                         /*
2243                                          * No more space in the map?
2244                                          */
2245                                         result = KERN_NO_SPACE;
2246                                         goto BailOut;
2247                                 } else {
2248
2249                                         boolean_t found_hole = FALSE;
2250
2251                                         do {
2252                                                 if (hole_entry->vme_start >= start) {
2253                                                         start = hole_entry->vme_start;
2254                                                         found_hole = TRUE;
2255                                                         break;
2256                                                 }
2257
2258                                                 if (hole_entry->vme_end > start) {
2259                                                         found_hole = TRUE;
2260                                                         break;
2261                                                 }
2262                                                 hole_entry = hole_entry->vme_next;
2263
2264                                         } while (hole_entry != (vm_map_entry_t) map->holes_list);
2265
2266                                         if (found_hole == FALSE) {
2267                                                 result = KERN_NO_SPACE;
2268                                                 goto BailOut;
2269                                         }
2270
2271                                         entry = hole_entry;
2272
2273                                         if (start == 0)
2274                                                 start += PAGE_SIZE_64;
2275                                 }
2276                         } else {
2277                                 assert(first_free_is_valid(map));
2278
2279                                 entry = map->first_free;
2280
2281                                 if (entry == vm_map_to_entry(map)) {
2282                                         entry = NULL;
2283                                 } else {
2284                                        if (entry->vme_next == vm_map_to_entry(map)){
2285                                                /*
2286                                                 * Hole at the end of the map.
2287                                                 */
2288                                                 entry = NULL;
2289                                        } else {
2290                                                 if (start < (entry->vme_next)->vme_start ) {
2291                                                         start = entry->vme_end;
2292                                                         start = vm_map_round_page(start,
2293                                                                                   VM_MAP_PAGE_MASK(map));
2294                                                 } else {
2295                                                         /*
2296                                                          * Need to do a lookup.
2297                                                          */
2298                                                         entry = NULL;
2299                                                 }
2300                                        }
2301                                 }
2302
2303                                 if (entry == NULL) {
2304                                         vm_map_entry_t  tmp_entry;
2305                                         if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2306                                                 assert(!entry_for_jit);
2307                                                 start = tmp_entry->vme_end;
2308                                                 start = vm_map_round_page(start,
2309                                                                           VM_MAP_PAGE_MASK(map));
2310                                         }
2311                                         entry = tmp_entry;
2312                                 }
2313                         }
2314                 }
2315
2316                 /*
2317                  *      In any case, the "entry" always precedes
2318                  *      the proposed new region throughout the
2319                  *      loop:
2320                  */
2321
2322                 while (TRUE) {
2323                         vm_map_entry_t  next;
2324
2325                         /*
2326                          *      Find the end of the proposed new region.
2327                          *      Be sure we didn't go beyond the end, or
2328                          *      wrap around the address.
2329                          */
2330
2331                         end = ((start + mask) & ~mask);
2332                         end = vm_map_round_page(end,
2333                                                 VM_MAP_PAGE_MASK(map));
2334                         if (end < start)
2335                                 RETURN(KERN_NO_SPACE);
2336                         start = end;
2337                         assert(VM_MAP_PAGE_ALIGNED(start,
2338                                                    VM_MAP_PAGE_MASK(map)));
2339                         end += size;
2340
2341                         if ((end > effective_max_offset) || (end < start)) {
2342                                 if (map->wait_for_space) {
2343                                         assert(!keep_map_locked);
2344                                         if (size <= (effective_max_offset -
2345                                                      effective_min_offset)) {
2346                                                 assert_wait((event_t)map,
2347                                                             THREAD_ABORTSAFE);
2348                                                 vm_map_unlock(map);
2349                                                 map_locked = FALSE;
2350                                                 thread_block(THREAD_CONTINUE_NULL);
2351                                                 goto StartAgain;
2352                                         }
2353                                 }
2354                                 RETURN(KERN_NO_SPACE);
2355                         }
2356
2357                         next = entry->vme_next;
2358
2359                         if (map->holelistenabled) {
2360                                 if (entry->vme_end >= end)
2361                                         break;
2362                         } else {
2363                                 /*
2364                                  *      If there are no more entries, we must win.
2365                                  *
2366                                  *      OR
2367                                  *
2368                                  *      If there is another entry, it must be
2369                                  *      after the end of the potential new region.
2370                                  */
2371
2372                                 if (next == vm_map_to_entry(map))
2373                                         break;
2374
2375                                 if (next->vme_start >= end)
2376                                         break;
2377                         }
2378
2379                         /*
2380                          *      Didn't fit -- move to the next entry.
2381                          */
2382
2383                         entry = next;
2384
2385                         if (map->holelistenabled) {
2386                                 if (entry == (vm_map_entry_t) map->holes_list) {
2387                                         /*
2388                                          * Wrapped around
2389                                          */
2390                                         result = KERN_NO_SPACE;
2391                                         goto BailOut;
2392                                 }
2393                                 start = entry->vme_start;
2394                         } else {
2395                                 start = entry->vme_end;
2396                         }
2397
2398                         start = vm_map_round_page(start,
2399                                                   VM_MAP_PAGE_MASK(map));
2400                 }
2401
2402                 if (map->holelistenabled) {
2403                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2404                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2405                         }
2406                 }
2407
2408                 *address = start;
2409                 assert(VM_MAP_PAGE_ALIGNED(*address,
2410                                            VM_MAP_PAGE_MASK(map)));
2411         } else {
2412                 /*
2413                  *      Verify that:
2414                  *              the address doesn't itself violate
2415                  *              the mask requirement.
2416                  */
2417
2418                 vm_map_lock(map);
2419                 map_locked = TRUE;
2420                 if ((start & mask) != 0)
2421                         RETURN(KERN_NO_SPACE);
2422
2423                 /*
2424                  *      ...     the address is within bounds
2425                  */
2426
2427                 end = start + size;
2428
2429                 if ((start < effective_min_offset) ||
2430                     (end > effective_max_offset) ||
2431                     (start >= end)) {
2432                         RETURN(KERN_INVALID_ADDRESS);
2433                 }
2434
2435                 if (overwrite && zap_old_map != VM_MAP_NULL) {
2436                         int remove_flags;
2437                         /*
2438                          * Fixed mapping and "overwrite" flag: attempt to
2439                          * remove all existing mappings in the specified
2440                          * address range, saving them in our "zap_old_map".
2441                          */
2442                         remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2443                         remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2444                         if (vmk_flags.vmkf_overwrite_immutable) {
2445                                 /* we can overwrite immutable mappings */
2446                                 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2447                         }
2448                         (void) vm_map_delete(map, start, end,
2449                                              remove_flags,
2450                                              zap_old_map);
2451                 }
2452
2453                 /*
2454                  *      ...     the starting address isn't allocated
2455                  */
2456
2457                 if (vm_map_lookup_entry(map, start, &entry)) {
2458                         if (! (vmk_flags.vmkf_already)) {
2459                                 RETURN(KERN_NO_SPACE);
2460                         }
2461                         /*
2462                          * Check if what's already there is what we want.
2463                          */
2464                         tmp_start = start;
2465                         tmp_offset = offset;
2466                         if (entry->vme_start < start) {
2467                                 tmp_start -= start - entry->vme_start;
2468                                 tmp_offset -= start - entry->vme_start;
2469
2470                         }
2471                         for (; entry->vme_start < end;
2472                              entry = entry->vme_next) {
2473                                 /*
2474                                  * Check if the mapping's attributes
2475                                  * match the existing map entry.
2476                                  */
2477                                 if (entry == vm_map_to_entry(map) ||
2478                                     entry->vme_start != tmp_start ||
2479                                     entry->is_sub_map != is_submap ||
2480                                     VME_OFFSET(entry) != tmp_offset ||
2481                                     entry->needs_copy != needs_copy ||
2482                                     entry->protection != cur_protection ||
2483                                     entry->max_protection != max_protection ||
2484                                     entry->inheritance != inheritance ||
2485                                     entry->iokit_acct != iokit_acct ||
2486                                     VME_ALIAS(entry) != alias) {
2487                                         /* not the same mapping ! */
2488                                         RETURN(KERN_NO_SPACE);
2489                                 }
2490                                 /*
2491                                  * Check if the same object is being mapped.
2492                                  */
2493                                 if (is_submap) {
2494                                         if (VME_SUBMAP(entry) !=
2495                                             (vm_map_t) object) {
2496                                                 /* not the same submap */
2497                                                 RETURN(KERN_NO_SPACE);
2498                                         }
2499                                 } else {
2500                                         if (VME_OBJECT(entry) != object) {
2501                                                 /* not the same VM object... */
2502                                                 vm_object_t obj2;
2503
2504                                                 obj2 = VME_OBJECT(entry);
2505                                                 if ((obj2 == VM_OBJECT_NULL ||
2506                                                      obj2->internal) &&
2507                                                     (object == VM_OBJECT_NULL ||
2508                                                      object->internal)) {
2509                                                         /*
2510                                                          * ... but both are
2511                                                          * anonymous memory,
2512                                                          * so equivalent.
2513                                                          */
2514                                                 } else {
2515                                                         RETURN(KERN_NO_SPACE);
2516                                                 }
2517                                         }
2518                                 }
2519
2520                                 tmp_offset += entry->vme_end - entry->vme_start;
2521                                 tmp_start += entry->vme_end - entry->vme_start;
2522                                 if (entry->vme_end >= end) {
2523                                         /* reached the end of our mapping */
2524                                         break;
2525                                 }
2526                         }
2527                         /* it all matches:  let's use what's already there ! */
2528                         RETURN(KERN_MEMORY_PRESENT);
2529                 }
2530
2531                 /*
2532                  *      ...     the next region doesn't overlap the
2533                  *              end point.
2534                  */
2535
2536                 if ((entry->vme_next != vm_map_to_entry(map)) &&
2537                     (entry->vme_next->vme_start < end))
2538                         RETURN(KERN_NO_SPACE);
2539         }
2540
2541         /*
2542          *      At this point,
2543          *              "start" and "end" should define the endpoints of the
2544          *                      available new range, and
2545          *              "entry" should refer to the region before the new
2546          *                      range, and
2547          *
2548          *              the map should be locked.
2549          */
2550
2551         /*
2552          *      See whether we can avoid creating a new entry (and object) by
2553          *      extending one of our neighbors.  [So far, we only attempt to
2554          *      extend from below.]  Note that we can never extend/join
2555          *      purgable objects because they need to remain distinct
2556          *      entities in order to implement their "volatile object"
2557          *      semantics.
2558          */
2559
2560         if (purgable || entry_for_jit) {
2561                 if (object == VM_OBJECT_NULL) {
2562
2563                         object = vm_object_allocate(size);
2564                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2565                         object->true_share = TRUE;
2566                         if (purgable) {
2567                                 task_t owner;
2568                                 object->purgable = VM_PURGABLE_NONVOLATILE;
2569                                 if (map->pmap == kernel_pmap) {
2570                                         /*
2571                                          * Purgeable mappings made in a kernel
2572                                          * map are "owned" by the kernel itself
2573                                          * rather than the current user task
2574                                          * because they're likely to be used by
2575                                          * more than this user task (see
2576                                          * execargs_purgeable_allocate(), for
2577                                          * example).
2578                                          */
2579                                         owner = kernel_task;
2580                                 } else {
2581                                         owner = current_task();
2582                                 }
2583                                 assert(object->vo_purgeable_owner == NULL);
2584                                 assert(object->resident_page_count == 0);
2585                                 assert(object->wired_page_count == 0);
2586                                 vm_object_lock(object);
2587                                 vm_purgeable_nonvolatile_enqueue(object, owner);
2588                                 vm_object_unlock(object);
2589                         }
2590                         offset = (vm_object_offset_t)0;
2591                 }
2592         } else if ((is_submap == FALSE) &&
2593                    (object == VM_OBJECT_NULL) &&
2594                    (entry != vm_map_to_entry(map)) &&
2595                    (entry->vme_end == start) &&
2596                    (!entry->is_shared) &&
2597                    (!entry->is_sub_map) &&
2598                    (!entry->in_transition) &&
2599                    (!entry->needs_wakeup) &&
2600                    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2601                    (entry->protection == cur_protection) &&
2602                    (entry->max_protection == max_protection) &&
2603                    (entry->inheritance == inheritance) &&
2604                    ((user_alias == VM_MEMORY_REALLOC) ||
2605                     (VME_ALIAS(entry) == alias)) &&
2606                    (entry->no_cache == no_cache) &&
2607                    (entry->permanent == permanent) &&
2608                    /* no coalescing for immutable executable mappings */
2609                    !((entry->protection & VM_PROT_EXECUTE) &&
2610                      entry->permanent) &&
2611                    (!entry->superpage_size && !superpage_size) &&
2612                    /*
2613                     * No coalescing if not map-aligned, to avoid propagating
2614                     * that condition any further than needed:
2615                     */
2616                    (!entry->map_aligned || !clear_map_aligned) &&
2617                    (!entry->zero_wired_pages) &&
2618                    (!entry->used_for_jit && !entry_for_jit) &&
2619                    (entry->iokit_acct == iokit_acct) &&
2620                    (!entry->vme_resilient_codesign) &&
2621                    (!entry->vme_resilient_media) &&
2622                    (!entry->vme_atomic) &&
2623
2624                    ((entry->vme_end - entry->vme_start) + size <=
2625                     (user_alias == VM_MEMORY_REALLOC ?
2626                      ANON_CHUNK_SIZE :
2627                      NO_COALESCE_LIMIT)) &&
2628
2629                    (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2630                 if (vm_object_coalesce(VME_OBJECT(entry),
2631                                        VM_OBJECT_NULL,
2632                                        VME_OFFSET(entry),
2633                                        (vm_object_offset_t) 0,
2634                                        (vm_map_size_t)(entry->vme_end - entry->vme_start),
2635                                        (vm_map_size_t)(end - entry->vme_end))) {
2636
2637                         /*
2638                          *      Coalesced the two objects - can extend
2639                          *      the previous map entry to include the
2640                          *      new range.
2641                          */
2642                         map->size += (end - entry->vme_end);
2643                         assert(entry->vme_start < end);
2644                         assert(VM_MAP_PAGE_ALIGNED(end,
2645                                                    VM_MAP_PAGE_MASK(map)));
2646                         if (__improbable(vm_debug_events))
2647                                 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2648                         entry->vme_end = end;
2649                         if (map->holelistenabled) {
2650                                 vm_map_store_update_first_free(map, entry, TRUE);
2651                         } else {
2652                                 vm_map_store_update_first_free(map, map->first_free, TRUE);
2653                         }
2654                         new_mapping_established = TRUE;
2655                         RETURN(KERN_SUCCESS);
2656                 }
2657         }
2658
2659         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2660         new_entry = NULL;
2661
2662         for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2663                 tmp2_end = tmp2_start + step;
2664                 /*
2665                  *      Create a new entry
2666                  *
2667                  * XXX FBDP
2668                  * The reserved "page zero" in each process's address space can
2669                  * be arbitrarily large.  Splitting it into separate objects and
2670                  * therefore different VM map entries serves no purpose and just
2671                  * slows down operations on the VM map, so let's not split the
2672                  * allocation into chunks if the max protection is NONE.  That
2673                  * memory should never be accessible, so it will never get to the
2674                  * default pager.
2675                  */
2676                 tmp_start = tmp2_start;
2677                 if (object == VM_OBJECT_NULL &&
2678                     size > chunk_size &&
2679                     max_protection != VM_PROT_NONE &&
2680                     superpage_size == 0)
2681                         tmp_end = tmp_start + chunk_size;
2682                 else
2683                         tmp_end = tmp2_end;
2684                 do {
2685                         new_entry = vm_map_entry_insert(
2686                                 map, entry, tmp_start, tmp_end,
2687                                 object, offset, needs_copy,
2688                                 FALSE, FALSE,
2689                                 cur_protection, max_protection,
2690                                 VM_BEHAVIOR_DEFAULT,
2691                                 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2692                                 0,
2693                                 no_cache,
2694                                 permanent,
2695                                 superpage_size,
2696                                 clear_map_aligned,
2697                                 is_submap,
2698                                 entry_for_jit,
2699                                 alias);
2700
2701                         assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2702
2703                         if (resilient_codesign &&
2704                             ! ((cur_protection | max_protection) &
2705                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2706                                 new_entry->vme_resilient_codesign = TRUE;
2707                         }
2708
2709                         if (resilient_media &&
2710                             ! ((cur_protection | max_protection) &
2711                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2712                                 new_entry->vme_resilient_media = TRUE;
2713                         }
2714
2715                         assert(!new_entry->iokit_acct);
2716                         if (!is_submap &&
2717                             object != VM_OBJECT_NULL &&
2718                             object->purgable != VM_PURGABLE_DENY) {
2719                                 assert(new_entry->use_pmap);
2720                                 assert(!new_entry->iokit_acct);
2721                                 /*
2722                                  * Turn off pmap accounting since
2723                                  * purgeable objects have their
2724                                  * own ledgers.
2725                                  */
2726                                 new_entry->use_pmap = FALSE;
2727                         } else if (!is_submap &&
2728                                    iokit_acct &&
2729                                    object != VM_OBJECT_NULL &&
2730                                    object->internal) {
2731                                 /* alternate accounting */
2732                                 assert(!new_entry->iokit_acct);
2733                                 assert(new_entry->use_pmap);
2734                                 new_entry->iokit_acct = TRUE;
2735                                 new_entry->use_pmap = FALSE;
2736                                 DTRACE_VM4(
2737                                         vm_map_iokit_mapped_region,
2738                                         vm_map_t, map,
2739                                         vm_map_offset_t, new_entry->vme_start,
2740                                         vm_map_offset_t, new_entry->vme_end,
2741                                         int, VME_ALIAS(new_entry));
2742                                 vm_map_iokit_mapped_region(
2743                                         map,
2744                                         (new_entry->vme_end -
2745                                          new_entry->vme_start));
2746                         } else if (!is_submap) {
2747                                 assert(!new_entry->iokit_acct);
2748                                 assert(new_entry->use_pmap);
2749                         }
2750
2751                         if (is_submap) {
2752                                 vm_map_t        submap;
2753                                 boolean_t       submap_is_64bit;
2754                                 boolean_t       use_pmap;
2755
2756                                 assert(new_entry->is_sub_map);
2757                                 assert(!new_entry->use_pmap);
2758                                 assert(!new_entry->iokit_acct);
2759                                 submap = (vm_map_t) object;
2760                                 submap_is_64bit = vm_map_is_64bit(submap);
2761                                 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2762 #ifndef NO_NESTED_PMAP
2763                                 if (use_pmap && submap->pmap == NULL) {
2764                                         ledger_t ledger = map->pmap->ledger;
2765                                         /* we need a sub pmap to nest... */
2766                                         submap->pmap = pmap_create(ledger, 0,
2767                                             submap_is_64bit);
2768                                         if (submap->pmap == NULL) {
2769                                                 /* let's proceed without nesting... */
2770                                         }
2771 #if     defined(__arm__) || defined(__arm64__)
2772                                         else {
2773                                                 pmap_set_nested(submap->pmap);
2774                                         }
2775 #endif
2776                                 }
2777                                 if (use_pmap && submap->pmap != NULL) {
2778                                         kr = pmap_nest(map->pmap,
2779                                                        submap->pmap,
2780                                                        tmp_start,
2781                                                        tmp_start,
2782                                                        tmp_end - tmp_start);
2783                                         if (kr != KERN_SUCCESS) {
2784                                                 printf("vm_map_enter: "
2785                                                        "pmap_nest(0x%llx,0x%llx) "
2786                                                        "error 0x%x\n",
2787                                                        (long long)tmp_start,
2788                                                        (long long)tmp_end,
2789                                                        kr);
2790                                         } else {
2791                                                 /* we're now nested ! */
2792                                                 new_entry->use_pmap = TRUE;
2793                                                 pmap_empty = FALSE;
2794                                         }
2795                                 }
2796 #endif /* NO_NESTED_PMAP */
2797                         }
2798                         entry = new_entry;
2799
2800                         if (superpage_size) {
2801                                 vm_page_t pages, m;
2802                                 vm_object_t sp_object;
2803                                 vm_object_offset_t sp_offset;
2804
2805                                 VME_OFFSET_SET(entry, 0);
2806
2807                                 /* allocate one superpage */
2808                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2809                                 if (kr != KERN_SUCCESS) {
2810                                         /* deallocate whole range... */
2811                                         new_mapping_established = TRUE;
2812                                         /* ... but only up to "tmp_end" */
2813                                         size -= end - tmp_end;
2814                                         RETURN(kr);
2815                                 }
2816
2817                                 /* create one vm_object per superpage */
2818                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2819                                 sp_object->phys_contiguous = TRUE;
2820                                 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
2821                                 VME_OBJECT_SET(entry, sp_object);
2822                                 assert(entry->use_pmap);
2823
2824                                 /* enter the base pages into the object */
2825                                 vm_object_lock(sp_object);
2826                                 for (sp_offset = 0;
2827                                      sp_offset < SUPERPAGE_SIZE;
2828                                      sp_offset += PAGE_SIZE) {
2829                                         m = pages;
2830                                         pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
2831                                         pages = NEXT_PAGE(m);
2832                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2833                                         vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2834                                 }
2835                                 vm_object_unlock(sp_object);
2836                         }
2837                 } while (tmp_end != tmp2_end &&
2838                          (tmp_start = tmp_end) &&
2839                          (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
2840                           tmp_end + chunk_size : tmp2_end));
2841         }
2842
2843         new_mapping_established = TRUE;
2844
2845 BailOut:
2846         assert(map_locked == TRUE);
2847
2848         if (result == KERN_SUCCESS) {
2849                 vm_prot_t pager_prot;
2850                 memory_object_t pager;
2851
2852 #if DEBUG
2853                 if (pmap_empty &&
2854                     !(vmk_flags.vmkf_no_pmap_check)) {
2855                         assert(vm_map_pmap_is_empty(map,
2856                                                     *address,
2857                                                     *address+size));
2858                 }
2859 #endif /* DEBUG */
2860
2861                 /*
2862                  * For "named" VM objects, let the pager know that the
2863                  * memory object is being mapped.  Some pagers need to keep
2864                  * track of this, to know when they can reclaim the memory
2865                  * object, for example.
2866                  * VM calls memory_object_map() for each mapping (specifying
2867                  * the protection of each mapping) and calls
2868                  * memory_object_last_unmap() when all the mappings are gone.
2869                  */
2870                 pager_prot = max_protection;
2871                 if (needs_copy) {
2872                         /*
2873                          * Copy-On-Write mapping: won't modify
2874                          * the memory object.
2875                          */
2876                         pager_prot &= ~VM_PROT_WRITE;
2877                 }
2878                 if (!is_submap &&
2879                     object != VM_OBJECT_NULL &&
2880                     object->named &&
2881                     object->pager != MEMORY_OBJECT_NULL) {
2882                         vm_object_lock(object);
2883                         pager = object->pager;
2884                         if (object->named &&
2885                             pager != MEMORY_OBJECT_NULL) {
2886                                 assert(object->pager_ready);
2887                                 vm_object_mapping_wait(object, THREAD_UNINT);
2888                                 vm_object_mapping_begin(object);
2889                                 vm_object_unlock(object);
2890
2891                                 kr = memory_object_map(pager, pager_prot);
2892                                 assert(kr == KERN_SUCCESS);
2893
2894                                 vm_object_lock(object);
2895                                 vm_object_mapping_end(object);
2896                         }
2897                         vm_object_unlock(object);
2898                 }
2899         }
2900
2901         assert(map_locked == TRUE);
2902
2903         if (!keep_map_locked) {
2904                 vm_map_unlock(map);
2905                 map_locked = FALSE;
2906         }
2907
2908         /*
2909          * We can't hold the map lock if we enter this block.
2910          */
2911
2912         if (result == KERN_SUCCESS) {
2913
2914                 /*      Wire down the new entry if the user
2915                  *      requested all new map entries be wired.
2916                  */
2917                 if ((map->wiring_required)||(superpage_size)) {
2918                         assert(!keep_map_locked);
2919                         pmap_empty = FALSE; /* pmap won't be empty */
2920                         kr = vm_map_wire_kernel(map, start, end,
2921                                              new_entry->protection, VM_KERN_MEMORY_MLOCK,
2922                                              TRUE);
2923                         result = kr;
2924                 }
2925
2926         }
2927
2928         if (result != KERN_SUCCESS) {
2929                 if (new_mapping_established) {
2930                         /*
2931                          * We have to get rid of the new mappings since we
2932                          * won't make them available to the user.
2933                          * Try and do that atomically, to minimize the risk
2934                          * that someone else creates new mappings in that range.
2935                          */
2936                         zap_new_map = vm_map_create(PMAP_NULL,
2937                                                     *address,
2938                                                     *address + size,
2939                                                     map->hdr.entries_pageable);
2940                         vm_map_set_page_shift(zap_new_map,
2941                                               VM_MAP_PAGE_SHIFT(map));
2942                         vm_map_disable_hole_optimization(zap_new_map);
2943
2944                         if (!map_locked) {
2945                                 vm_map_lock(map);
2946                                 map_locked = TRUE;
2947                         }
2948                         (void) vm_map_delete(map, *address, *address+size,
2949                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
2950                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
2951                                              zap_new_map);
2952                 }
2953                 if (zap_old_map != VM_MAP_NULL &&
2954                     zap_old_map->hdr.nentries != 0) {
2955                         vm_map_entry_t  entry1, entry2;
2956
2957                         /*
2958                          * The new mapping failed.  Attempt to restore
2959                          * the old mappings, saved in the "zap_old_map".
2960                          */
2961                         if (!map_locked) {
2962                                 vm_map_lock(map);
2963                                 map_locked = TRUE;
2964                         }
2965
2966                         /* first check if the coast is still clear */
2967                         start = vm_map_first_entry(zap_old_map)->vme_start;
2968                         end = vm_map_last_entry(zap_old_map)->vme_end;
2969                         if (vm_map_lookup_entry(map, start, &entry1) ||
2970                             vm_map_lookup_entry(map, end, &entry2) ||
2971                             entry1 != entry2) {
2972                                 /*
2973                                  * Part of that range has already been
2974                                  * re-mapped:  we can't restore the old
2975                                  * mappings...
2976                                  */
2977                                 vm_map_enter_restore_failures++;
2978                         } else {
2979                                 /*
2980                                  * Transfer the saved map entries from
2981                                  * "zap_old_map" to the original "map",
2982                                  * inserting them all after "entry1".
2983                                  */
2984                                 for (entry2 = vm_map_first_entry(zap_old_map);
2985                                      entry2 != vm_map_to_entry(zap_old_map);
2986                                      entry2 = vm_map_first_entry(zap_old_map)) {
2987                                         vm_map_size_t entry_size;
2988
2989                                         entry_size = (entry2->vme_end -
2990                                                       entry2->vme_start);
2991                                         vm_map_store_entry_unlink(zap_old_map,
2992                                                             entry2);
2993                                         zap_old_map->size -= entry_size;
2994                                         vm_map_store_entry_link(map, entry1, entry2);
2995                                         map->size += entry_size;
2996                                         entry1 = entry2;
2997                                 }
2998                                 if (map->wiring_required) {
2999                                         /*
3000                                          * XXX TODO: we should rewire the
3001                                          * old pages here...
3002                                          */
3003                                 }
3004                                 vm_map_enter_restore_successes++;
3005                         }
3006                 }
3007         }
3008
3009         /*
3010          * The caller is responsible for releasing the lock if it requested to
3011          * keep the map locked.
3012          */
3013         if (map_locked && !keep_map_locked) {
3014                 vm_map_unlock(map);
3015         }
3016
3017         /*
3018          * Get rid of the "zap_maps" and all the map entries that
3019          * they may still contain.
3020          */
3021         if (zap_old_map != VM_MAP_NULL) {
3022                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3023                 zap_old_map = VM_MAP_NULL;
3024         }
3025         if (zap_new_map != VM_MAP_NULL) {
3026                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3027                 zap_new_map = VM_MAP_NULL;
3028         }
3029
3030         return result;
3031
3032 #undef  RETURN
3033 }
3034
3035 #if __arm64__
3036 extern const struct memory_object_pager_ops fourk_pager_ops;
3037 kern_return_t
3038 vm_map_enter_fourk(
3039         vm_map_t                map,
3040         vm_map_offset_t         *address,       /* IN/OUT */
3041         vm_map_size_t           size,
3042         vm_map_offset_t         mask,
3043         int                     flags,
3044         vm_map_kernel_flags_t   vmk_flags,
3045         vm_tag_t                alias,
3046         vm_object_t             object,
3047         vm_object_offset_t      offset,
3048         boolean_t               needs_copy,
3049         vm_prot_t               cur_protection,
3050         vm_prot_t               max_protection,
3051         vm_inherit_t            inheritance)
3052 {
3053         vm_map_entry_t          entry, new_entry;
3054         vm_map_offset_t         start, fourk_start;
3055         vm_map_offset_t         end, fourk_end;
3056         vm_map_size_t           fourk_size;
3057         kern_return_t           result = KERN_SUCCESS;
3058         vm_map_t                zap_old_map = VM_MAP_NULL;
3059         vm_map_t                zap_new_map = VM_MAP_NULL;
3060         boolean_t               map_locked = FALSE;
3061         boolean_t               pmap_empty = TRUE;
3062         boolean_t               new_mapping_established = FALSE;
3063         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3064         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3065         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3066         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3067         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3068         boolean_t               is_submap = vmk_flags.vmkf_submap;
3069         boolean_t               permanent = vmk_flags.vmkf_permanent;
3070         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
3071 //      boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
3072         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3073         vm_map_offset_t         effective_min_offset, effective_max_offset;
3074         kern_return_t           kr;
3075         boolean_t               clear_map_aligned = FALSE;
3076         memory_object_t         fourk_mem_obj;
3077         vm_object_t             fourk_object;
3078         vm_map_offset_t         fourk_pager_offset;
3079         int                     fourk_pager_index_start, fourk_pager_index_num;
3080         int                     cur_idx;
3081         boolean_t               fourk_copy;
3082         vm_object_t             copy_object;
3083         vm_object_offset_t      copy_offset;
3084
3085         fourk_mem_obj = MEMORY_OBJECT_NULL;
3086         fourk_object = VM_OBJECT_NULL;
3087
3088         if (superpage_size) {
3089                 return KERN_NOT_SUPPORTED;
3090         }
3091
3092 #if CONFIG_EMBEDDED
3093         if (cur_protection & VM_PROT_WRITE) {
3094                 if ((cur_protection & VM_PROT_EXECUTE) &&
3095                     !entry_for_jit) {
3096                         printf("EMBEDDED: %s: curprot cannot be write+execute. "
3097                                "turning off execute\n",
3098                                __FUNCTION__);
3099                         cur_protection &= ~VM_PROT_EXECUTE;
3100                 }
3101         }
3102 #endif /* CONFIG_EMBEDDED */
3103
3104         /*
3105          * If the task has requested executable lockdown,
3106          * deny any new executable mapping.
3107          */
3108         if (map->map_disallow_new_exec == TRUE) {
3109                 if (cur_protection & VM_PROT_EXECUTE) {
3110                         return KERN_PROTECTION_FAILURE;
3111                 }
3112         }
3113
3114         if (is_submap) {
3115                 return KERN_NOT_SUPPORTED;
3116         }
3117         if (vmk_flags.vmkf_already) {
3118                 return KERN_NOT_SUPPORTED;
3119         }
3120         if (purgable || entry_for_jit) {
3121                 return KERN_NOT_SUPPORTED;
3122         }
3123
3124         effective_min_offset = map->min_offset;
3125
3126         if (vmk_flags.vmkf_beyond_max) {
3127                 return KERN_NOT_SUPPORTED;
3128         } else {
3129                 effective_max_offset = map->max_offset;
3130         }
3131
3132         if (size == 0 ||
3133             (offset & FOURK_PAGE_MASK) != 0) {
3134                 *address = 0;
3135                 return KERN_INVALID_ARGUMENT;
3136         }
3137
3138 #define RETURN(value)   { result = value; goto BailOut; }
3139
3140         assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3141         assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3142
3143         if (!anywhere && overwrite) {
3144                 return KERN_NOT_SUPPORTED;
3145         }
3146         if (!anywhere && overwrite) {
3147                 /*
3148                  * Create a temporary VM map to hold the old mappings in the
3149                  * affected area while we create the new one.
3150                  * This avoids releasing the VM map lock in
3151                  * vm_map_entry_delete() and allows atomicity
3152                  * when we want to replace some mappings with a new one.
3153                  * It also allows us to restore the old VM mappings if the
3154                  * new mapping fails.
3155                  */
3156                 zap_old_map = vm_map_create(PMAP_NULL,
3157                                             *address,
3158                                             *address + size,
3159                                             map->hdr.entries_pageable);
3160                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3161                 vm_map_disable_hole_optimization(zap_old_map);
3162         }
3163
3164         fourk_start = *address;
3165         fourk_size = size;
3166         fourk_end = fourk_start + fourk_size;
3167
3168         start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3169         end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3170         size = end - start;
3171
3172         if (anywhere) {
3173                 return KERN_NOT_SUPPORTED;
3174         } else {
3175                 /*
3176                  *      Verify that:
3177                  *              the address doesn't itself violate
3178                  *              the mask requirement.
3179                  */
3180
3181                 vm_map_lock(map);
3182                 map_locked = TRUE;
3183                 if ((start & mask) != 0) {
3184                         RETURN(KERN_NO_SPACE);
3185                 }
3186
3187                 /*
3188                  *      ...     the address is within bounds
3189                  */
3190
3191                 end = start + size;
3192
3193                 if ((start < effective_min_offset) ||
3194                     (end > effective_max_offset) ||
3195                     (start >= end)) {
3196                         RETURN(KERN_INVALID_ADDRESS);
3197                 }
3198
3199                 if (overwrite && zap_old_map != VM_MAP_NULL) {
3200                         /*
3201                          * Fixed mapping and "overwrite" flag: attempt to
3202                          * remove all existing mappings in the specified
3203                          * address range, saving them in our "zap_old_map".
3204                          */
3205                         (void) vm_map_delete(map, start, end,
3206                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3207                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3208                                              zap_old_map);
3209                 }
3210
3211                 /*
3212                  *      ...     the starting address isn't allocated
3213                  */
3214                 if (vm_map_lookup_entry(map, start, &entry)) {
3215                         vm_object_t cur_object, shadow_object;
3216
3217                         /*
3218                          * We might already have some 4K mappings
3219                          * in a 16K page here.
3220                          */
3221
3222                         if (entry->vme_end - entry->vme_start
3223                             != SIXTEENK_PAGE_SIZE) {
3224                                 RETURN(KERN_NO_SPACE);
3225                         }
3226                         if (entry->is_sub_map) {
3227                                 RETURN(KERN_NO_SPACE);
3228                         }
3229                         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3230                                 RETURN(KERN_NO_SPACE);
3231                         }
3232
3233                         /* go all the way down the shadow chain */
3234                         cur_object = VME_OBJECT(entry);
3235                         vm_object_lock(cur_object);
3236                         while (cur_object->shadow != VM_OBJECT_NULL) {
3237                                 shadow_object = cur_object->shadow;
3238                                 vm_object_lock(shadow_object);
3239                                 vm_object_unlock(cur_object);
3240                                 cur_object = shadow_object;
3241                                 shadow_object = VM_OBJECT_NULL;
3242                         }
3243                         if (cur_object->internal ||
3244                             cur_object->pager == NULL) {
3245                                 vm_object_unlock(cur_object);
3246                                 RETURN(KERN_NO_SPACE);
3247                         }
3248                         if (cur_object->pager->mo_pager_ops
3249                             != &fourk_pager_ops) {
3250                                 vm_object_unlock(cur_object);
3251                                 RETURN(KERN_NO_SPACE);
3252                         }
3253                         fourk_object = cur_object;
3254                         fourk_mem_obj = fourk_object->pager;
3255
3256                         /* keep the "4K" object alive */
3257                         vm_object_reference_locked(fourk_object);
3258                         vm_object_unlock(fourk_object);
3259
3260                         /* merge permissions */
3261                         entry->protection |= cur_protection;
3262                         entry->max_protection |= max_protection;
3263                         if ((entry->protection & (VM_PROT_WRITE |
3264                                                   VM_PROT_EXECUTE)) ==
3265                             (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3266                             fourk_binary_compatibility_unsafe &&
3267                             fourk_binary_compatibility_allow_wx) {
3268                                 /* write+execute: need to be "jit" */
3269                                 entry->used_for_jit = TRUE;
3270                         }
3271
3272                         goto map_in_fourk_pager;
3273                 }
3274
3275                 /*
3276                  *      ...     the next region doesn't overlap the
3277                  *              end point.
3278                  */
3279
3280                 if ((entry->vme_next != vm_map_to_entry(map)) &&
3281                     (entry->vme_next->vme_start < end)) {
3282                         RETURN(KERN_NO_SPACE);
3283                 }
3284         }
3285
3286         /*
3287          *      At this point,
3288          *              "start" and "end" should define the endpoints of the
3289          *                      available new range, and
3290          *              "entry" should refer to the region before the new
3291          *                      range, and
3292          *
3293          *              the map should be locked.
3294          */
3295
3296         /* create a new "4K" pager */
3297         fourk_mem_obj = fourk_pager_create();
3298         fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3299         assert(fourk_object);
3300
3301         /* keep the "4K" object alive */
3302         vm_object_reference(fourk_object);
3303
3304         /* create a "copy" object, to map the "4K" object copy-on-write */
3305         fourk_copy = TRUE;
3306         result = vm_object_copy_strategically(fourk_object,
3307                                               0,
3308                                               end - start,
3309                                               &copy_object,
3310                                               &copy_offset,
3311                                               &fourk_copy);
3312         assert(result == KERN_SUCCESS);
3313         assert(copy_object != VM_OBJECT_NULL);
3314         assert(copy_offset == 0);
3315
3316         /* take a reference on the copy object, for this mapping */
3317         vm_object_reference(copy_object);
3318
3319         /* map the "4K" pager's copy object */
3320         new_entry =
3321                 vm_map_entry_insert(map, entry,
3322                                     vm_map_trunc_page(start,
3323                                                       VM_MAP_PAGE_MASK(map)),
3324                                     vm_map_round_page(end,
3325                                                       VM_MAP_PAGE_MASK(map)),
3326                                     copy_object,
3327                                     0, /* offset */
3328                                     FALSE, /* needs_copy */
3329                                     FALSE, FALSE,
3330                                     cur_protection, max_protection,
3331                                     VM_BEHAVIOR_DEFAULT,
3332                                     ((entry_for_jit)
3333                                      ? VM_INHERIT_NONE
3334                                      : inheritance),
3335                                     0,
3336                                     no_cache,
3337                                     permanent,
3338                                     superpage_size,
3339                                     clear_map_aligned,
3340                                     is_submap,
3341                                     FALSE, /* jit */
3342                                     alias);
3343         entry = new_entry;
3344
3345 #if VM_MAP_DEBUG_FOURK
3346         if (vm_map_debug_fourk) {
3347                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3348                        map,
3349                        (uint64_t) entry->vme_start,
3350                        (uint64_t) entry->vme_end,
3351                        fourk_mem_obj);
3352         }
3353 #endif /* VM_MAP_DEBUG_FOURK */
3354
3355         new_mapping_established = TRUE;
3356
3357 map_in_fourk_pager:
3358         /* "map" the original "object" where it belongs in the "4K" pager */
3359         fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3360         fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3361         if (fourk_size > SIXTEENK_PAGE_SIZE) {
3362                 fourk_pager_index_num = 4;
3363         } else {
3364                 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3365         }
3366         if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3367                 fourk_pager_index_num = 4 - fourk_pager_index_start;
3368         }
3369         for (cur_idx = 0;
3370              cur_idx < fourk_pager_index_num;
3371              cur_idx++) {
3372                 vm_object_t             old_object;
3373                 vm_object_offset_t      old_offset;
3374
3375                 kr = fourk_pager_populate(fourk_mem_obj,
3376                                           TRUE, /* overwrite */
3377                                           fourk_pager_index_start + cur_idx,
3378                                           object,
3379                                           (object
3380                                            ? (offset +
3381                                               (cur_idx * FOURK_PAGE_SIZE))
3382                                            : 0),
3383                                           &old_object,
3384                                           &old_offset);
3385 #if VM_MAP_DEBUG_FOURK
3386                 if (vm_map_debug_fourk) {
3387                         if (old_object == (vm_object_t) -1 &&
3388                             old_offset == (vm_object_offset_t) -1) {
3389                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3390                                        "pager [%p:0x%llx] "
3391                                        "populate[%d] "
3392                                        "[object:%p,offset:0x%llx]\n",
3393                                        map,
3394                                        (uint64_t) entry->vme_start,
3395                                        (uint64_t) entry->vme_end,
3396                                        fourk_mem_obj,
3397                                        VME_OFFSET(entry),
3398                                        fourk_pager_index_start + cur_idx,
3399                                        object,
3400                                        (object
3401                                         ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3402                                         : 0));
3403                         } else {
3404                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3405                                        "pager [%p:0x%llx] "
3406                                        "populate[%d] [object:%p,offset:0x%llx] "
3407                                        "old [%p:0x%llx]\n",
3408                                        map,
3409                                        (uint64_t) entry->vme_start,
3410                                        (uint64_t) entry->vme_end,
3411                                        fourk_mem_obj,
3412                                        VME_OFFSET(entry),
3413                                        fourk_pager_index_start + cur_idx,
3414                                        object,
3415                                        (object
3416                                         ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3417                                         : 0),
3418                                        old_object,
3419                                        old_offset);
3420                         }
3421                 }
3422 #endif /* VM_MAP_DEBUG_FOURK */
3423
3424                 assert(kr == KERN_SUCCESS);
3425                 if (object != old_object &&
3426                     object != VM_OBJECT_NULL &&
3427                     object != (vm_object_t) -1) {
3428                         vm_object_reference(object);
3429                 }
3430                 if (object != old_object &&
3431                     old_object != VM_OBJECT_NULL &&
3432                     old_object != (vm_object_t) -1) {
3433                         vm_object_deallocate(old_object);
3434                 }
3435         }
3436
3437 BailOut:
3438         assert(map_locked == TRUE);
3439
3440         if (fourk_object != VM_OBJECT_NULL) {
3441                 vm_object_deallocate(fourk_object);
3442                 fourk_object = VM_OBJECT_NULL;
3443                 fourk_mem_obj = MEMORY_OBJECT_NULL;
3444         }
3445
3446         if (result == KERN_SUCCESS) {
3447                 vm_prot_t pager_prot;
3448                 memory_object_t pager;
3449
3450 #if DEBUG
3451                 if (pmap_empty &&
3452                     !(vmk_flags.vmkf_no_pmap_check)) {
3453                         assert(vm_map_pmap_is_empty(map,
3454                                                     *address,
3455                                                     *address+size));
3456                 }
3457 #endif /* DEBUG */
3458
3459                 /*
3460                  * For "named" VM objects, let the pager know that the
3461                  * memory object is being mapped.  Some pagers need to keep
3462                  * track of this, to know when they can reclaim the memory
3463                  * object, for example.
3464                  * VM calls memory_object_map() for each mapping (specifying
3465                  * the protection of each mapping) and calls
3466                  * memory_object_last_unmap() when all the mappings are gone.
3467                  */
3468                 pager_prot = max_protection;
3469                 if (needs_copy) {
3470                         /*
3471                          * Copy-On-Write mapping: won't modify
3472                          * the memory object.
3473                          */
3474                         pager_prot &= ~VM_PROT_WRITE;
3475                 }
3476                 if (!is_submap &&
3477                     object != VM_OBJECT_NULL &&
3478                     object->named &&
3479                     object->pager != MEMORY_OBJECT_NULL) {
3480                         vm_object_lock(object);
3481                         pager = object->pager;
3482                         if (object->named &&
3483                             pager != MEMORY_OBJECT_NULL) {
3484                                 assert(object->pager_ready);
3485                                 vm_object_mapping_wait(object, THREAD_UNINT);
3486                                 vm_object_mapping_begin(object);
3487                                 vm_object_unlock(object);
3488
3489                                 kr = memory_object_map(pager, pager_prot);
3490                                 assert(kr == KERN_SUCCESS);
3491
3492                                 vm_object_lock(object);
3493                                 vm_object_mapping_end(object);
3494                         }
3495                         vm_object_unlock(object);
3496                 }
3497                 if (!is_submap &&
3498                     fourk_object != VM_OBJECT_NULL &&
3499                     fourk_object->named &&
3500                     fourk_object->pager != MEMORY_OBJECT_NULL) {
3501                         vm_object_lock(fourk_object);
3502                         pager = fourk_object->pager;
3503                         if (fourk_object->named &&
3504                             pager != MEMORY_OBJECT_NULL) {
3505                                 assert(fourk_object->pager_ready);
3506                                 vm_object_mapping_wait(fourk_object,
3507                                                        THREAD_UNINT);
3508                                 vm_object_mapping_begin(fourk_object);
3509                                 vm_object_unlock(fourk_object);
3510
3511                                 kr = memory_object_map(pager, VM_PROT_READ);
3512                                 assert(kr == KERN_SUCCESS);
3513
3514                                 vm_object_lock(fourk_object);
3515                                 vm_object_mapping_end(fourk_object);
3516                         }
3517                         vm_object_unlock(fourk_object);
3518                 }
3519         }
3520
3521         assert(map_locked == TRUE);
3522
3523         if (!keep_map_locked) {
3524                 vm_map_unlock(map);
3525                 map_locked = FALSE;
3526         }
3527
3528         /*
3529          * We can't hold the map lock if we enter this block.
3530          */
3531
3532         if (result == KERN_SUCCESS) {
3533
3534                 /*      Wire down the new entry if the user
3535                  *      requested all new map entries be wired.
3536                  */
3537                 if ((map->wiring_required)||(superpage_size)) {
3538                         assert(!keep_map_locked);
3539                         pmap_empty = FALSE; /* pmap won't be empty */
3540                         kr = vm_map_wire_kernel(map, start, end,
3541                                              new_entry->protection, VM_KERN_MEMORY_MLOCK,
3542                                              TRUE);
3543                         result = kr;
3544                 }
3545
3546         }
3547
3548         if (result != KERN_SUCCESS) {
3549                 if (new_mapping_established) {
3550                         /*
3551                          * We have to get rid of the new mappings since we
3552                          * won't make them available to the user.
3553                          * Try and do that atomically, to minimize the risk
3554                          * that someone else creates new mappings in that range.
3555                          */
3556                         zap_new_map = vm_map_create(PMAP_NULL,
3557                                                     *address,
3558                                                     *address + size,
3559                                                     map->hdr.entries_pageable);
3560                         vm_map_set_page_shift(zap_new_map,
3561                                               VM_MAP_PAGE_SHIFT(map));
3562                         vm_map_disable_hole_optimization(zap_new_map);
3563
3564                         if (!map_locked) {
3565                                 vm_map_lock(map);
3566                                 map_locked = TRUE;
3567                         }
3568                         (void) vm_map_delete(map, *address, *address+size,
3569                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3570                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3571                                              zap_new_map);
3572                 }
3573                 if (zap_old_map != VM_MAP_NULL &&
3574                     zap_old_map->hdr.nentries != 0) {
3575                         vm_map_entry_t  entry1, entry2;
3576
3577                         /*
3578                          * The new mapping failed.  Attempt to restore
3579                          * the old mappings, saved in the "zap_old_map".
3580                          */
3581                         if (!map_locked) {
3582                                 vm_map_lock(map);
3583                                 map_locked = TRUE;
3584                         }
3585
3586                         /* first check if the coast is still clear */
3587                         start = vm_map_first_entry(zap_old_map)->vme_start;
3588                         end = vm_map_last_entry(zap_old_map)->vme_end;
3589                         if (vm_map_lookup_entry(map, start, &entry1) ||
3590                             vm_map_lookup_entry(map, end, &entry2) ||
3591                             entry1 != entry2) {
3592                                 /*
3593                                  * Part of that range has already been
3594                                  * re-mapped:  we can't restore the old
3595                                  * mappings...
3596                                  */
3597                                 vm_map_enter_restore_failures++;
3598                         } else {
3599                                 /*
3600                                  * Transfer the saved map entries from
3601                                  * "zap_old_map" to the original "map",
3602                                  * inserting them all after "entry1".
3603                                  */
3604                                 for (entry2 = vm_map_first_entry(zap_old_map);
3605                                      entry2 != vm_map_to_entry(zap_old_map);
3606                                      entry2 = vm_map_first_entry(zap_old_map)) {
3607                                         vm_map_size_t entry_size;
3608
3609                                         entry_size = (entry2->vme_end -
3610                                                       entry2->vme_start);
3611                                         vm_map_store_entry_unlink(zap_old_map,
3612                                                             entry2);
3613                                         zap_old_map->size -= entry_size;
3614                                         vm_map_store_entry_link(map, entry1, entry2);
3615                                         map->size += entry_size;
3616                                         entry1 = entry2;
3617                                 }
3618                                 if (map->wiring_required) {
3619                                         /*
3620                                          * XXX TODO: we should rewire the
3621                                          * old pages here...
3622                                          */
3623                                 }
3624                                 vm_map_enter_restore_successes++;
3625                         }
3626                 }
3627         }
3628
3629         /*
3630          * The caller is responsible for releasing the lock if it requested to
3631          * keep the map locked.
3632          */
3633         if (map_locked && !keep_map_locked) {
3634                 vm_map_unlock(map);
3635         }
3636
3637         /*
3638          * Get rid of the "zap_maps" and all the map entries that
3639          * they may still contain.
3640          */
3641         if (zap_old_map != VM_MAP_NULL) {
3642                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3643                 zap_old_map = VM_MAP_NULL;
3644         }
3645         if (zap_new_map != VM_MAP_NULL) {
3646                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3647                 zap_new_map = VM_MAP_NULL;
3648         }
3649
3650         return result;
3651
3652 #undef  RETURN
3653 }
3654 #endif /* __arm64__ */
3655
3656 /*
3657  * Counters for the prefault optimization (global, signed 64-bit, start at 0).
3658  */
3659 int64_t vm_prefault_nb_pages = 0;	/* presumably counts pages entered by prefault -- TODO confirm at the update site */
3660 int64_t vm_prefault_nb_bailout = 0;	/* presumably counts prefault attempts abandoned early -- TODO confirm */
3661
3662 static kern_return_t
3663 vm_map_enter_mem_object_helper(
3664         vm_map_t                target_map,
3665         vm_map_offset_t         *address,
3666         vm_map_size_t           initial_size,
3667         vm_map_offset_t         mask,
3668         int                     flags,
3669         vm_map_kernel_flags_t   vmk_flags,
3670         vm_tag_t                tag,
3671         ipc_port_t              port,
3672         vm_object_offset_t      offset,
3673         boolean_t               copy,
3674         vm_prot_t               cur_protection,
3675         vm_prot_t               max_protection,
3676         vm_inherit_t            inheritance,
3677         upl_page_list_ptr_t     page_list,
3678         unsigned int            page_list_count)
3679 {
3680         vm_map_address_t        map_addr;
3681         vm_map_size_t           map_size;
3682         vm_object_t             object;
3683         vm_object_size_t        size;
3684         kern_return_t           result;
3685         boolean_t               mask_cur_protection, mask_max_protection;
3686         boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
3687         vm_map_offset_t         offset_in_mapping = 0;
3688 #if __arm64__
3689         boolean_t               fourk = vmk_flags.vmkf_fourk;
3690 #endif /* __arm64__ */
3691
3692         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3693
3694         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3695         mask_max_protection = max_protection & VM_PROT_IS_MASK;
3696         cur_protection &= ~VM_PROT_IS_MASK;
3697         max_protection &= ~VM_PROT_IS_MASK;
3698
3699         /*
3700          * Check arguments for validity
3701          */
3702         if ((target_map == VM_MAP_NULL) ||
3703             (cur_protection & ~VM_PROT_ALL) ||
3704             (max_protection & ~VM_PROT_ALL) ||
3705             (inheritance > VM_INHERIT_LAST_VALID) ||
3706             (try_prefault && (copy || !page_list)) ||
3707             initial_size == 0) {
3708                 return KERN_INVALID_ARGUMENT;
3709         }
3710
3711 #if __arm64__
3712         if (fourk) {
3713                 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3714                 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3715         } else
3716 #endif /* __arm64__ */
3717         {
3718                 map_addr = vm_map_trunc_page(*address,
3719                                              VM_MAP_PAGE_MASK(target_map));
3720                 map_size = vm_map_round_page(initial_size,
3721                                              VM_MAP_PAGE_MASK(target_map));
3722         }
3723         size = vm_object_round_page(initial_size);
3724
3725         /*
3726          * Find the vm object (if any) corresponding to this port.
3727          */
3728         if (!IP_VALID(port)) {
3729                 object = VM_OBJECT_NULL;
3730                 offset = 0;
3731                 copy = FALSE;
3732         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
3733                 vm_named_entry_t        named_entry;
3734
3735                 named_entry = (vm_named_entry_t) port->ip_kobject;
3736
3737                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3738                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3739                         offset += named_entry->data_offset;
3740                 }
3741
3742                 /* a few checks to make sure user is obeying rules */
3743                 if (size == 0) {
3744                         if (offset >= named_entry->size)
3745                                 return KERN_INVALID_RIGHT;
3746                         size = named_entry->size - offset;
3747                 }
3748                 if (mask_max_protection) {
3749                         max_protection &= named_entry->protection;
3750                 }
3751                 if (mask_cur_protection) {
3752                         cur_protection &= named_entry->protection;
3753                 }
3754                 if ((named_entry->protection & max_protection) !=
3755                     max_protection)
3756                         return KERN_INVALID_RIGHT;
3757                 if ((named_entry->protection & cur_protection) !=
3758                     cur_protection)
3759                         return KERN_INVALID_RIGHT;
3760                 if (offset + size < offset) {
3761                         /* overflow */
3762                         return KERN_INVALID_ARGUMENT;
3763                 }
3764                 if (named_entry->size < (offset + initial_size)) {
3765                         return KERN_INVALID_ARGUMENT;
3766                 }
3767
3768                 if (named_entry->is_copy) {
3769                         /* for a vm_map_copy, we can only map it whole */
3770                         if ((size != named_entry->size) &&
3771                             (vm_map_round_page(size,
3772                                                VM_MAP_PAGE_MASK(target_map)) ==
3773                              named_entry->size)) {
3774                                 /* XXX FBDP use the rounded size... */
3775                                 size = vm_map_round_page(
3776                                         size,
3777                                         VM_MAP_PAGE_MASK(target_map));
3778                         }
3779
3780                         if (!(flags & VM_FLAGS_ANYWHERE) &&
3781                             (offset != 0 ||
3782                              size != named_entry->size)) {
3783                                 /*
3784                                  * XXX for a mapping at a "fixed" address,
3785                                  * we can't trim after mapping the whole
3786                                  * memory entry, so reject a request for a
3787                                  * partial mapping.
3788                                  */
3789                                 return KERN_INVALID_ARGUMENT;
3790                         }
3791                 }
3792
3793                 /* the caller's "offset" parameter is relative to the start */
3794                 /* of the named entry; add the entry's offset in the object */
3795                 offset = offset + named_entry->offset;
3796
3797                 if (! VM_MAP_PAGE_ALIGNED(size,
3798                                           VM_MAP_PAGE_MASK(target_map))) {
3799                         /*
3800                          * Let's not map more than requested;
3801                          * vm_map_enter() will handle this "not map-aligned"
3802                          * case.
3803                          */
3804                         map_size = size;
3805                 }
3806
3807                 named_entry_lock(named_entry);
3808                 if (named_entry->is_sub_map) {
3809                         vm_map_t                submap;
3810
3811                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3812                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3813                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3814                         }
3815
3816                         submap = named_entry->backing.map;
3817                         vm_map_lock(submap);
3818                         vm_map_reference(submap);
3819                         vm_map_unlock(submap);
3820                         named_entry_unlock(named_entry);
3821
3822                         vmk_flags.vmkf_submap = TRUE;
3823
3824                         result = vm_map_enter(target_map,
3825                                               &map_addr,
3826                                               map_size,
3827                                               mask,
3828                                               flags,
3829                                               vmk_flags,
3830                                               tag,
3831                                               (vm_object_t) submap,
3832                                               offset,
3833                                               copy,
3834                                               cur_protection,
3835                                               max_protection,
3836                                               inheritance);
3837                         if (result != KERN_SUCCESS) {
3838                                 vm_map_deallocate(submap);
3839                         } else {
3840                                 /*
3841                                  * No need to lock "submap" just to check its
3842                                  * "mapped_in_other_pmaps" flag: it is never
3843                                  * reset once it's been set and if we race, we'll
3844                                  * just end up setting it twice, which is OK.
3845                                  */
3846                                 if (submap->mapped_in_other_pmaps == FALSE &&
3847                                     vm_map_pmap(submap) != PMAP_NULL &&
3848                                     vm_map_pmap(submap) !=
3849                                     vm_map_pmap(target_map)) {
3850                                         /*
3851                                          * This submap is being mapped in a map
3852                                          * that uses a different pmap.
3853                                          * Set its "mapped_in_other_pmaps" flag
3854                                          * to indicate that we now need to
3855                                          * remove mappings from all pmaps rather
3856                                          * than just the submap's pmap.
3857                                          */
3858                                         vm_map_lock(submap);
3859                                         submap->mapped_in_other_pmaps = TRUE;
3860                                         vm_map_unlock(submap);
3861                                 }
3862                                 *address = map_addr;
3863                         }
3864                         return result;
3865
3866                 } else if (named_entry->is_copy) {
3867                         kern_return_t   kr;
3868                         vm_map_copy_t   copy_map;
3869                         vm_map_entry_t  copy_entry;
3870                         vm_map_offset_t copy_addr;
3871
3872                         if (flags & ~(VM_FLAGS_FIXED |
3873                                       VM_FLAGS_ANYWHERE |
3874                                       VM_FLAGS_OVERWRITE |
3875                                       VM_FLAGS_RETURN_4K_DATA_ADDR |
3876                                       VM_FLAGS_RETURN_DATA_ADDR |
3877                                       VM_FLAGS_ALIAS_MASK)) {
3878                                 named_entry_unlock(named_entry);
3879                                 return KERN_INVALID_ARGUMENT;
3880                         }
3881
3882                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3883                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3884                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
3885                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3886                                         offset_in_mapping &= ~((signed)(0xFFF));
3887                                 offset = vm_object_trunc_page(offset);
3888                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3889                         }
3890
3891                         copy_map = named_entry->backing.copy;
3892                         assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3893                         if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3894                                 /* unsupported type; should not happen */
3895                                 printf("vm_map_enter_mem_object: "
3896                                        "memory_entry->backing.copy "
3897                                        "unsupported type 0x%x\n",
3898                                        copy_map->type);
3899                                 named_entry_unlock(named_entry);
3900                                 return KERN_INVALID_ARGUMENT;
3901                         }
3902
3903                         /* reserve a contiguous range */
3904                         kr = vm_map_enter(target_map,
3905                                           &map_addr,
3906                                           /* map whole mem entry, trim later: */
3907                                           named_entry->size,
3908                                           mask,
3909                                           flags & (VM_FLAGS_ANYWHERE |
3910                                                    VM_FLAGS_OVERWRITE |
3911                                                    VM_FLAGS_RETURN_4K_DATA_ADDR |
3912                                                    VM_FLAGS_RETURN_DATA_ADDR),
3913                                           vmk_flags,
3914                                           tag,
3915                                           VM_OBJECT_NULL,
3916                                           0,
3917                                           FALSE, /* copy */
3918                                           cur_protection,
3919                                           max_protection,
3920                                           inheritance);
3921                         if (kr != KERN_SUCCESS) {
3922                                 named_entry_unlock(named_entry);
3923                                 return kr;
3924                         }
3925
3926                         copy_addr = map_addr;
3927
3928                         for (copy_entry = vm_map_copy_first_entry(copy_map);
3929                              copy_entry != vm_map_copy_to_entry(copy_map);
3930                              copy_entry = copy_entry->vme_next) {
3931                                 int                     remap_flags;
3932                                 vm_map_kernel_flags_t   vmk_remap_flags;
3933                                 vm_map_t                copy_submap;
3934                                 vm_object_t             copy_object;
3935                                 vm_map_size_t           copy_size;
3936                                 vm_object_offset_t      copy_offset;
3937                                 int                     copy_vm_alias;
3938
3939                                 remap_flags = 0;
3940                                 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
3941
3942                                 copy_object = VME_OBJECT(copy_entry);
3943                                 copy_offset = VME_OFFSET(copy_entry);
3944                                 copy_size = (copy_entry->vme_end -
3945                                              copy_entry->vme_start);
3946                                 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3947                                 if (copy_vm_alias == 0) {
3948                                         /*
3949                                          * Caller does not want a specific
3950                                          * alias for this new mapping:  use
3951                                          * the alias of the original mapping.
3952                                          */
3953                                         copy_vm_alias = VME_ALIAS(copy_entry);
3954                                 }
3955
3956                                 /* sanity check */
3957                                 if ((copy_addr + copy_size) >
3958                                     (map_addr +
3959                                      named_entry->size /* XXX full size */ )) {
3960                                         /* over-mapping too much !? */
3961                                         kr = KERN_INVALID_ARGUMENT;
3962                                         /* abort */
3963                                         break;
3964                                 }
3965
3966                                 /* take a reference on the object */
3967                                 if (copy_entry->is_sub_map) {
3968                                         vmk_remap_flags.vmkf_submap = TRUE;
3969                                         copy_submap = VME_SUBMAP(copy_entry);
3970                                         vm_map_lock(copy_submap);
3971                                         vm_map_reference(copy_submap);
3972                                         vm_map_unlock(copy_submap);
3973                                         copy_object = (vm_object_t) copy_submap;
3974                                 } else if (!copy &&
3975                                            copy_object != VM_OBJECT_NULL &&
3976                                            (copy_entry->needs_copy ||
3977                                             copy_object->shadowed ||
3978                                             (!copy_object->true_share &&
3979                                              !copy_entry->is_shared &&
3980                                              copy_object->vo_size > copy_size))) {
3981                                         /*
3982                                          * We need to resolve our side of this
3983                                          * "symmetric" copy-on-write now; we
3984                                          * need a new object to map and share,
3985                                          * instead of the current one which
3986                                          * might still be shared with the
3987                                          * original mapping.
3988                                          *
3989                                          * Note: A "vm_map_copy_t" does not
3990                                          * have a lock but we're protected by
3991                                          * the named entry's lock here.
3992                                          */
3993                                         // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
3994                                         VME_OBJECT_SHADOW(copy_entry, copy_size);
3995                                         if (!copy_entry->needs_copy &&
3996                                             copy_entry->protection & VM_PROT_WRITE) {
3997                                                 vm_prot_t prot;
3998
3999                                                 prot = copy_entry->protection & ~VM_PROT_WRITE;
4000                                                 vm_object_pmap_protect(copy_object,
4001                                                                        copy_offset,
4002                                                                        copy_size,
4003                                                                        PMAP_NULL,
4004                                                                        0,
4005                                                                        prot);
4006                                         }
4007
4008                                         copy_entry->needs_copy = FALSE;
4009                                         copy_entry->is_shared = TRUE;
4010                                         copy_object = VME_OBJECT(copy_entry);
4011                                         copy_offset = VME_OFFSET(copy_entry);
4012                                         vm_object_lock(copy_object);
4013                                         vm_object_reference_locked(copy_object);
4014                                         if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4015                                                 /* we're about to make a shared mapping of this object */
4016                                                 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4017                                                 copy_object->true_share = TRUE;
4018                                         }
4019                                         vm_object_unlock(copy_object);
4020                                 } else {
4021                                         /*
4022                                          * We already have the right object
4023                                          * to map.
4024                                          */
4025                                         copy_object = VME_OBJECT(copy_entry);
4026                                         vm_object_reference(copy_object);
4027                                 }
4028
4029                                 /* over-map the object into destination */
4030                                 remap_flags |= flags;
4031                                 remap_flags |= VM_FLAGS_FIXED;
4032                                 remap_flags |= VM_FLAGS_OVERWRITE;
4033                                 remap_flags &= ~VM_FLAGS_ANYWHERE;
4034                                 if (!copy && !copy_entry->is_sub_map) {
4035                                         /*
4036                                          * copy-on-write should have been
4037                                          * resolved at this point, or we would
4038                                          * end up sharing instead of copying.
4039                                          */
4040                                         assert(!copy_entry->needs_copy);
4041                                 }
4042                                 kr = vm_map_enter(target_map,
4043                                                   &copy_addr,
4044                                                   copy_size,
4045                                                   (vm_map_offset_t) 0,
4046                                                   remap_flags,
4047                                                   vmk_remap_flags,
4048                                                   copy_vm_alias,
4049                                                   copy_object,
4050                                                   copy_offset,
4051                                                   copy,
4052                                                   cur_protection,
4053                                                   max_protection,
4054                                                   inheritance);
4055                                 if (kr != KERN_SUCCESS) {
4056                                         if (copy_entry->is_sub_map) {
4057                                                 vm_map_deallocate(copy_submap);
4058                                         } else {
4059                                                 vm_object_deallocate(copy_object);
4060                                         }
4061                                         /* abort */
4062                                         break;
4063                                 }
4064
4065                                 /* next mapping */
4066                                 copy_addr += copy_size;
4067                         }
4068
4069                         if (kr == KERN_SUCCESS) {
4070                                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4071                                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4072                                         *address = map_addr + offset_in_mapping;
4073                                 } else {
4074                                         *address = map_addr;
4075                                 }
4076
4077                                 if (offset) {
4078                                         /*
4079                                          * Trim in front, from 0 to "offset".
4080                                          */
4081                                         vm_map_remove(target_map,
4082                                                       map_addr,
4083                                                       map_addr + offset,
4084                                                       0);
4085                                         *address += offset;
4086                                 }
4087                                 if (offset + map_size < named_entry->size) {
4088                                         /*
4089                                          * Trim in back, from
4090                                          * "offset + map_size" to
4091                                          * "named_entry->size".
4092                                          */
4093                                         vm_map_remove(target_map,
4094                                                       (map_addr +
4095                                                        offset + map_size),
4096                                                       (map_addr +
4097                                                        named_entry->size),
4098                                                       0);
4099                                 }
4100                         }
4101                         named_entry_unlock(named_entry);
4102
4103                         if (kr != KERN_SUCCESS) {
4104                                 if (! (flags & VM_FLAGS_OVERWRITE)) {
4105                                         /* deallocate the contiguous range */
4106                                         (void) vm_deallocate(target_map,
4107                                                              map_addr,
4108                                                              map_size);
4109                                 }
4110                         }
4111
4112                         return kr;
4113
4114                 } else {
4115                         unsigned int    access;
4116                         vm_prot_t       protections;
4117                         unsigned int    wimg_mode;
4118
4119                         /* we are mapping a VM object */
4120
4121                         protections = named_entry->protection & VM_PROT_ALL;
4122                         access = GET_MAP_MEM(named_entry->protection);
4123
4124                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4125                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4126                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
4127                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4128                                         offset_in_mapping &= ~((signed)(0xFFF));
4129                                 offset = vm_object_trunc_page(offset);
4130                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4131                         }
4132
4133                         object = named_entry->backing.object;
4134                         assert(object != VM_OBJECT_NULL);
4135                         vm_object_lock(object);
4136                         named_entry_unlock(named_entry);
4137
4138                         vm_object_reference_locked(object);
4139
4140                         wimg_mode = object->wimg_bits;
4141                         vm_prot_to_wimg(access, &wimg_mode);
4142                         if (object->wimg_bits != wimg_mode)
4143                                 vm_object_change_wimg_mode(object, wimg_mode);
4144
4145                         vm_object_unlock(object);
4146                 }
4147         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4148                 /*
4149                  * JMM - This is temporary until we unify named entries
4150                  * and raw memory objects.
4151                  *
4152                  * Detected fake ip_kotype for a memory object.  In
4153                  * this case, the port isn't really a port at all, but
4154                  * instead is just a raw memory object.
4155                  */
4156                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4157                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4158                         panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4159                 }
4160
4161                 object = memory_object_to_vm_object((memory_object_t)port);
4162                 if (object == VM_OBJECT_NULL)
4163                         return KERN_INVALID_OBJECT;
4164                 vm_object_reference(object);
4165
4166                 /* wait for object (if any) to be ready */
4167                 if (object != VM_OBJECT_NULL) {
4168                         if (object == kernel_object) {
4169                                 printf("Warning: Attempt to map kernel object"
4170                                         " by a non-private kernel entity\n");
4171                                 return KERN_INVALID_OBJECT;
4172                         }
4173                         if (!object->pager_ready) {
4174                                 vm_object_lock(object);
4175
4176                                 while (!object->pager_ready) {
4177                                         vm_object_wait(object,
4178                                                        VM_OBJECT_EVENT_PAGER_READY,
4179                                                        THREAD_UNINT);
4180                                         vm_object_lock(object);
4181                                 }
4182                                 vm_object_unlock(object);
4183                         }
4184                 }
4185         } else {
4186                 return KERN_INVALID_OBJECT;
4187         }
4188
4189         if (object != VM_OBJECT_NULL &&
4190             object->named &&
4191             object->pager != MEMORY_OBJECT_NULL &&
4192             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4193                 memory_object_t pager;
4194                 vm_prot_t       pager_prot;
4195                 kern_return_t   kr;
4196
4197                 /*
4198                  * For "named" VM objects, let the pager know that the
4199                  * memory object is being mapped.  Some pagers need to keep
4200                  * track of this, to know when they can reclaim the memory
4201                  * object, for example.
4202                  * VM calls memory_object_map() for each mapping (specifying
4203                  * the protection of each mapping) and calls
4204                  * memory_object_last_unmap() when all the mappings are gone.
4205                  */
4206                 pager_prot = max_protection;
4207                 if (copy) {
4208                         /*
4209                          * Copy-On-Write mapping: won't modify the
4210                          * memory object.
4211                          */
4212                         pager_prot &= ~VM_PROT_WRITE;
4213                 }
4214                 vm_object_lock(object);
4215                 pager = object->pager;
4216                 if (object->named &&
4217                     pager != MEMORY_OBJECT_NULL &&
4218                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4219                         assert(object->pager_ready);
4220                         vm_object_mapping_wait(object, THREAD_UNINT);
4221                         vm_object_mapping_begin(object);
4222                         vm_object_unlock(object);
4223
4224                         kr = memory_object_map(pager, pager_prot);
4225                         assert(kr == KERN_SUCCESS);
4226
4227                         vm_object_lock(object);
4228                         vm_object_mapping_end(object);
4229                 }
4230                 vm_object_unlock(object);
4231         }
4232
4233         /*
4234          *      Perform the copy if requested
4235          */
4236
4237         if (copy) {
4238                 vm_object_t             new_object;
4239                 vm_object_offset_t      new_offset;
4240
4241                 result = vm_object_copy_strategically(object, offset,
4242                                                       map_size,
4243                                                       &new_object, &new_offset,
4244                                                       &copy);
4245
4246
4247                 if (result == KERN_MEMORY_RESTART_COPY) {
4248                         boolean_t success;
4249                         boolean_t src_needs_copy;
4250
4251                         /*
4252                          * XXX
4253                          * We currently ignore src_needs_copy.
4254                          * This really is the issue of how to make
4255                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4256                          * non-kernel users to use. Solution forthcoming.
4257                          * In the meantime, since we don't allow non-kernel
4258                          * memory managers to specify symmetric copy,
4259                          * we won't run into problems here.
4260                          */
4261                         new_object = object;
4262                         new_offset = offset;
4263                         success = vm_object_copy_quickly(&new_object,
4264                                                          new_offset,
4265                                                          map_size,
4266                                                          &src_needs_copy,
4267                                                          &copy);
4268                         assert(success);
4269                         result = KERN_SUCCESS;
4270                 }
4271                 /*
4272                  *      Throw away the reference to the
4273                  *      original object, as it won't be mapped.
4274                  */
4275
4276                 vm_object_deallocate(object);
4277
4278                 if (result != KERN_SUCCESS) {
4279                         return result;
4280                 }
4281
4282                 object = new_object;
4283                 offset = new_offset;
4284         }
4285
4286         /*
4287          * If non-kernel users want to try to prefault pages, the mapping and prefault
4288          * needs to be atomic.
4289          */
4290         kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4291         vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4292
4293 #if __arm64__
4294         if (fourk) {
4295                 /* map this object in a "4K" pager */
4296                 result = vm_map_enter_fourk(target_map,
4297                                             &map_addr,
4298                                             map_size,
4299                                             (vm_map_offset_t) mask,
4300                                             flags,
4301                                             vmk_flags,
4302                                             tag,
4303                                             object,
4304                                             offset,
4305                                             copy,
4306                                             cur_protection,
4307                                             max_protection,
4308                                             inheritance);
4309         } else
4310 #endif /* __arm64__ */
4311         {
4312                 result = vm_map_enter(target_map,
4313                                       &map_addr, map_size,
4314                                       (vm_map_offset_t)mask,
4315                                       flags,
4316                                       vmk_flags,
4317                                       tag,
4318                                       object, offset,
4319                                       copy,
4320                                       cur_protection, max_protection,
4321                                       inheritance);
4322         }
4323         if (result != KERN_SUCCESS)
4324                 vm_object_deallocate(object);
4325
4326         /*
4327          * Try to prefault, and do not forget to release the vm map lock.
4328          */
4329         if (result == KERN_SUCCESS && try_prefault) {
4330                 mach_vm_address_t va = map_addr;
4331                 kern_return_t kr = KERN_SUCCESS;
4332                 unsigned int i = 0;
4333                 int pmap_options;
4334
4335                 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4336                 if (object->internal) {
4337                         pmap_options |= PMAP_OPTIONS_INTERNAL;
4338                 }
4339
4340                 for (i = 0; i < page_list_count; ++i) {
4341                         if (!UPL_VALID_PAGE(page_list, i)) {
4342                                 if (kernel_prefault) {
4343                                         assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4344                                         result = KERN_MEMORY_ERROR;
4345                                         break;
4346                                 }
4347                         } else {
4348                                 /*
4349                                  * If this function call failed, we should stop
4350                                  * trying to optimize, other calls are likely
4351                                  * going to fail too.
4352                                  *
4353                                  * We are not gonna report an error for such
4354                                  * failure though. That's an optimization, not
4355                                  * something critical.
4356                                  */
4357                                 kr = pmap_enter_options(target_map->pmap,
4358                                                         va, UPL_PHYS_PAGE(page_list, i),
4359                                                         cur_protection, VM_PROT_NONE,
4360                                                         0, TRUE, pmap_options, NULL);
4361                                 if (kr != KERN_SUCCESS) {
4362                                         OSIncrementAtomic64(&vm_prefault_nb_bailout);
4363                                         if (kernel_prefault) {
4364                                                 result = kr;
4365                                         }
4366                                         break;
4367                                 }
4368                                 OSIncrementAtomic64(&vm_prefault_nb_pages);
4369                         }
4370
4371                         /* Next virtual address */
4372                         va += PAGE_SIZE;
4373                 }
4374                 if (vmk_flags.vmkf_keep_map_locked) {
4375                         vm_map_unlock(target_map);
4376                 }
4377         }
4378
4379         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4380                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4381                 *address = map_addr + offset_in_mapping;
4382         } else {
4383                 *address = map_addr;
4384         }
4385         return result;
4386 }
4387
4388 kern_return_t
4389 vm_map_enter_mem_object(
4390         vm_map_t                target_map,
4391         vm_map_offset_t         *address,
4392         vm_map_size_t           initial_size,
4393         vm_map_offset_t         mask,
4394         int                     flags,
4395         vm_map_kernel_flags_t   vmk_flags,
4396         vm_tag_t                tag,
4397         ipc_port_t              port,
4398         vm_object_offset_t      offset,
4399         boolean_t               copy,
4400         vm_prot_t               cur_protection,
4401         vm_prot_t               max_protection,
4402         vm_inherit_t            inheritance)
4403 {
4404         kern_return_t ret;
4405
4406         ret = vm_map_enter_mem_object_helper(target_map,
4407                                              address,
4408                                              initial_size,
4409                                              mask,
4410                                              flags,
4411                                              vmk_flags,
4412                                              tag,
4413                                              port,
4414                                              offset,
4415                                              copy,
4416                                              cur_protection,
4417                                              max_protection,
4418                                              inheritance,
4419                                              NULL,
4420                                              0);
4421
4422 #if KASAN
4423         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4424                 kasan_notify_address(*address, initial_size);
4425         }
4426 #endif
4427
4428         return ret;
4429 }
4430
4431 kern_return_t
4432 vm_map_enter_mem_object_prefault(
4433         vm_map_t                target_map,
4434         vm_map_offset_t         *address,
4435         vm_map_size_t           initial_size,
4436         vm_map_offset_t         mask,
4437         int                     flags,
4438         vm_map_kernel_flags_t   vmk_flags,
4439         vm_tag_t                tag,
4440         ipc_port_t              port,
4441         vm_object_offset_t      offset,
4442         vm_prot_t               cur_protection,
4443         vm_prot_t               max_protection,
4444         upl_page_list_ptr_t     page_list,
4445         unsigned int            page_list_count)
4446 {
4447         kern_return_t ret;
4448
4449         ret = vm_map_enter_mem_object_helper(target_map,
4450                                              address,
4451                                              initial_size,
4452                                              mask,
4453                                              flags,
4454                                              vmk_flags,
4455                                              tag,
4456                                              port,
4457                                              offset,
4458                                              FALSE,
4459                                              cur_protection,
4460                                              max_protection,
4461                                              VM_INHERIT_DEFAULT,
4462                                              page_list,
4463                                              page_list_count);
4464
4465 #if KASAN
4466         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4467                 kasan_notify_address(*address, initial_size);
4468         }
4469 #endif
4470
4471         return ret;
4472 }
4473
4474
4475 kern_return_t
4476 vm_map_enter_mem_object_control(
4477         vm_map_t                target_map,
4478         vm_map_offset_t         *address,
4479         vm_map_size_t           initial_size,
4480         vm_map_offset_t         mask,
4481         int                     flags,
4482         vm_map_kernel_flags_t   vmk_flags,
4483         vm_tag_t                tag,
4484         memory_object_control_t control,
4485         vm_object_offset_t      offset,
4486         boolean_t               copy,
4487         vm_prot_t               cur_protection,
4488         vm_prot_t               max_protection,
4489         vm_inherit_t            inheritance)
4490 {
4491         vm_map_address_t        map_addr;
4492         vm_map_size_t           map_size;
4493         vm_object_t             object;
4494         vm_object_size_t        size;
4495         kern_return_t           result;
4496         memory_object_t         pager;
4497         vm_prot_t               pager_prot;
4498         kern_return_t           kr;
4499 #if __arm64__
4500         boolean_t               fourk = vmk_flags.vmkf_fourk;
4501 #endif /* __arm64__ */
4502
4503         /*
4504          * Check arguments for validity
4505          */
4506         if ((target_map == VM_MAP_NULL) ||
4507             (cur_protection & ~VM_PROT_ALL) ||
4508             (max_protection & ~VM_PROT_ALL) ||
4509             (inheritance > VM_INHERIT_LAST_VALID) ||
4510             initial_size == 0) {
4511                 return KERN_INVALID_ARGUMENT;
4512         }
4513
4514 #if __arm64__
4515         if (fourk) {
4516                 map_addr = vm_map_trunc_page(*address,
4517                                              FOURK_PAGE_MASK);
4518                 map_size = vm_map_round_page(initial_size,
4519                                              FOURK_PAGE_MASK);
4520         } else
4521 #endif /* __arm64__ */
4522         {
4523                 map_addr = vm_map_trunc_page(*address,
4524                                              VM_MAP_PAGE_MASK(target_map));
4525                 map_size = vm_map_round_page(initial_size,
4526                                              VM_MAP_PAGE_MASK(target_map));
4527         }
4528         size = vm_object_round_page(initial_size);
4529
4530         object = memory_object_control_to_vm_object(control);
4531
4532         if (object == VM_OBJECT_NULL)
4533                 return KERN_INVALID_OBJECT;
4534
4535         if (object == kernel_object) {
4536                 printf("Warning: Attempt to map kernel object"
4537                        " by a non-private kernel entity\n");
4538                 return KERN_INVALID_OBJECT;
4539         }
4540
4541         vm_object_lock(object);
4542         object->ref_count++;
4543         vm_object_res_reference(object);
4544
4545         /*
4546          * For "named" VM objects, let the pager know that the
4547          * memory object is being mapped.  Some pagers need to keep
4548          * track of this, to know when they can reclaim the memory
4549          * object, for example.
4550          * VM calls memory_object_map() for each mapping (specifying
4551          * the protection of each mapping) and calls
4552          * memory_object_last_unmap() when all the mappings are gone.
4553          */
4554         pager_prot = max_protection;
4555         if (copy) {
4556                 pager_prot &= ~VM_PROT_WRITE;
4557         }
4558         pager = object->pager;
4559         if (object->named &&
4560             pager != MEMORY_OBJECT_NULL &&
4561             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4562                 assert(object->pager_ready);
4563                 vm_object_mapping_wait(object, THREAD_UNINT);
4564                 vm_object_mapping_begin(object);
4565                 vm_object_unlock(object);
4566
4567                 kr = memory_object_map(pager, pager_prot);
4568                 assert(kr == KERN_SUCCESS);
4569
4570                 vm_object_lock(object);
4571                 vm_object_mapping_end(object);
4572         }
4573         vm_object_unlock(object);
4574
4575         /*
4576          *      Perform the copy if requested
4577          */
4578
4579         if (copy) {
4580                 vm_object_t             new_object;
4581                 vm_object_offset_t      new_offset;
4582
4583                 result = vm_object_copy_strategically(object, offset, size,
4584                                                       &new_object, &new_offset,
4585                                                       &copy);
4586
4587
4588                 if (result == KERN_MEMORY_RESTART_COPY) {
4589                         boolean_t success;
4590                         boolean_t src_needs_copy;
4591
4592                         /*
4593                          * XXX
4594                          * We currently ignore src_needs_copy.
4595                          * This really is the issue of how to make
4596                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4597                          * non-kernel users to use. Solution forthcoming.
4598                          * In the meantime, since we don't allow non-kernel
4599                          * memory managers to specify symmetric copy,
4600                          * we won't run into problems here.
4601                          */
4602                         new_object = object;
4603                         new_offset = offset;
4604                         success = vm_object_copy_quickly(&new_object,
4605                                                          new_offset, size,
4606                                                          &src_needs_copy,
4607                                                          &copy);
4608                         assert(success);
4609                         result = KERN_SUCCESS;
4610                 }
4611                 /*
4612                  *      Throw away the reference to the
4613                  *      original object, as it won't be mapped.
4614                  */
4615
4616                 vm_object_deallocate(object);
4617
4618                 if (result != KERN_SUCCESS) {
4619                         return result;
4620                 }
4621
4622                 object = new_object;
4623                 offset = new_offset;
4624         }
4625
4626 #if __arm64__
4627         if (fourk) {
4628                 result = vm_map_enter_fourk(target_map,
4629                                             &map_addr,
4630                                             map_size,
4631                                             (vm_map_offset_t)mask,
4632                                             flags,
4633                                             vmk_flags,
4634                                             tag,
4635                                             object, offset,
4636                                             copy,
4637                                             cur_protection, max_protection,
4638                                             inheritance);
4639         } else
4640 #endif /* __arm64__ */
4641         {
4642                 result = vm_map_enter(target_map,
4643                                       &map_addr, map_size,
4644                                       (vm_map_offset_t)mask,
4645                                       flags,
4646                                       vmk_flags,
4647                                       tag,
4648                                       object, offset,
4649                                       copy,
4650                                       cur_protection, max_protection,
4651                                       inheritance);
4652         }
4653         if (result != KERN_SUCCESS)
4654                 vm_object_deallocate(object);
4655         *address = map_addr;
4656
4657         return result;
4658 }
4659
4660
4661 #if     VM_CPM
4662
4663 #ifdef MACH_ASSERT
4664 extern pmap_paddr_t     avail_start, avail_end;
4665 #endif
4666
4667 /*
4668  *      Allocate memory in the specified map, with the caveat that
4669  *      the memory is physically contiguous.  This call may fail
4670  *      if the system can't find sufficient contiguous memory.
4671  *      This call may cause or lead to heart-stopping amounts of
4672  *      paging activity.
4673  *
4674  *      Memory obtained from this call should be freed in the
4675  *      normal way, viz., via vm_deallocate.
4676  */
4677 kern_return_t
4678 vm_map_enter_cpm(
4679         vm_map_t                map,
4680         vm_map_offset_t *addr,
4681         vm_map_size_t           size,
4682         int                     flags)
4683 {
4684         vm_object_t             cpm_obj;
4685         pmap_t                  pmap;
4686         vm_page_t               m, pages;
4687         kern_return_t           kr;
4688         vm_map_offset_t         va, start, end, offset;
4689 #if     MACH_ASSERT
4690         vm_map_offset_t         prev_addr = 0;
4691 #endif  /* MACH_ASSERT */
4692
4693         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4694         vm_tag_t tag;
4695
4696         VM_GET_FLAGS_ALIAS(flags, tag);
4697
4698         if (size == 0) {
4699                 *addr = 0;
4700                 return KERN_SUCCESS;
4701         }
4702         if (anywhere)
4703                 *addr = vm_map_min(map);
4704         else
4705                 *addr = vm_map_trunc_page(*addr,
4706                                           VM_MAP_PAGE_MASK(map));
4707         size = vm_map_round_page(size,
4708                                  VM_MAP_PAGE_MASK(map));
4709
4710         /*
4711          * LP64todo - cpm_allocate should probably allow
4712          * allocations of >4GB, but not with the current
4713          * algorithm, so just cast down the size for now.
4714          */
4715         if (size > VM_MAX_ADDRESS)
4716                 return KERN_RESOURCE_SHORTAGE;
4717         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
4718                                &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
4719                 return kr;
4720
4721         cpm_obj = vm_object_allocate((vm_object_size_t)size);
4722         assert(cpm_obj != VM_OBJECT_NULL);
4723         assert(cpm_obj->internal);
4724         assert(cpm_obj->vo_size == (vm_object_size_t)size);
4725         assert(cpm_obj->can_persist == FALSE);
4726         assert(cpm_obj->pager_created == FALSE);
4727         assert(cpm_obj->pageout == FALSE);
4728         assert(cpm_obj->shadow == VM_OBJECT_NULL);
4729
4730         /*
4731          *      Insert pages into object.
4732          */
4733
4734         vm_object_lock(cpm_obj);
4735         for (offset = 0; offset < size; offset += PAGE_SIZE) {
4736                 m = pages;
4737                 pages = NEXT_PAGE(m);
4738                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
4739
4740                 assert(!m->gobbled);
4741                 assert(!m->wanted);
4742                 assert(!m->pageout);
4743                 assert(!m->tabled);
4744                 assert(VM_PAGE_WIRED(m));
4745                 assert(m->busy);
4746                 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
4747
4748                 m->busy = FALSE;
4749                 vm_page_insert(m, cpm_obj, offset);
4750         }
4751         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4752         vm_object_unlock(cpm_obj);
4753
4754         /*
4755          *      Hang onto a reference on the object in case a
4756          *      multi-threaded application for some reason decides
4757          *      to deallocate the portion of the address space into
4758          *      which we will insert this object.
4759          *
4760          *      Unfortunately, we must insert the object now before
4761          *      we can talk to the pmap module about which addresses
4762          *      must be wired down.  Hence, the race with a multi-
4763          *      threaded app.
4764          */
4765         vm_object_reference(cpm_obj);
4766
4767         /*
4768          *      Insert object into map.
4769          */
4770
4771         kr = vm_map_enter(
4772                 map,
4773                 addr,
4774                 size,
4775                 (vm_map_offset_t)0,
4776                 flags,
4777                 VM_MAP_KERNEL_FLAGS_NONE,
4778                 cpm_obj,
4779                 (vm_object_offset_t)0,
4780                 FALSE,
4781                 VM_PROT_ALL,
4782                 VM_PROT_ALL,
4783                 VM_INHERIT_DEFAULT);
4784
4785         if (kr != KERN_SUCCESS) {
4786                 /*
4787                  *      A CPM object doesn't have can_persist set,
4788                  *      so all we have to do is deallocate it to
4789                  *      free up these pages.
4790                  */
4791                 assert(cpm_obj->pager_created == FALSE);
4792                 assert(cpm_obj->can_persist == FALSE);
4793                 assert(cpm_obj->pageout == FALSE);
4794                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4795                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
4796                 vm_object_deallocate(cpm_obj); /* kill creation ref */
4797         }
4798
4799         /*
4800          *      Inform the physical mapping system that the
4801          *      range of addresses may not fault, so that
4802          *      page tables and such can be locked down as well.
4803          */
4804         start = *addr;
4805         end = start + size;
4806         pmap = vm_map_pmap(map);
4807         pmap_pageable(pmap, start, end, FALSE);
4808
4809         /*
4810          *      Enter each page into the pmap, to avoid faults.
4811          *      Note that this loop could be coded more efficiently,
4812          *      if the need arose, rather than looking up each page
4813          *      again.
4814          */
4815         for (offset = 0, va = start; offset < size;
4816              va += PAGE_SIZE, offset += PAGE_SIZE) {
4817                 int type_of_fault;
4818
4819                 vm_object_lock(cpm_obj);
4820                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4821                 assert(m != VM_PAGE_NULL);
4822
4823                 vm_page_zero_fill(m);
4824
4825                 type_of_fault = DBG_ZERO_FILL_FAULT;
4826
4827                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
4828                                                 VM_PAGE_WIRED(m),
4829                                                 FALSE, /* change_wiring */
4830                                                 VM_KERN_MEMORY_NONE, /* tag - not wiring */
4831                                                 FALSE, /* no_cache */
4832                                                 FALSE, /* cs_bypass */
4833                                                 0,     /* user_tag */
4834                                             0,     /* pmap_options */
4835                                                 NULL,  /* need_retry */
4836                                                 &type_of_fault);
4837
4838                 vm_object_unlock(cpm_obj);
4839         }
4840
4841 #if     MACH_ASSERT
4842         /*
4843          *      Verify ordering in address space.
4844          */
4845         for (offset = 0; offset < size; offset += PAGE_SIZE) {
4846                 vm_object_lock(cpm_obj);
4847                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4848                 vm_object_unlock(cpm_obj);
4849                 if (m == VM_PAGE_NULL)
4850                         panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
4851                               cpm_obj, (uint64_t)offset);
4852                 assert(m->tabled);
4853                 assert(!m->busy);
4854                 assert(!m->wanted);
4855                 assert(!m->fictitious);
4856                 assert(!m->private);
4857                 assert(!m->absent);
4858                 assert(!m->error);
4859                 assert(!m->cleaning);
4860                 assert(!m->laundry);
4861                 assert(!m->precious);
4862                 assert(!m->clustered);
4863                 if (offset != 0) {
4864                         if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4865                                 printf("start 0x%llx end 0x%llx va 0x%llx\n",
4866                                        (uint64_t)start, (uint64_t)end, (uint64_t)va);
4867                                 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
4868                                 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
4869                                 panic("vm_allocate_cpm:  pages not contig!");
4870                         }
4871                 }
4872                 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4873         }
4874 #endif  /* MACH_ASSERT */
4875
4876         vm_object_deallocate(cpm_obj); /* kill extra ref */
4877
4878         return kr;
4879 }
4880
4881
4882 #else   /* VM_CPM */
4883
4884 /*
4885  *      Interface is defined in all cases, but unless the kernel
4886  *      is built explicitly for this option, the interface does
4887  *      nothing.
4888  */
4889
4890 kern_return_t
4891 vm_map_enter_cpm(
4892         __unused vm_map_t       map,
4893         __unused vm_map_offset_t        *addr,
4894         __unused vm_map_size_t  size,
4895         __unused int            flags)
4896 {
4897         return KERN_FAILURE;
4898 }
4899 #endif /* VM_CPM */
4900
4901 /* Not used without nested pmaps */
4902 #ifndef NO_NESTED_PMAP
4903 /*
4904  * Clip and unnest a portion of a nested submap mapping.
4905  */
4906
4907
4908 static void
4909 vm_map_clip_unnest(
4910         vm_map_t        map,
4911         vm_map_entry_t  entry,
4912         vm_map_offset_t start_unnest,
4913         vm_map_offset_t end_unnest)
4914 {
4915         vm_map_offset_t old_start_unnest = start_unnest;
4916         vm_map_offset_t old_end_unnest = end_unnest;
4917
4918         assert(entry->is_sub_map);
4919         assert(VME_SUBMAP(entry) != NULL);
4920         assert(entry->use_pmap);
4921
4922         /*
4923          * Query the platform for the optimal unnest range.
4924          * DRK: There's some duplication of effort here, since
4925          * callers may have adjusted the range to some extent. This
4926          * routine was introduced to support 1GiB subtree nesting
4927          * for x86 platforms, which can also nest on 2MiB boundaries
4928          * depending on size/alignment.
4929          */
4930         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
4931                 assert(VME_SUBMAP(entry)->is_nested_map);
4932                 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4933                 log_unnest_badness(map,
4934                                    old_start_unnest,
4935                                    old_end_unnest,
4936                                    VME_SUBMAP(entry)->is_nested_map,
4937                                    (entry->vme_start +
4938                                     VME_SUBMAP(entry)->lowest_unnestable_start -
4939                                     VME_OFFSET(entry)));
4940         }
4941
4942         if (entry->vme_start > start_unnest ||
4943             entry->vme_end < end_unnest) {
4944                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4945                       "bad nested entry: start=0x%llx end=0x%llx\n",
4946                       (long long)start_unnest, (long long)end_unnest,
4947                       (long long)entry->vme_start, (long long)entry->vme_end);
4948         }
4949
4950         if (start_unnest > entry->vme_start) {
4951                 _vm_map_clip_start(&map->hdr,
4952                                    entry,
4953                                    start_unnest);
4954                 if (map->holelistenabled) {
4955                         vm_map_store_update_first_free(map, NULL, FALSE);
4956                 } else {
4957                         vm_map_store_update_first_free(map, map->first_free, FALSE);
4958                 }
4959         }
4960         if (entry->vme_end > end_unnest) {
4961                 _vm_map_clip_end(&map->hdr,
4962                                  entry,
4963                                  end_unnest);
4964                 if (map->holelistenabled) {
4965                         vm_map_store_update_first_free(map, NULL, FALSE);
4966                 } else {
4967                         vm_map_store_update_first_free(map, map->first_free, FALSE);
4968                 }
4969         }
4970
4971         pmap_unnest(map->pmap,
4972                     entry->vme_start,
4973                     entry->vme_end - entry->vme_start);
4974         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
4975                 /* clean up parent map/maps */
4976                 vm_map_submap_pmap_clean(
4977                         map, entry->vme_start,
4978                         entry->vme_end,
4979                         VME_SUBMAP(entry),
4980                         VME_OFFSET(entry));
4981         }
4982         entry->use_pmap = FALSE;
4983         if ((map->pmap != kernel_pmap) &&
4984             (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4985                 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
4986         }
4987 }
4988 #endif  /* NO_NESTED_PMAP */
4989
4990 /*
4991  *      vm_map_clip_start:      [ internal use only ]
4992  *
4993  *      Asserts that the given entry begins at or after
4994  *      the specified address; if necessary,
4995  *      it splits the entry into two.
4996  */
4997 void
4998 vm_map_clip_start(
4999         vm_map_t        map,
5000         vm_map_entry_t  entry,
5001         vm_map_offset_t startaddr)
5002 {
5003 #ifndef NO_NESTED_PMAP
5004         if (entry->is_sub_map &&
5005             entry->use_pmap &&
5006             startaddr >= entry->vme_start) {
5007                 vm_map_offset_t start_unnest, end_unnest;
5008
5009                 /*
5010                  * Make sure "startaddr" is no longer in a nested range
5011                  * before we clip.  Unnest only the minimum range the platform
5012                  * can handle.
5013                  * vm_map_clip_unnest may perform additional adjustments to
5014                  * the unnest range.
5015                  */
5016                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5017                 end_unnest = start_unnest + pmap_nesting_size_min;
5018                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5019         }
5020 #endif /* NO_NESTED_PMAP */
5021         if (startaddr > entry->vme_start) {
5022                 if (VME_OBJECT(entry) &&
5023                     !entry->is_sub_map &&
5024                     VME_OBJECT(entry)->phys_contiguous) {
5025                         pmap_remove(map->pmap,
5026                                     (addr64_t)(entry->vme_start),
5027                                     (addr64_t)(entry->vme_end));
5028                 }
5029                 if (entry->vme_atomic) {
5030                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5031                 }
5032                 _vm_map_clip_start(&map->hdr, entry, startaddr);
5033                 if (map->holelistenabled) {
5034                         vm_map_store_update_first_free(map, NULL, FALSE);
5035                 } else {
5036                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5037                 }
5038         }
5039 }
5040
5041
/*
 *	vm_map_copy_clip_start:
 *
 *	Clip-start for an entry that lives on a map copy's entry list
 *	("copy"->cpy_hdr): the entry is split at "startaddr" only when
 *	that address lies beyond the entry's current start.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_start < (startaddr)) {				\
		_vm_map_clip_start(&(copy)->cpy_hdr,			\
				   (entry), (startaddr));		\
	}								\
	MACRO_END
5047
5048 /*
5049  *      This routine is called only when it is known that
5050  *      the entry must be split.
5051  */
5052 static void
5053 _vm_map_clip_start(
5054         struct vm_map_header    *map_header,
5055         vm_map_entry_t          entry,
5056         vm_map_offset_t         start)
5057 {
5058         vm_map_entry_t  new_entry;
5059
5060         /*
5061          *      Split off the front portion --
5062          *      note that we must insert the new
5063          *      entry BEFORE this one, so that
5064          *      this entry has the specified starting
5065          *      address.
5066          */
5067
5068         if (entry->map_aligned) {
5069                 assert(VM_MAP_PAGE_ALIGNED(start,
5070                                            VM_MAP_HDR_PAGE_MASK(map_header)));
5071         }
5072
5073         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5074         vm_map_entry_copy_full(new_entry, entry);
5075
5076         new_entry->vme_end = start;
5077         assert(new_entry->vme_start < new_entry->vme_end);
5078         VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5079         assert(start < entry->vme_end);
5080         entry->vme_start = start;
5081
5082         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5083
5084         if (entry->is_sub_map)
5085                 vm_map_reference(VME_SUBMAP(new_entry));
5086         else
5087                 vm_object_reference(VME_OBJECT(new_entry));
5088 }
5089
5090
5091 /*
5092  *      vm_map_clip_end:        [ internal use only ]
5093  *
5094  *      Asserts that the given entry ends at or before
5095  *      the specified address; if necessary,
5096  *      it splits the entry into two.
5097  */
5098 void
5099 vm_map_clip_end(
5100         vm_map_t        map,
5101         vm_map_entry_t  entry,
5102         vm_map_offset_t endaddr)
5103 {
5104         if (endaddr > entry->vme_end) {
5105                 /*
5106                  * Within the scope of this clipping, limit "endaddr" to
5107                  * the end of this map entry...
5108                  */
5109                 endaddr = entry->vme_end;
5110         }
5111 #ifndef NO_NESTED_PMAP
5112         if (entry->is_sub_map && entry->use_pmap) {
5113                 vm_map_offset_t start_unnest, end_unnest;
5114
5115                 /*
5116                  * Make sure the range between the start of this entry and
5117                  * the new "endaddr" is no longer nested before we clip.
5118                  * Unnest only the minimum range the platform can handle.
5119                  * vm_map_clip_unnest may perform additional adjustments to
5120                  * the unnest range.
5121                  */
5122                 start_unnest = entry->vme_start;
5123                 end_unnest =
5124                         (endaddr + pmap_nesting_size_min - 1) &
5125                         ~(pmap_nesting_size_min - 1);
5126                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5127         }
5128 #endif /* NO_NESTED_PMAP */
5129         if (endaddr < entry->vme_end) {
5130                 if (VME_OBJECT(entry) &&
5131                     !entry->is_sub_map &&
5132                     VME_OBJECT(entry)->phys_contiguous) {
5133                         pmap_remove(map->pmap,
5134                                     (addr64_t)(entry->vme_start),
5135                                     (addr64_t)(entry->vme_end));
5136                 }
5137                 if (entry->vme_atomic) {
5138                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5139                 }
5140                 _vm_map_clip_end(&map->hdr, entry, endaddr);
5141                 if (map->holelistenabled) {
5142                         vm_map_store_update_first_free(map, NULL, FALSE);
5143                 } else {
5144                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5145                 }
5146         }
5147 }
5148
5149
/*
 *	vm_map_copy_clip_end:
 *
 *	Clip-end for an entry that lives on a map copy's entry list
 *	("copy"->cpy_hdr): the entry is split at "endaddr" only when
 *	that address lies before the entry's current end.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_end > (endaddr)) {				\
		_vm_map_clip_end(&(copy)->cpy_hdr,			\
				 (entry), (endaddr));			\
	}								\
	MACRO_END
5155
5156 /*
5157  *      This routine is called only when it is known that
5158  *      the entry must be split.
5159  */
5160 static void
5161 _vm_map_clip_end(
5162         struct vm_map_header    *map_header,
5163         vm_map_entry_t          entry,
5164         vm_map_offset_t         end)
5165 {
5166         vm_map_entry_t  new_entry;
5167
5168         /*
5169          *      Create a new entry and insert it
5170          *      AFTER the specified entry
5171          */
5172
5173         if (entry->map_aligned) {
5174                 assert(VM_MAP_PAGE_ALIGNED(end,
5175                                            VM_MAP_HDR_PAGE_MASK(map_header)));
5176         }
5177
5178         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5179         vm_map_entry_copy_full(new_entry, entry);
5180
5181         assert(entry->vme_start < end);
5182         new_entry->vme_start = entry->vme_end = end;
5183         VME_OFFSET_SET(new_entry,
5184                        VME_OFFSET(new_entry) + (end - entry->vme_start));
5185         assert(new_entry->vme_start < new_entry->vme_end);
5186
5187         _vm_map_store_entry_link(map_header, entry, new_entry);
5188
5189         if (entry->is_sub_map)
5190                 vm_map_reference(VME_SUBMAP(new_entry));
5191         else
5192                 vm_object_reference(VME_OBJECT(new_entry));
5193 }
5194
5195
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the starting and ending region addresses to the valid
 *	range of the map: "start" is raised to vm_map_min(map) and
 *	"end" lowered to vm_map_max(map) where needed, and if "start"
 *	still lies beyond "end" it is pulled back to "end" (leaving an
 *	empty range).
 *
 *	"start" and "end" are assigned to, so both must be lvalues;
 *	each argument is evaluated more than once.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if ((start) < vm_map_min(map)) {	\
		(start) = vm_map_min(map);	\
	}					\
	if ((end) > vm_map_max(map)) {		\
		(end) = vm_map_max(map);	\
	}					\
	if ((start) > (end)) {			\
		(start) = (end);		\
	}					\
	MACRO_END
5211
5212 /*
5213  *      vm_map_range_check:     [ internal use only ]
5214  *
5215  *      Check that the region defined by the specified start and
5216  *      end addresses are wholly contained within a single map
5217  *      entry or set of adjacent map entries of the spacified map,
5218  *      i.e. the specified region contains no unmapped space.
5219  *      If any or all of the region is unmapped, FALSE is returned.
5220  *      Otherwise, TRUE is returned and if the output argument 'entry'
5221  *      is not NULL it points to the map entry containing the start
5222  *      of the region.
5223  *
5224  *      The map is locked for reading on entry and is left locked.
5225  */
5226 static boolean_t
5227 vm_map_range_check(
5228         vm_map_t                map,
5229         vm_map_offset_t         start,
5230         vm_map_offset_t         end,
5231         vm_map_entry_t          *entry)
5232 {
5233         vm_map_entry_t          cur;
5234         vm_map_offset_t         prev;
5235
5236         /*
5237          *      Basic sanity checks first
5238          */
5239         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
5240                 return (FALSE);
5241
5242         /*
5243          *      Check first if the region starts within a valid
5244          *      mapping for the map.
5245          */
5246         if (!vm_map_lookup_entry(map, start, &cur))
5247                 return (FALSE);
5248
5249         /*
5250          *      Optimize for the case that the region is contained
5251          *      in a single map entry.
5252          */
5253         if (entry != (vm_map_entry_t *) NULL)
5254                 *entry = cur;
5255         if (end <= cur->vme_end)
5256                 return (TRUE);
5257
5258         /*
5259          *      If the region is not wholly contained within a
5260          *      single entry, walk the entries looking for holes.
5261          */
5262         prev = cur->vme_end;
5263         cur = cur->vme_next;
5264         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5265                 if (end <= cur->vme_end)
5266                         return (TRUE);
5267                 prev = cur->vme_end;
5268                 cur = cur->vme_next;
5269         }
5270         return (FALSE);
5271 }
5272
5273 /*
5274  *      vm_map_submap:          [ kernel use only ]
5275  *
5276  *      Mark the given range as handled by a subordinate map.
5277  *
5278  *      This range must have been created with vm_map_find using
5279  *      the vm_submap_object, and no other operations may have been
5280  *      performed on this range prior to calling vm_map_submap.
5281  *
5282  *      Only a limited number of operations can be performed
5283  *      within this rage after calling vm_map_submap:
5284  *              vm_fault
5285  *      [Don't try vm_map_copyin!]
5286  *
5287  *      To remove a submapping, one must first remove the
5288  *      range from the superior map, and then destroy the
5289  *      submap (if desired).  [Better yet, don't try it.]
5290  */
5291 kern_return_t
5292 vm_map_submap(
5293         vm_map_t        map,
5294         vm_map_offset_t start,
5295         vm_map_offset_t end,
5296         vm_map_t        submap,
5297         vm_map_offset_t offset,
5298 #ifdef NO_NESTED_PMAP
5299         __unused
5300 #endif  /* NO_NESTED_PMAP */
5301         boolean_t       use_pmap)
5302 {
5303         vm_map_entry_t          entry;
5304         kern_return_t           result = KERN_INVALID_ARGUMENT;
5305         vm_object_t             object;
5306
5307         vm_map_lock(map);
5308
5309         if (! vm_map_lookup_entry(map, start, &entry)) {
5310                 entry = entry->vme_next;
5311         }
5312
5313         if (entry == vm_map_to_entry(map) ||
5314             entry->is_sub_map) {
5315                 vm_map_unlock(map);
5316                 return KERN_INVALID_ARGUMENT;
5317         }
5318
5319         vm_map_clip_start(map, entry, start);
5320         vm_map_clip_end(map, entry, end);
5321
5322         if ((entry->vme_start == start) && (entry->vme_end == end) &&
5323             (!entry->is_sub_map) &&
5324             ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5325             (object->resident_page_count == 0) &&
5326             (object->copy == VM_OBJECT_NULL) &&
5327             (object->shadow == VM_OBJECT_NULL) &&
5328             (!object->pager_created)) {
5329                 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5330                 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5331                 vm_object_deallocate(object);
5332                 entry->is_sub_map = TRUE;
5333                 entry->use_pmap = FALSE;
5334                 VME_SUBMAP_SET(entry, submap);
5335                 vm_map_reference(submap);
5336                 if (submap->mapped_in_other_pmaps == FALSE &&
5337                     vm_map_pmap(submap) != PMAP_NULL &&
5338                     vm_map_pmap(submap) != vm_map_pmap(map)) {
5339                         /*
5340                          * This submap is being mapped in a map
5341                          * that uses a different pmap.
5342                          * Set its "mapped_in_other_pmaps" flag
5343                          * to indicate that we now need to
5344                          * remove mappings from all pmaps rather
5345                          * than just the submap's pmap.
5346                          */
5347                         submap->mapped_in_other_pmaps = TRUE;
5348                 }
5349
5350 #ifndef NO_NESTED_PMAP
5351                 if (use_pmap) {
5352                         /* nest if platform code will allow */
5353                         if(submap->pmap == NULL) {
5354                                 ledger_t ledger = map->pmap->ledger;
5355                                 submap->pmap = pmap_create(ledger,
5356                                                 (vm_map_size_t) 0, FALSE);
5357                                 if(submap->pmap == PMAP_NULL) {
5358                                         vm_map_unlock(map);
5359                                         return(KERN_NO_SPACE);
5360                                 }
5361 #if     defined(__arm__) || defined(__arm64__)
5362                                 pmap_set_nested(submap->pmap);
5363 #endif
5364                         }
5365                         result = pmap_nest(map->pmap,
5366                                            (VME_SUBMAP(entry))->pmap,
5367                                            (addr64_t)start,
5368                                            (addr64_t)start,
5369                                            (uint64_t)(end - start));
5370                         if(result)
5371                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5372                         entry->use_pmap = TRUE;
5373                 }
5374 #else   /* NO_NESTED_PMAP */
5375                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5376 #endif  /* NO_NESTED_PMAP */
5377                 result = KERN_SUCCESS;
5378         }
5379         vm_map_unlock(map);
5380
5381         return(result);
5382 }
5383
5384 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5385 #include <sys/codesign.h>
5386 extern int proc_selfcsflags(void);
5387 extern int panic_on_unsigned_execute;
5388 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5389
5390 /*
5391  *      vm_map_protect:
5392  *
5393  *      Sets the protection of the specified address
5394  *      region in the target map.  If "set_max" is
5395  *      specified, the maximum protection is to be set;
5396  *      otherwise, only the current protection is affected.
5397  */
5398 kern_return_t
5399 vm_map_protect(
5400         vm_map_t        map,
5401         vm_map_offset_t start,
5402         vm_map_offset_t end,
5403         vm_prot_t       new_prot,
5404         boolean_t       set_max)
5405 {
5406         vm_map_entry_t                  current;
5407         vm_map_offset_t                 prev;
5408         vm_map_entry_t                  entry;
5409         vm_prot_t                       new_max;
5410         int                             pmap_options = 0;
5411         kern_return_t                   kr;
5412
5413         XPR(XPR_VM_MAP,
5414             "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
5415             map, start, end, new_prot, set_max);
5416
5417         if (new_prot & VM_PROT_COPY) {
5418                 vm_map_offset_t         new_start;
5419                 vm_prot_t               cur_prot, max_prot;
5420                 vm_map_kernel_flags_t   kflags;
5421
5422                 /* LP64todo - see below */
5423                 if (start >= map->max_offset) {
5424                         return KERN_INVALID_ADDRESS;
5425                 }
5426
5427                 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5428                 kflags.vmkf_remap_prot_copy = TRUE;
5429                 new_start = start;
5430                 kr = vm_map_remap(map,
5431                                   &new_start,
5432                                   end - start,
5433                                   0, /* mask */
5434                                   VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5435                                   kflags,
5436                                   0,
5437                                   map,
5438                                   start,
5439                                   TRUE, /* copy-on-write remapping! */
5440                                   &cur_prot,
5441                                   &max_prot,
5442                                   VM_INHERIT_DEFAULT);
5443                 if (kr != KERN_SUCCESS) {
5444                         return kr;
5445                 }
5446                 new_prot &= ~VM_PROT_COPY;
5447         }
5448
5449         vm_map_lock(map);
5450
5451         /* LP64todo - remove this check when vm_map_commpage64()
5452          * no longer has to stuff in a map_entry for the commpage
5453          * above the map's max_offset.
5454          */
5455         if (start >= map->max_offset) {
5456                 vm_map_unlock(map);
5457                 return(KERN_INVALID_ADDRESS);
5458         }
5459
5460         while(1) {
5461                 /*
5462                  *      Lookup the entry.  If it doesn't start in a valid
5463                  *      entry, return an error.
5464                  */
5465                 if (! vm_map_lookup_entry(map, start, &entry)) {
5466                         vm_map_unlock(map);
5467                         return(KERN_INVALID_ADDRESS);
5468                 }
5469
5470                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
5471                         start = SUPERPAGE_ROUND_DOWN(start);
5472                         continue;
5473                 }
5474                 break;
5475         }
5476         if (entry->superpage_size)
5477                 end = SUPERPAGE_ROUND_UP(end);
5478
5479         /*
5480          *      Make a first pass to check for protection and address
5481          *      violations.
5482          */
5483
5484         current = entry;
5485         prev = current->vme_start;
5486         while ((current != vm_map_to_entry(map)) &&
5487                (current->vme_start < end)) {
5488
5489                 /*
5490                  * If there is a hole, return an error.
5491                  */
5492                 if (current->vme_start != prev) {
5493                         vm_map_unlock(map);
5494                         return(KERN_INVALID_ADDRESS);
5495                 }
5496
5497                 new_max = current->max_protection;
5498                 if ((new_prot & new_max) != new_prot) {
5499                         vm_map_unlock(map);
5500                         return(KERN_PROTECTION_FAILURE);
5501                 }
5502
5503 #if CONFIG_EMBEDDED
5504                 if (new_prot & VM_PROT_WRITE) {
5505                         if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
5506                                 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
5507                                 new_prot &= ~VM_PROT_EXECUTE;
5508                         }
5509                 }
5510 #endif
5511
5512                 /*
5513                  * If the task has requested executable lockdown,
5514                  * deny both:
5515                  * - adding executable protections OR
5516                  * - adding write protections to an existing executable mapping.
5517                  */
5518                 if (map->map_disallow_new_exec == TRUE) {
5519                         if ((new_prot & VM_PROT_EXECUTE) ||
5520                             ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5521                                 vm_map_unlock(map);
5522                                 return(KERN_PROTECTION_FAILURE);
5523                         }
5524                 }
5525
5526                 prev = current->vme_end;
5527                 current = current->vme_next;
5528         }
5529
5530 #if __arm64__
5531         if (end > prev &&
5532             end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5533                 vm_map_entry_t prev_entry;
5534
5535                 prev_entry = current->vme_prev;
5536                 if (prev_entry != vm_map_to_entry(map) &&
5537                     !prev_entry->map_aligned &&
5538                     (vm_map_round_page(prev_entry->vme_end,
5539                                        VM_MAP_PAGE_MASK(map))
5540                      == end)) {
5541                         /*
5542                          * The last entry in our range is not "map-aligned"
5543                          * but it would have reached all the way to "end"
5544                          * if it had been map-aligned, so this is not really
5545                          * a hole in the range and we can proceed.
5546                          */
5547                         prev = end;
5548                 }
5549         }
5550 #endif /* __arm64__ */
5551
5552         if (end > prev) {
5553                 vm_map_unlock(map);
5554                 return(KERN_INVALID_ADDRESS);
5555         }
5556
5557         /*
5558          *      Go back and fix up protections.
5559          *      Clip to start here if the range starts within
5560          *      the entry.
5561          */
5562
5563         current = entry;
5564         if (current != vm_map_to_entry(map)) {
5565                 /* clip and unnest if necessary */
5566                 vm_map_clip_start(map, current, start);
5567         }
5568
5569         while ((current != vm_map_to_entry(map)) &&
5570                (current->vme_start < end)) {
5571
5572                 vm_prot_t       old_prot;
5573
5574                 vm_map_clip_end(map, current, end);
5575
5576                 if (current->is_sub_map) {
5577                         /* clipping did unnest if needed */
5578                         assert(!current->use_pmap);
5579                 }
5580
5581                 old_prot = current->protection;
5582
5583                 if (set_max) {
5584                         current->max_protection = new_prot;
5585                         current->protection = new_prot & old_prot;
5586                 } else {
5587                         current->protection = new_prot;
5588                 }
5589
5590                 /*
5591                  *      Update physical map if necessary.
5592                  *      If the request is to turn off write protection,
5593                  *      we won't do it for real (in pmap). This is because
5594                  *      it would cause copy-on-write to fail.  We've already
5595                  *      set the new protection in the map, so if a
5596                  *      write-protect fault occurs, it will be fixed up
5597                  *      properly, COW or not.
5598                  */
5599                 if (current->protection != old_prot) {
5600                         /* Look one level in we support nested pmaps */
5601                         /* from mapped submaps which are direct entries */
5602                         /* in our map */
5603
5604                         vm_prot_t prot;
5605
5606                         prot = current->protection;
5607                         if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5608                                 prot &= ~VM_PROT_WRITE;
5609                         } else {
5610                                 assert(!VME_OBJECT(current)->code_signed);
5611                                 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5612                         }
5613
5614                         if (override_nx(map, VME_ALIAS(current)) && prot)
5615                                 prot |= VM_PROT_EXECUTE;
5616
5617 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5618                         if (!(old_prot & VM_PROT_EXECUTE) &&
5619                             (prot & VM_PROT_EXECUTE) &&
5620                             (proc_selfcsflags() & CS_KILL) &&
5621                             panic_on_unsigned_execute) {
5622                                 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5623                         }
5624 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5625
5626                         if (pmap_has_prot_policy(prot)) {
5627                                 if (current->wired_count) {
5628                                         panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5629                                               map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5630                                 }
5631
5632                                 /* If the pmap layer cares about this
5633                                  * protection type, force a fault for
5634                                  * each page so that vm_fault will
5635                                  * repopulate the page with the full
5636                                  * set of protections.
5637                                  */
5638                                 /*
5639                                  * TODO: We don't seem to need this,
5640                                  * but this is due to an internal
5641                                  * implementation detail of
5642                                  * pmap_protect.  Do we want to rely
5643                                  * on this?
5644                                  */
5645                                 prot = VM_PROT_NONE;
5646                         }
5647
5648                         if (current->is_sub_map && current->use_pmap) {
5649                                 pmap_protect(VME_SUBMAP(current)->pmap,
5650                                              current->vme_start,
5651                                              current->vme_end,
5652                                              prot);
5653                         } else {
5654                                 if (prot & VM_PROT_WRITE) {
5655                                         if (VME_OBJECT(current) == compressor_object) {
5656                                                 /*
5657                                                  * For write requests on the
5658                                                  * compressor, we will ask the
5659                                                  * pmap layer to prevent us from
5660                                                  * taking a write fault when we
5661                                                  * attempt to access the mapping
5662                                                  * next.
5663                                                  */
5664                                                 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5665                                         }
5666                                 }
5667
5668                                 pmap_protect_options(map->pmap,
5669                                                      current->vme_start,
5670                                                      current->vme_end,
5671                                                      prot,
5672                                                      pmap_options,
5673                                                      NULL);
5674                         }
5675                 }
5676                 current = current->vme_next;
5677         }
5678
5679         current = entry;
5680         while ((current != vm_map_to_entry(map)) &&
5681                (current->vme_start <= end)) {
5682                 vm_map_simplify_entry(map, current);
5683                 current = current->vme_next;
5684         }
5685
5686         vm_map_unlock(map);
5687         return(KERN_SUCCESS);
5688 }
5689
5690 /*
5691  *      vm_map_inherit:
5692  *
5693  *      Set the inheritance attribute of every entry in "map" that
5694  *      overlaps [start, end) to "new_inheritance"; the attribute
5695  *      governs how the range is shared with child maps at
5696  *      vm_map_fork time.  Boundary entries are clipped to the range.
5697  *
5698  *      Returns KERN_INVALID_ARGUMENT, before modifying any entry,
5699  *      if VM_INHERIT_COPY is requested and the range contains a
5700  *      submap entry; otherwise returns KERN_SUCCESS.
5701  */
5702 kern_return_t
5703 vm_map_inherit(
5704         vm_map_t        map,
5705         vm_map_offset_t start,
5706         vm_map_offset_t end,
5707         vm_inherit_t    new_inheritance)
5708 {
5709         vm_map_entry_t  first;
5710         vm_map_entry_t  cur;
5711
5712         vm_map_lock(map);
5713
5714         VM_MAP_RANGE_CHECK(map, start, end);
5715
5716         /* no entry contains "start": begin with the one after the lookup result */
5717         if (!vm_map_lookup_entry(map, start, &first)) {
5718                 first = first->vme_next;
5719         }
5720
5721         /* Pass 1: reject VM_INHERIT_COPY if the range holds a submap. */
5722         for (cur = first;
5723              (cur != vm_map_to_entry(map)) && (cur->vme_start < end);
5724              cur = cur->vme_next) {
5725                 if (cur->is_sub_map &&
5726                     (new_inheritance == VM_INHERIT_COPY)) {
5727                         vm_map_unlock(map);
5728                         return(KERN_INVALID_ARGUMENT);
5729                 }
5730         }
5731
5732         /* Pass 2: clip to the range and update each entry in it. */
5733         cur = first;
5734         if (cur != vm_map_to_entry(map)) {
5735                 /* clip and unnest if necessary */
5736                 vm_map_clip_start(map, cur, start);
5737         }
5738
5739         for (;
5740              (cur != vm_map_to_entry(map)) && (cur->vme_start < end);
5741              cur = cur->vme_next) {
5742                 vm_map_clip_end(map, cur, end);
5743                 if (cur->is_sub_map) {
5744                         /* clip did unnest if needed */
5745                         assert(!cur->use_pmap);
5746                 }
5747
5748                 cur->inheritance = new_inheritance;
5749         }
5750
5751         vm_map_unlock(map);
5752         return(KERN_SUCCESS);
5753 }
5754
5755 /*
5756  * Account for one more wiring of "entry" in "map".
5757  *
5758  * Kernel wirings (user_wire == FALSE) only bump entry->wired_count and
5759  * panic rather than fail if that count is already at MAX_WIRE_COUNT.
5760  *
5761  * User wirings bump entry->user_wired_count.  The first user wiring of
5762  * an entry is also checked against the per-map and global user wire
5763  * limits (KERN_RESOURCE_SHORTAGE if exceeded) and then charged to
5764  * entry->wired_count and map->user_wire_size.  KERN_FAILURE is returned
5765  * if a count to be bumped is already at MAX_WIRE_COUNT.
5766  */
5767 static kern_return_t
5768 add_wire_counts(
5769         vm_map_t        map,
5770         vm_map_entry_t  entry,
5771         boolean_t       user_wire)
5772 {
5773         unsigned int    wired_page_total;
5774         vm_map_size_t   entry_bytes;
5775
5776         if (!user_wire) {
5777                 /* The kernel's wiring the memory: just bump the count. */
5778                 if (entry->wired_count >= MAX_WIRE_COUNT) {
5779                         panic("vm_map_wire: too many wirings");
5780                 }
5781                 entry->wired_count++;
5782                 return KERN_SUCCESS;
5783         }
5784
5785         /* Wiring at the request of the user. */
5786         wired_page_total = vm_page_wire_count + vm_lopage_free_count;
5787
5788         if (entry->user_wired_count == 0) {
5789                 /*
5790                  * First user wiring of this entry: enforce the per-map
5791                  * limit (the smaller of the map's own limit and
5792                  * vm_user_wire_limit) and the two system-wide limits
5793                  * before charging the entry's size to the map.
5794                  */
5795                 entry_bytes = entry->vme_end - entry->vme_start;
5796                 if (entry_bytes + map->user_wire_size >
5797                     MIN(map->user_wire_limit, vm_user_wire_limit)) {
5798                         return KERN_RESOURCE_SHORTAGE;
5799                 }
5800                 if (entry_bytes + ptoa_64(wired_page_total) >
5801                     vm_global_user_wire_limit) {
5802                         return KERN_RESOURCE_SHORTAGE;
5803                 }
5804                 if (entry_bytes + ptoa_64(wired_page_total) >
5805                     max_mem - vm_global_no_user_wire_amount) {
5806                         return KERN_RESOURCE_SHORTAGE;
5807                 }
5808
5809                 if (entry->wired_count >= MAX_WIRE_COUNT) {
5810                         return KERN_FAILURE;
5811                 }
5812
5813                 entry->wired_count++;
5814                 map->user_wire_size += entry_bytes;
5815         }
5816
5817         if (entry->user_wired_count >= MAX_WIRE_COUNT) {
5818                 return KERN_FAILURE;
5819         }
5820         entry->user_wired_count++;
5821         return KERN_SUCCESS;
5822 }
5823
5824 /*
5825  * Update the memory wiring accounting of "map" now that "entry" is
5826  * being unwired once.
5827  *
5828  * Kernel unwire (user_wire == FALSE):
5829  *      drops entry->wired_count.
5830  * User unwire:
5831  *      drops entry->user_wired_count; if that was the last user
5832  *      wiring, also drops entry->wired_count and subtracts the
5833  *      entry's size from map->user_wire_size.
5834  *
5835  * Each count is asserted to be at least 1 before it is dropped.
5836  */
5837 static void
5838 subtract_wire_counts(
5839         vm_map_t        map,
5840         vm_map_entry_t  entry,
5841         boolean_t       user_wire)
5842 {
5843         if (!user_wire) {
5844                 /* The kernel is unwiring the memory: just update the count. */
5845                 assert(entry->wired_count >= 1);
5846                 entry->wired_count--;
5847                 return;
5848         }
5849
5850         /* Unwiring at the request of the user. */
5851         if (entry->user_wired_count == 1) {
5852                 /*
5853                  * This is the last user wire reference: give back the
5854                  * wired_count it held and the entry's share of the
5855                  * map's user wired total.
5856                  */
5857                 assert(entry->wired_count >= 1);
5858                 entry->wired_count--;
5859                 map->user_wire_size -= entry->vme_end - entry->vme_start;
5860         }
5861
5862         /* Every user unwire drops one user wire reference. */
5863         assert(entry->user_wired_count >= 1);
5864         entry->user_wired_count--;
5865 }
5866
5867 #if CONFIG_EMBEDDED
5868 int cs_executable_wire = 0; /* defaults to 0; NOTE(review): its consumer is outside the code visible here -- presumably gates wiring of executable mappings, confirm */
5869 #endif /* CONFIG_EMBEDDED */
5870
5871 /*
5872  *      vm_map_wire:
5873  *
5874  *      Sets the pageability of the specified address range in the
5875  *      target map as wired.  Regions specified as not pageable require
5876  *      locked-down physical memory and physical page maps.  The
5877  *      access_type variable indicates types of accesses that must not
5878  *      generate page faults.  This is checked against protection of
5879  *      memory being locked-down.
5880  *
5881  *      The map must not be locked, but a reference must remain to the
5882  *      map throughout the call.
5883  */
5884 static kern_return_t
5885 vm_map_wire_nested(
5886         vm_map_t                map,
5887         vm_map_offset_t         start,
5888         vm_map_offset_t         end,
5889         vm_prot_t               caller_prot,
5890         vm_tag_t                tag,
5891         boolean_t               user_wire,
5892         pmap_t                  map_pmap,
5893         vm_map_offset_t         pmap_addr,
5894         ppnum_t                 *physpage_p)
5895 {
5896         vm_map_entry_t          entry;
5897         vm_prot_t               access_type;
5898         struct vm_map_entry     *first_entry, tmp_entry;
5899         vm_map_t                real_map;
5900         vm_map_offset_t         s,e;
5901         kern_return_t           rc;
5902         boolean_t               need_wakeup;
5903         boolean_t               main_map = FALSE;
5904         wait_interrupt_t        interruptible_state;
5905         thread_t                cur_thread;
5906         unsigned int            last_timestamp;
5907         vm_map_size_t           size;
5908         boolean_t               wire_and_extract;
5909
5910         access_type = (caller_prot & VM_PROT_ALL);
5911
5912         wire_and_extract = FALSE;
5913         if (physpage_p != NULL) {
5914                 /*
5915                  * The caller wants the physical page number of the
5916                  * wired page.  We return only one physical page number
5917                  * so this works for only one page at a time.
5918                  */
5919                 if ((end - start) != PAGE_SIZE) {
5920                         return KERN_INVALID_ARGUMENT;
5921                 }
5922                 wire_and_extract = TRUE;
5923                 *physpage_p = 0;
5924         }
5925
5926         vm_map_lock(map);
5927         if(map_pmap == NULL)
5928                 main_map = TRUE;
5929         last_timestamp = map->timestamp;
5930
5931         VM_MAP_RANGE_CHECK(map, start, end);
5932         assert(page_aligned(start));
5933         assert(page_aligned(end));
5934         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5935         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5936         if (start == end) {
5937                 /* We wired what the caller asked for, zero pages */
5938                 vm_map_unlock(map);
5939                 return KERN_SUCCESS;
5940         }
5941
5942         need_wakeup = FALSE;
5943         cur_thread = current_thread();
5944
5945         s = start;
5946         rc = KERN_SUCCESS;
5947
5948         if (vm_map_lookup_entry(map, s, &first_entry)) {
5949                 entry = first_entry;
5950                 /*
5951                  * vm_map_clip_start will be done later.
5952                  * We don't want to unnest any nested submaps here !
5953                  */
5954         } else {
5955                 /* Start address is not in map */
5956                 rc = KERN_INVALID_ADDRESS;
5957                 goto done;
5958         }
5959
5960         while ((entry != vm_map_to_entry(map)) && (s < end)) {
5961                 /*
5962                  * At this point, we have wired from "start" to "s".
5963                  * We still need to wire from "s" to "end".
5964                  *
5965                  * "entry" hasn't been clipped, so it could start before "s"
5966                  * and/or end after "end".
5967                  */
5968
5969                 /* "e" is how far we want to wire in this entry */
5970                 e = entry->vme_end;
5971                 if (e > end)
5972                         e = end;
5973
5974                 /*
5975                  * If another thread is wiring/unwiring this entry then
5976                  * block after informing other thread to wake us up.
5977                  */
5978                 if (entry->in_transition) {
5979                         wait_result_t wait_result;
5980
5981                         /*
5982                          * We have not clipped the entry.  Make sure that
5983                          * the start address is in range so that the lookup
5984                          * below will succeed.
5985                          * "s" is the current starting point: we've already
5986                          * wired from "start" to "s" and we still have
5987                          * to wire from "s" to "end".
5988                          */
5989
5990                         entry->needs_wakeup = TRUE;
5991
5992                         /*
5993                          * wake up anybody waiting on entries that we have
5994                          * already wired.
5995                          */
5996                         if (need_wakeup) {
5997                                 vm_map_entry_wakeup(map);
5998                                 need_wakeup = FALSE;
5999                         }
6000                         /*
6001                          * User wiring is interruptible
6002                          */
6003                         wait_result = vm_map_entry_wait(map,
6004                                                         (user_wire) ? THREAD_ABORTSAFE :
6005                                                         THREAD_UNINT);
6006                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
6007                                 /*
6008                                  * undo the wirings we have done so far
6009                                  * We do not clear the needs_wakeup flag,
6010                                  * because we cannot tell if we were the
6011                                  * only one waiting.
6012                                  */
6013                                 rc = KERN_FAILURE;
6014                                 goto done;
6015                         }
6016
6017                         /*
6018                          * Cannot avoid a lookup here. reset timestamp.
6019                          */
6020                         last_timestamp = map->timestamp;
6021
6022                         /*
6023                          * The entry could have been clipped, look it up again.
6024                          * The worst that can happen is, it may not exist anymore.
6025                          */
6026                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
6027                                 /*
6028                                  * User: undo everything up to the previous
6029                                  * entry.  let vm_map_unwire worry about
6030                                  * checking the validity of the range.
6031                                  */
6032                                 rc = KERN_FAILURE;
6033                                 goto done;
6034                         }
6035                         entry = first_entry;
6036                         continue;
6037                 }
6038
6039                 if (entry->is_sub_map) {
6040                         vm_map_offset_t sub_start;
6041                         vm_map_offset_t sub_end;
6042                         vm_map_offset_t local_start;
6043                         vm_map_offset_t local_end;
6044                         pmap_t          pmap;
6045
6046                         if (wire_and_extract) {
6047                                 /*
6048                                  * Wiring would result in copy-on-write
6049                                  * which would not be compatible with
6050                                  * the sharing we have with the original
6051                                  * provider of this memory.
6052                                  */
6053                                 rc = KERN_INVALID_ARGUMENT;
6054                                 goto done;
6055                         }
6056
6057                         vm_map_clip_start(map, entry, s);
6058                         vm_map_clip_end(map, entry, end);
6059
6060                         sub_start = VME_OFFSET(entry);
6061                         sub_end = entry->vme_end;
6062                         sub_end += VME_OFFSET(entry) - entry->vme_start;
6063
6064                         local_end = entry->vme_end;
6065                         if(map_pmap == NULL) {
6066                                 vm_object_t             object;
6067                                 vm_object_offset_t      offset;
6068                                 vm_prot_t               prot;
6069                                 boolean_t               wired;
6070                                 vm_map_entry_t          local_entry;
6071                                 vm_map_version_t         version;
6072                                 vm_map_t                lookup_map;
6073
6074                                 if(entry->use_pmap) {
6075                                         pmap = VME_SUBMAP(entry)->pmap;
6076                                         /* ppc implementation requires that */
6077                                         /* submaps pmap address ranges line */
6078                                         /* up with parent map */
6079 #ifdef notdef
6080                                         pmap_addr = sub_start;
6081 #endif
6082                                         pmap_addr = s;
6083                                 } else {
6084                                         pmap = map->pmap;
6085                                         pmap_addr = s;
6086                                 }
6087
6088                                 if (entry->wired_count) {
6089                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6090                                                 goto done;
6091
6092                                         /*
6093                                          * The map was not unlocked:
6094                                          * no need to goto re-lookup.
6095                                          * Just go directly to next entry.
6096                                          */
6097                                         entry = entry->vme_next;
6098                                         s = entry->vme_start;
6099                                         continue;
6100
6101                                 }
6102
6103                                 /* call vm_map_lookup_locked to */
6104                                 /* cause any needs copy to be   */
6105                                 /* evaluated */
6106                                 local_start = entry->vme_start;
6107                                 lookup_map = map;
6108                                 vm_map_lock_write_to_read(map);
6109                                 if(vm_map_lookup_locked(
6110                                            &lookup_map, local_start,
6111                                            access_type | VM_PROT_COPY,
6112                                            OBJECT_LOCK_EXCLUSIVE,
6113                                            &version, &object,
6114                                            &offset, &prot, &wired,
6115                                            NULL,
6116                                            &real_map)) {
6117
6118                                         vm_map_unlock_read(lookup_map);
6119                                         assert(map_pmap == NULL);
6120                                         vm_map_unwire(map, start,
6121                                                       s, user_wire);
6122                                         return(KERN_FAILURE);
6123                                 }
6124                                 vm_object_unlock(object);
6125                                 if(real_map != lookup_map)
6126                                         vm_map_unlock(real_map);
6127                                 vm_map_unlock_read(lookup_map);
6128                                 vm_map_lock(map);
6129
6130                                 /* we unlocked, so must re-lookup */
6131                                 if (!vm_map_lookup_entry(map,
6132                                                          local_start,
6133                                                          &local_entry)) {
6134                                         rc = KERN_FAILURE;
6135                                         goto done;
6136                                 }
6137
6138                                 /*
6139                                  * entry could have been "simplified",
6140                                  * so re-clip
6141                                  */
6142                                 entry = local_entry;
6143                                 assert(s == local_start);
6144                                 vm_map_clip_start(map, entry, s);
6145                                 vm_map_clip_end(map, entry, end);
6146                                 /* re-compute "e" */
6147                                 e = entry->vme_end;
6148                                 if (e > end)
6149                                         e = end;
6150
6151                                 /* did we have a change of type? */
6152                                 if (!entry->is_sub_map) {
6153                                         last_timestamp = map->timestamp;
6154                                         continue;
6155                                 }
6156                         } else {
6157                                 local_start = entry->vme_start;
6158                                 pmap = map_pmap;
6159                         }
6160
6161                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6162                                 goto done;
6163
6164                         entry->in_transition = TRUE;
6165
6166                         vm_map_unlock(map);
6167                         rc = vm_map_wire_nested(VME_SUBMAP(entry),
6168                                                 sub_start, sub_end,
6169                                                 caller_prot, tag,
6170                                                 user_wire, pmap, pmap_addr,
6171                                                 NULL);
6172                         vm_map_lock(map);
6173
6174                         /*
6175                          * Find the entry again.  It could have been clipped
6176                          * after we unlocked the map.
6177                          */
6178                         if (!vm_map_lookup_entry(map, local_start,
6179                                                  &first_entry))
6180                                 panic("vm_map_wire: re-lookup failed");
6181                         entry = first_entry;
6182
6183                         assert(local_start == s);
6184                         /* re-compute "e" */
6185                         e = entry->vme_end;
6186                         if (e > end)
6187                                 e = end;
6188
6189                         last_timestamp = map->timestamp;
6190                         while ((entry != vm_map_to_entry(map)) &&
6191                                (entry->vme_start < e)) {
6192                                 assert(entry->in_transition);
6193                                 entry->in_transition = FALSE;
6194                                 if (entry->needs_wakeup) {
6195                                         entry->needs_wakeup = FALSE;
6196                                         need_wakeup = TRUE;
6197                                 }
6198                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6199                                         subtract_wire_counts(map, entry, user_wire);
6200                                 }
6201                                 entry = entry->vme_next;
6202                         }
6203                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6204                                 goto done;
6205                         }
6206
6207                         /* no need to relookup again */
6208                         s = entry->vme_start;
6209                         continue;
6210                 }
6211
6212                 /*
6213                  * If this entry is already wired then increment
6214                  * the appropriate wire reference count.
6215                  */
6216                 if (entry->wired_count) {
6217
6218                         if ((entry->protection & access_type) != access_type) {
6219                                 /* found a protection problem */
6220
6221                                 /*
6222                                  * XXX FBDP
6223                                  * We should always return an error
6224                                  * in this case but since we didn't
6225                                  * enforce it before, let's do
6226                                  * it only for the new "wire_and_extract"
6227                                  * code path for now...
6228                                  */
6229                                 if (wire_and_extract) {
6230                                         rc = KERN_PROTECTION_FAILURE;
6231                                         goto done;
6232                                 }
6233                         }
6234
6235                         /*
6236                          * entry is already wired down, get our reference
6237                          * after clipping to our range.
6238                          */
6239                         vm_map_clip_start(map, entry, s);
6240                         vm_map_clip_end(map, entry, end);
6241
6242                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6243                                 goto done;
6244
6245                         if (wire_and_extract) {
6246                                 vm_object_t             object;
6247                                 vm_object_offset_t      offset;
6248                                 vm_page_t               m;
6249
6250                                 /*
6251                                  * We don't have to "wire" the page again
6252                                  * but we still have to "extract" its
6253                                  * physical page number, after some sanity
6254                                  * checks.
6255                                  */
6256                                 assert((entry->vme_end - entry->vme_start)
6257                                        == PAGE_SIZE);
6258                                 assert(!entry->needs_copy);
6259                                 assert(!entry->is_sub_map);
6260                                 assert(VME_OBJECT(entry));
6261                                 if (((entry->vme_end - entry->vme_start)
6262                                      != PAGE_SIZE) ||
6263                                     entry->needs_copy ||
6264                                     entry->is_sub_map ||
6265                                     VME_OBJECT(entry) == VM_OBJECT_NULL) {
6266                                         rc = KERN_INVALID_ARGUMENT;
6267                                         goto done;
6268                                 }
6269
6270                                 object = VME_OBJECT(entry);
6271                                 offset = VME_OFFSET(entry);
6272                                 /* need exclusive lock to update m->dirty */
6273                                 if (entry->protection & VM_PROT_WRITE) {
6274                                         vm_object_lock(object);
6275                                 } else {
6276                                         vm_object_lock_shared(object);
6277                                 }
6278                                 m = vm_page_lookup(object, offset);
6279                                 assert(m != VM_PAGE_NULL);
6280                                 assert(VM_PAGE_WIRED(m));
6281                                 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6282                                         *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6283                                         if (entry->protection & VM_PROT_WRITE) {
6284                                                 vm_object_lock_assert_exclusive(
6285                                                         object);
6286                                                 m->dirty = TRUE;
6287                                         }
6288                                 } else {
6289                                         /* not already wired !? */
6290                                         *physpage_p = 0;
6291                                 }
6292                                 vm_object_unlock(object);
6293                         }
6294
6295                         /* map was not unlocked: no need to relookup */
6296                         entry = entry->vme_next;
6297                         s = entry->vme_start;
6298                         continue;
6299                 }
6300
6301                 /*
6302                  * Unwired entry or wire request transmitted via submap
6303                  */
6304
6305 #if CONFIG_EMBEDDED
6306                 /*
6307                  * Wiring would copy the pages to the shadow object.
6308                  * The shadow object would not be code-signed so
6309                  * attempting to execute code from these copied pages
6310                  * would trigger a code-signing violation.
6311                  */
6312                 if (entry->protection & VM_PROT_EXECUTE) {
6313 #if MACH_ASSERT
6314                         printf("pid %d[%s] wiring executable range from "
6315                                "0x%llx to 0x%llx: rejected to preserve "
6316                                "code-signing\n",
6317                                proc_selfpid(),
6318                                (current_task()->bsd_info
6319                                 ? proc_name_address(current_task()->bsd_info)
6320                                 : "?"),
6321                                (uint64_t) entry->vme_start,
6322                                (uint64_t) entry->vme_end);
6323 #endif /* MACH_ASSERT */
6324                         DTRACE_VM2(cs_executable_wire,
6325                                    uint64_t, (uint64_t)entry->vme_start,
6326                                    uint64_t, (uint64_t)entry->vme_end);
6327                         cs_executable_wire++;
6328                         rc = KERN_PROTECTION_FAILURE;
6329                         goto done;
6330                 }
6331 #endif /* CONFIG_EMBEDDED */
6332
6333
6334                 /*
6335                  * Perform actions of vm_map_lookup that need the write
6336                  * lock on the map: create a shadow object for a
6337                  * copy-on-write region, or an object for a zero-fill
6338                  * region.
6339                  */
6340                 size = entry->vme_end - entry->vme_start;
6341                 /*
6342                  * If wiring a copy-on-write page, we need to copy it now
6343                  * even if we're only (currently) requesting read access.
6344                  * This is aggressive, but once it's wired we can't move it.
6345                  */
6346                 if (entry->needs_copy) {
6347                         if (wire_and_extract) {
6348                                 /*
6349                                  * We're supposed to share with the original
6350                                  * provider so should not be "needs_copy"
6351                                  */
6352                                 rc = KERN_INVALID_ARGUMENT;
6353                                 goto done;
6354                         }
6355
6356                         VME_OBJECT_SHADOW(entry, size);
6357                         entry->needs_copy = FALSE;
6358                 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6359                         if (wire_and_extract) {
6360                                 /*
6361                                  * We're supposed to share with the original
6362                                  * provider so should already have an object.
6363                                  */
6364                                 rc = KERN_INVALID_ARGUMENT;
6365                                 goto done;
6366                         }
6367                         VME_OBJECT_SET(entry, vm_object_allocate(size));
6368                         VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6369                         assert(entry->use_pmap);
6370                 }
6371
6372                 vm_map_clip_start(map, entry, s);
6373                 vm_map_clip_end(map, entry, end);
6374
6375                 /* re-compute "e" */
6376                 e = entry->vme_end;
6377                 if (e > end)
6378                         e = end;
6379
6380                 /*
6381                  * Check for holes and protection mismatch.
6382                  * Holes: Next entry should be contiguous unless this
6383                  *        is the end of the region.
6384                  * Protection: Access requested must be allowed, unless
6385                  *      wiring is by protection class
6386                  */
6387                 if ((entry->vme_end < end) &&
6388                     ((entry->vme_next == vm_map_to_entry(map)) ||
6389                      (entry->vme_next->vme_start > entry->vme_end))) {
6390                         /* found a hole */
6391                         rc = KERN_INVALID_ADDRESS;
6392                         goto done;
6393                 }
6394                 if ((entry->protection & access_type) != access_type) {
6395                         /* found a protection problem */
6396                         rc = KERN_PROTECTION_FAILURE;
6397                         goto done;
6398                 }
6399
6400                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6401
6402                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6403                         goto done;
6404
6405                 entry->in_transition = TRUE;
6406
6407                 /*
6408                  * This entry might get split once we unlock the map.
6409                  * In vm_fault_wire(), we need the current range as
6410                  * defined by this entry.  In order for this to work
6411                  * along with a simultaneous clip operation, we make a
6412                  * temporary copy of this entry and use that for the
6413                  * wiring.  Note that the underlying objects do not
6414                  * change during a clip.
6415                  */
6416                 tmp_entry = *entry;
6417
6418                 /*
6419                  * The in_transition state guarantees that the entry
6420                  * (or entries for this range, if split occurred) will be
6421                  * there when the map lock is acquired for the second time.
6422                  */
6423                 vm_map_unlock(map);
6424
6425                 if (!user_wire && cur_thread != THREAD_NULL)
6426                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
6427                 else
6428                         interruptible_state = THREAD_UNINT;
6429
6430                 if(map_pmap)
6431                         rc = vm_fault_wire(map,
6432                                            &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6433                                            physpage_p);
6434                 else
6435                         rc = vm_fault_wire(map,
6436                                            &tmp_entry, caller_prot, tag, map->pmap,
6437                                            tmp_entry.vme_start,
6438                                            physpage_p);
6439
6440                 if (!user_wire && cur_thread != THREAD_NULL)
6441                         thread_interrupt_level(interruptible_state);
6442
6443                 vm_map_lock(map);
6444
6445                 if (last_timestamp+1 != map->timestamp) {
6446                         /*
6447                          * Find the entry again.  It could have been clipped
6448                          * after we unlocked the map.
6449                          */
6450                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6451                                                  &first_entry))
6452                                 panic("vm_map_wire: re-lookup failed");
6453
6454                         entry = first_entry;
6455                 }
6456
6457                 last_timestamp = map->timestamp;
6458
6459                 while ((entry != vm_map_to_entry(map)) &&
6460                        (entry->vme_start < tmp_entry.vme_end)) {
6461                         assert(entry->in_transition);
6462                         entry->in_transition = FALSE;
6463                         if (entry->needs_wakeup) {
6464                                 entry->needs_wakeup = FALSE;
6465                                 need_wakeup = TRUE;
6466                         }
6467                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6468                                 subtract_wire_counts(map, entry, user_wire);
6469                         }
6470                         entry = entry->vme_next;
6471                 }
6472
6473                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
6474                         goto done;
6475                 }
6476
6477                 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6478                     (tmp_entry.vme_end != end) &&    /* AND, we are not at the end of the requested range */
6479                     (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6480                         /* found a "new" hole */
6481                         s = tmp_entry.vme_end;
6482                         rc = KERN_INVALID_ADDRESS;
6483                         goto done;
6484                 }
6485
6486                 s = entry->vme_start;
6487
6488         } /* end while loop through map entries */
6489
6490 done:
6491         if (rc == KERN_SUCCESS) {
6492                 /* repair any damage we may have made to the VM map */
6493                 vm_map_simplify_range(map, start, end);
6494         }
6495
6496         vm_map_unlock(map);
6497
6498         /*
6499          * wake up anybody waiting on entries we wired.
6500          */
6501         if (need_wakeup)
6502                 vm_map_entry_wakeup(map);
6503
6504         if (rc != KERN_SUCCESS) {
6505                 /* undo what has been wired so far */
6506                 vm_map_unwire_nested(map, start, s, user_wire,
6507                                      map_pmap, pmap_addr);
6508                 if (physpage_p) {
6509                         *physpage_p = 0;
6510                 }
6511         }
6512
6513         return rc;
6514
6515 }
6516
/*
 *      vm_map_wire_external:
 *
 *      Wire the address range from "start" to "end" in "map" by forwarding
 *      to vm_map_wire_nested() and returning its result unchanged.
 *
 *      Unlike vm_map_wire_kernel(), the caller does not supply a tag; the
 *      tag argument is whatever vm_tag_bt() returns.
 *      NOTE(review): vm_tag_bt() is not defined in this part of the file;
 *      presumably it derives the tag from the caller's backtrace - confirm.
 *
 *      The remaining arguments are fixed:
 *        (pmap_t)NULL, 0  no pmap override; with a NULL map_pmap,
 *                         vm_map_wire_nested() hands map->pmap and the
 *                         entry's own start address to vm_fault_wire().
 *        NULL             no physical page number is requested.
 *
 *      Parameters:
 *        map          map containing the range.
 *        start, end   bounds of the range to wire.
 *        caller_prot  forwarded; vm_map_wire_nested() passes it on to
 *                     vm_fault_wire().
 *        user_wire    forwarded; vm_map_wire_nested() passes it to
 *                     add_wire_counts() / subtract_wire_counts().
 *
 *      Returns the kern_return_t produced by vm_map_wire_nested().
 */
6517 kern_return_t
6518 vm_map_wire_external(
6519         vm_map_t                map,
6520         vm_map_offset_t         start,
6521         vm_map_offset_t         end,
6522         vm_prot_t               caller_prot,
6523         boolean_t               user_wire)
6524 {
6525         kern_return_t   kret;
6526
6527         kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6528                                   user_wire, (pmap_t)NULL, 0, NULL);
6529         return kret;
6530 }
6531
/*
 *      vm_map_wire_kernel:
 *
 *      Wire the address range from "start" to "end" in "map" by forwarding
 *      to vm_map_wire_nested() with a caller-supplied tag, and return its
 *      result unchanged.
 *
 *      The remaining arguments are fixed:
 *        (pmap_t)NULL, 0  no pmap override; with a NULL map_pmap,
 *                         vm_map_wire_nested() hands map->pmap and the
 *                         entry's own start address to vm_fault_wire().
 *        NULL             no physical page number is requested.
 *
 *      Parameters:
 *        map          map containing the range.
 *        start, end   bounds of the range to wire.
 *        caller_prot  forwarded; vm_map_wire_nested() passes it on to
 *                     vm_fault_wire().
 *        tag          forwarded; vm_map_wire_nested() passes it on to
 *                     vm_fault_wire().
 *        user_wire    forwarded; vm_map_wire_nested() passes it to
 *                     add_wire_counts() / subtract_wire_counts().
 *
 *      Returns the kern_return_t produced by vm_map_wire_nested().
 */
6532 kern_return_t
6533 vm_map_wire_kernel(
6534         vm_map_t                map,
6535         vm_map_offset_t         start,
6536         vm_map_offset_t         end,
6537         vm_prot_t               caller_prot,
6538         vm_tag_t                tag,
6539         boolean_t               user_wire)
6540 {
6541         kern_return_t   kret;
6542
6543         kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6544                                   user_wire, (pmap_t)NULL, 0, NULL);
6545         return kret;
6546 }
6547
/*
 *      vm_map_wire_and_extract_external:
 *
 *      Wire the range that begins at "start" and is VM_MAP_PAGE_SIZE(map)
 *      bytes long, and hand "physpage_p" to vm_map_wire_nested() so it can
 *      report a physical page number for that range.
 *
 *      Unlike vm_map_wire_and_extract_kernel(), the caller does not supply
 *      a tag; the tag argument is whatever vm_tag_bt() returns.
 *      NOTE(review): vm_tag_bt() is not defined in this part of the file;
 *      presumably it derives the tag from the caller's backtrace - confirm.
 *
 *      Parameters:
 *        map          map containing the page.
 *        start        start address of the range to wire.
 *        caller_prot  forwarded to vm_map_wire_nested().
 *        user_wire    forwarded to vm_map_wire_nested().
 *        physpage_p   out: receives the physical page number; may be NULL
 *                     as far as this wrapper is concerned (it is only
 *                     dereferenced here after a NULL check).
 *
 *      Returns the kern_return_t produced by vm_map_wire_nested().  On any
 *      result other than KERN_SUCCESS, *physpage_p (when non-NULL) is
 *      forced to 0.  On KERN_SUCCESS the value is whatever
 *      vm_map_wire_nested() stored; its already-wired path stores 0 when
 *      the page lookup does not yield a wired page, so 0 can also
 *      accompany a successful return.
 */
6548 kern_return_t
6549 vm_map_wire_and_extract_external(
6550         vm_map_t        map,
6551         vm_map_offset_t start,
6552         vm_prot_t       caller_prot,
6553         boolean_t       user_wire,
6554         ppnum_t         *physpage_p)
6555 {
6556         kern_return_t   kret;
6557
6558         kret = vm_map_wire_nested(map,
6559                                   start,
6560                                   start+VM_MAP_PAGE_SIZE(map),
6561                                   caller_prot,
6562                                   vm_tag_bt(),
6563                                   user_wire,
6564                                   (pmap_t)NULL,
6565                                   0,
6566                                   physpage_p);
             /*
              * The "done:" error path of vm_map_wire_nested() also zeroes
              * *physpage_p; repeating it here covers any failure return that
              * does not go through that path.
              */
6567         if (kret != KERN_SUCCESS &&
6568             physpage_p != NULL) {
6569                 *physpage_p = 0;
6570         }
6571         return kret;
6572 }
6573
/*
 *      vm_map_wire_and_extract_kernel:
 *
 *      Wire the range that begins at "start" and is VM_MAP_PAGE_SIZE(map)
 *      bytes long, using a caller-supplied tag, and hand "physpage_p" to
 *      vm_map_wire_nested() so it can report a physical page number for
 *      that range.
 *
 *      Parameters:
 *        map          map containing the page.
 *        start        start address of the range to wire.
 *        caller_prot  forwarded to vm_map_wire_nested().
 *        tag          forwarded to vm_map_wire_nested().
 *        user_wire    forwarded to vm_map_wire_nested().
 *        physpage_p   out: receives the physical page number; may be NULL
 *                     as far as this wrapper is concerned (it is only
 *                     dereferenced here after a NULL check).
 *
 *      Returns the kern_return_t produced by vm_map_wire_nested().  On any
 *      result other than KERN_SUCCESS, *physpage_p (when non-NULL) is
 *      forced to 0.  On KERN_SUCCESS the value is whatever
 *      vm_map_wire_nested() stored; its already-wired path stores 0 when
 *      the page lookup does not yield a wired page, so 0 can also
 *      accompany a successful return.
 */
6574 kern_return_t
6575 vm_map_wire_and_extract_kernel(
6576         vm_map_t        map,
6577         vm_map_offset_t start,
6578         vm_prot_t       caller_prot,
6579         vm_tag_t        tag,
6580         boolean_t       user_wire,
6581         ppnum_t         *physpage_p)
6582 {
6583         kern_return_t   kret;
6584
6585         kret = vm_map_wire_nested(map,
6586                                   start,
6587                                   start+VM_MAP_PAGE_SIZE(map),
6588                                   caller_prot,
6589                                   tag,
6590                                   user_wire,
6591                                   (pmap_t)NULL,
6592                                   0,
6593                                   physpage_p);
             /*
              * The "done:" error path of vm_map_wire_nested() also zeroes
              * *physpage_p; repeating it here covers any failure return that
              * does not go through that path.
              */
6594         if (kret != KERN_SUCCESS &&
6595             physpage_p != NULL) {
6596                 *physpage_p = 0;
6597         }
6598         return kret;
6599 }
6600
6601 /*
6602  *      vm_map_unwire:
6603  *
6604  *      Sets the pageability of the specified address range in the target
6605  *      as pageable.  Regions specified must have been wired previously.
6606  *
6607  *      The map must not be locked, but a reference must remain to the map
6608  *      throughout the call.
6609  *
6610  *      Kernel will panic on failures.  User unwire ignores holes and
6611  *      unwired and intransition entries to avoid losing memory by leaving
6612  *      it unwired.
      *
      *      This is the worker, vm_map_unwire_nested(); it calls itself for
      *      entries that are submaps.
      *
      *      Parameters:
      *        map        map to operate on; locked and unlocked here.
      *        start,end  range to unwire; entries with vme_start < end are
      *                   visited, beginning with the entry containing start.
      *        user_wire  TRUE for a user unwire (problems are skipped),
      *                   FALSE for a kernel unwire (problems panic).  Also
      *                   passed to subtract_wire_counts().
      *        map_pmap   pmap handed to vm_fault_unwire(); when NULL,
      *                   map->pmap is used with the entry's start address.
      *        pmap_addr  address handed to vm_fault_unwire() together with
      *                   a non-NULL map_pmap.
      *
      *      Returns:
      *        KERN_SUCCESS          the loop ran to completion (including
      *                              the case where user-wire entries were
      *                              skipped), or start == end.
      *        KERN_INVALID_ADDRESS  start is not mapped (user unwire only;
      *                              a kernel unwire panics instead), or the
      *                              first entry has a superpage_size.
6613  */
6614 static kern_return_t
6615 vm_map_unwire_nested(
6616         vm_map_t                map,
6617         vm_map_offset_t         start,
6618         vm_map_offset_t         end,
6619         boolean_t               user_wire,
6620         pmap_t                  map_pmap,
6621         vm_map_offset_t         pmap_addr)
6622 {
6623         vm_map_entry_t          entry;
6624         struct vm_map_entry     *first_entry, tmp_entry;
6625         boolean_t               need_wakeup;
6626         boolean_t               main_map = FALSE;
6627         unsigned int            last_timestamp;
6628
6629         vm_map_lock(map);
             /*
              * NOTE(review): main_map is assigned here but never read in
              * this function.
              */
6630         if(map_pmap == NULL)
6631                 main_map = TRUE;
             /*
              * Remember the map's timestamp so that, after each unlock/relock
              * below, we can tell whether the map may have changed.
              * NOTE(review): the "last_timestamp+1" comparisons presumably
              * allow for the bump caused by our own unlock - confirm against
              * vm_map_unlock().
              */
6632         last_timestamp = map->timestamp;
6633
6634         VM_MAP_RANGE_CHECK(map, start, end);
6635         assert(page_aligned(start));
6636         assert(page_aligned(end));
6637         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6638         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6639
6640         if (start == end) {
6641                 /* We unwired what the caller asked for: zero pages */
6642                 vm_map_unlock(map);
6643                 return KERN_SUCCESS;
6644         }
6645
6646         if (vm_map_lookup_entry(map, start, &first_entry)) {
6647                 entry = first_entry;
6648                 /*
6649                  * vm_map_clip_start will be done later.
6650                  * We don't want to unnest any nested sub maps here !
6651                  */
6652         }
6653         else {
6654                 if (!user_wire) {
6655                         panic("vm_map_unwire: start not found");
6656                 }
6657                 /*      Start address is not in map. */
6658                 vm_map_unlock(map);
6659                 return(KERN_INVALID_ADDRESS);
6660         }
6661
             /* Only the first entry of the range is checked for superpages. */
6662         if (entry->superpage_size) {
6663                 /* superpages are always wired */
6664                 vm_map_unlock(map);
6665                 return KERN_INVALID_ADDRESS;
6666         }
6667
6668         need_wakeup = FALSE;
             /*
              * Walk the entries that begin before "end".  Each iteration either
              * skips the entry, drops one wire reference from it, or (when the
              * last reference is dropped) unwires the underlying pages with
              * the map unlocked.
              */
6669         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6670                 if (entry->in_transition) {
6671                         /*
6672                          * 1)
6673                          * Another thread is wiring down this entry. Note
6674                          * that if it is not for the other thread we would
6675                          * be unwiring an unwired entry.  This is not
6676                          * permitted.  If we wait, we will be unwiring memory
6677                          * we did not wire.
6678                          *
6679                          * 2)
6680                          * Another thread is unwiring this entry.  We did not
6681                          * have a reference to it, because if we did, this
6682                          * entry will not be getting unwired now.
6683                          */
6684                         if (!user_wire) {
6685                                 /*
6686                                  * XXX FBDP
6687                                  * This could happen:  there could be some
6688                                  * overlapping vslock/vsunlock operations
6689                                  * going on.
6690                                  * We should probably just wait and retry,
6691                                  * but then we have to be careful that this
6692                                  * entry could get "simplified" after
6693                                  * "in_transition" gets unset and before
6694                                  * we re-lookup the entry, so we would
6695                                  * have to re-clip the entry to avoid
6696                                  * re-unwiring what we have already unwired...
6697                                  * See vm_map_wire_nested().
6698                                  *
6699                                  * Or we could just ignore "in_transition"
6700                                  * here and proceed to decrement the wired
6701                                  * count(s) on this entry.  That should be fine
6702                                  * as long as "wired_count" doesn't drop all
6703                                  * the way to 0 (and we should panic if THAT
6704                                  * happens).
6705                                  */
6706                                 panic("vm_map_unwire: in_transition entry");
6707                         }
6708
                             /* user unwire: leave the in-transition entry alone */
6709                         entry = entry->vme_next;
6710                         continue;
6711                 }
6712
6713                 if (entry->is_sub_map) {
6714                         vm_map_offset_t sub_start;
6715                         vm_map_offset_t sub_end;
6716                         vm_map_offset_t local_end;
6717                         pmap_t          pmap;
6718
6719                         vm_map_clip_start(map, entry, start);
6720                         vm_map_clip_end(map, entry, end);
6721
                             /*
                              * Translate the (clipped) entry into the submap's
                              * address space: [VME_OFFSET, VME_OFFSET + length).
                              * local_end is recorded but not used afterwards in
                              * this function.
                              */
6722                         sub_start = VME_OFFSET(entry);
6723                         sub_end = entry->vme_end - entry->vme_start;
6724                         sub_end += VME_OFFSET(entry);
6725                         local_end = entry->vme_end;
6726                         if(map_pmap == NULL) {
                                     /*
                                      * No pmap was handed down: choose the pmap and
                                      * pmap address that the recursive call will use.
                                      * Note that this overwrites the pmap_addr
                                      * parameter.
                                      */
6727                                 if(entry->use_pmap) {
6728                                         pmap = VME_SUBMAP(entry)->pmap;
6729                                         pmap_addr = sub_start;
6730                                 } else {
6731                                         pmap = map->pmap;
6732                                         pmap_addr = start;
6733                                 }
6734                                 if (entry->wired_count == 0 ||
6735                                     (user_wire && entry->user_wired_count == 0)) {
6736                                         if (!user_wire)
6737                                                 panic("vm_map_unwire: entry is unwired");
6738                                         entry = entry->vme_next;
6739                                         continue;
6740                                 }
6741
6742                                 /*
6743                                  * Check for holes
6744                                  * Holes: Next entry should be contiguous unless
6745                                  * this is the end of the region.
                                      *
                                      * NOTE(review): the skip below is commented
                                      * out, so a user unwire that finds a hole here
                                      * still goes on to drop the wire counts,
                                      * unlike the non-submap hole check further
                                      * down, which skips the entry.
6746                                  */
6747                                 if (((entry->vme_end < end) &&
6748                                      ((entry->vme_next == vm_map_to_entry(map)) ||
6749                                       (entry->vme_next->vme_start
6750                                        > entry->vme_end)))) {
6751                                         if (!user_wire)
6752                                                 panic("vm_map_unwire: non-contiguous region");
6753 /*
6754                                         entry = entry->vme_next;
6755                                         continue;
6756 */
6757                                 }
6758
6759                                 subtract_wire_counts(map, entry, user_wire);
6760
                                     /* other wire references remain: nothing more to do */
6761                                 if (entry->wired_count != 0) {
6762                                         entry = entry->vme_next;
6763                                         continue;
6764                                 }
6765
6766                                 entry->in_transition = TRUE;
6767                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
6768
6769                                 /*
6770                                  * We can unlock the map now. The in_transition state
6771                                  * guarantees existence of the entry.
6772                                  */
6773                                 vm_map_unlock(map);
                                     /* the result of the recursive call is not checked */
6774                                 vm_map_unwire_nested(VME_SUBMAP(entry),
6775                                                      sub_start, sub_end, user_wire, pmap, pmap_addr);
6776                                 vm_map_lock(map);
6777
6778                                 if (last_timestamp+1 != map->timestamp) {
6779                                         /*
6780                                          * Find the entry again.  It could have been
6781                                          * clipped or deleted after we unlocked the map.
6782                                          */
6783                                         if (!vm_map_lookup_entry(map,
6784                                                                  tmp_entry.vme_start,
6785                                                                  &first_entry)) {
6786                                                 if (!user_wire)
6787                                                         panic("vm_map_unwire: re-lookup failed");
                                                     /*
                                                      * NOTE(review): presumably a failed
                                                      * lookup leaves first_entry at the
                                                      * entry preceding the address, so
                                                      * this resumes at the next mapped
                                                      * entry - confirm.
                                                      */
6788                                                 entry = first_entry->vme_next;
6789                                         } else
6790                                                 entry = first_entry;
6791                                 }
6792                                 last_timestamp = map->timestamp;
6793
6794                                 /*
6795                                  * clear transition bit for all constituent entries
6796                                  * that were in the original entry (saved in
6797                                  * tmp_entry).  Also check for waiters.
6798                                  */
6799                                 while ((entry != vm_map_to_entry(map)) &&
6800                                        (entry->vme_start < tmp_entry.vme_end)) {
6801                                         assert(entry->in_transition);
6802                                         entry->in_transition = FALSE;
6803                                         if (entry->needs_wakeup) {
6804                                                 entry->needs_wakeup = FALSE;
6805                                                 need_wakeup = TRUE;
6806                                         }
6807                                         entry = entry->vme_next;
6808                                 }
6809                                 continue;
6810                         } else {
                                     /*
                                      * A pmap was handed down by the caller: pass it
                                      * through to the submap unchanged.
                                      *
                                      * NOTE(review): unlike the branch above, this
                                      * path does not set in_transition before
                                      * unlocking, and "entry" is dereferenced by
                                      * VME_SUBMAP() after the unlock - confirm that
                                      * the entry cannot go away here.
                                      *
                                      * NOTE(review): tmp_entry is not assigned on
                                      * this path.  The re-lookup below reads
                                      * tmp_entry.vme_start, which holds a value left
                                      * by an earlier iteration, or is uninitialized
                                      * if this is the first entry processed.
                                      */
6811                                 vm_map_unlock(map);
6812                                 vm_map_unwire_nested(VME_SUBMAP(entry),
6813                                                      sub_start, sub_end, user_wire, map_pmap,
6814                                                      pmap_addr);
6815                                 vm_map_lock(map);
6816
6817                                 if (last_timestamp+1 != map->timestamp) {
6818                                         /*
6819                                          * Find the entry again.  It could have been
6820                                          * clipped or deleted after we unlocked the map.
6821                                          */
6822                                         if (!vm_map_lookup_entry(map,
6823                                                                  tmp_entry.vme_start,
6824                                                                  &first_entry)) {
6825                                                 if (!user_wire)
6826                                                         panic("vm_map_unwire: re-lookup failed");
6827                                                 entry = first_entry->vme_next;
6828                                         } else
6829                                                 entry = first_entry;
6830                                 }
6831                                 last_timestamp = map->timestamp;
                                     /*
                                      * No "continue" here: control falls through to
                                      * the generic handling below.
                                      */
6832                         }
6833                 }
6834
6835
                     /*
                      * Generic handling: the entry must currently hold a wire
                      * reference of the kind being released.
                      */
6836                 if ((entry->wired_count == 0) ||
6837                     (user_wire && entry->user_wired_count == 0)) {
6838                         if (!user_wire)
6839                                 panic("vm_map_unwire: entry is unwired");
6840
6841                         entry = entry->vme_next;
6842                         continue;
6843                 }
6844
6845                 assert(entry->wired_count > 0 &&
6846                        (!user_wire || entry->user_wired_count > 0));
6847
6848                 vm_map_clip_start(map, entry, start);
6849                 vm_map_clip_end(map, entry, end);
6850
6851                 /*
6852                  * Check for holes
6853                  * Holes: Next entry should be contiguous unless
6854                  *        this is the end of the region.
6855                  */
6856                 if (((entry->vme_end < end) &&
6857                      ((entry->vme_next == vm_map_to_entry(map)) ||
6858                       (entry->vme_next->vme_start > entry->vme_end)))) {
6859
6860                         if (!user_wire)
6861                                 panic("vm_map_unwire: non-contiguous region");
                             /* user unwire: skip this entry without touching its counts */
6862                         entry = entry->vme_next;
6863                         continue;
6864                 }
6865
6866                 subtract_wire_counts(map, entry, user_wire);
6867
                     /* other wire references remain: the pages stay wired */
6868                 if (entry->wired_count != 0) {
6869                         entry = entry->vme_next;
6870                         continue;
6871                 }
6872
                     /* last wire reference dropped: reset the zero_wired_pages flag */
6873                 if(entry->zero_wired_pages) {
6874                         entry->zero_wired_pages = FALSE;
6875                 }
6876
6877                 entry->in_transition = TRUE;
6878                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
6879
6880                 /*
6881                  * We can unlock the map now. The in_transition state
6882                  * guarantees existence of the entry.
6883                  */
6884                 vm_map_unlock(map);
                     /*
                      * Unwire using the saved copy of the entry.  With no pmap
                      * handed down, the map's own pmap and the entry's start
                      * address are used.
                      */
6885                 if(map_pmap) {
6886                         vm_fault_unwire(map,
6887                                         &tmp_entry, FALSE, map_pmap, pmap_addr);
6888                 } else {
6889                         vm_fault_unwire(map,
6890                                         &tmp_entry, FALSE, map->pmap,
6891                                         tmp_entry.vme_start);
6892                 }
6893                 vm_map_lock(map);
6894
6895                 if (last_timestamp+1 != map->timestamp) {
6896                         /*
6897                          * Find the entry again.  It could have been clipped
6898                          * or deleted after we unlocked the map.
6899                          */
6900                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6901                                                  &first_entry)) {
6902                                 if (!user_wire)
6903                                         panic("vm_map_unwire: re-lookup failed");
6904                                 entry = first_entry->vme_next;
6905                         } else
6906                                 entry = first_entry;
6907                 }
6908                 last_timestamp = map->timestamp;
6909
6910                 /*
6911                  * clear transition bit for all constituent entries that
6912                  * were in the original entry (saved in tmp_entry).  Also
6913                  * check for waiters.
6914                  */
6915                 while ((entry != vm_map_to_entry(map)) &&
6916                        (entry->vme_start < tmp_entry.vme_end)) {
6917                         assert(entry->in_transition);
6918                         entry->in_transition = FALSE;
6919                         if (entry->needs_wakeup) {
6920                                 entry->needs_wakeup = FALSE;
6921                                 need_wakeup = TRUE;
6922                         }
6923                         entry = entry->vme_next;
6924                 }
6925         }
6926
6927         /*
6928          * We might have fragmented the address space when we wired this
6929          * range of addresses.  Attempt to re-coalesce these VM map entries
6930          * with their neighbors now that they're no longer wired.
6931          * Under some circumstances, address space fragmentation can
6932          * prevent VM object shadow chain collapsing, which can cause
6933          * swap space leaks.
6934          */
6935         vm_map_simplify_range(map, start, end);
6936
6937         vm_map_unlock(map);
6938         /*
6939          * wake up anybody waiting on entries that we have unwired.
6940          */
6941         if (need_wakeup)
6942                 vm_map_entry_wakeup(map);
6943         return(KERN_SUCCESS);
6944
6945 }
6946
6947 kern_return_t
6948 vm_map_unwire(
6949         vm_map_t                map,
6950         vm_map_offset_t         start,
6951         vm_map_offset_t         end,
6952         boolean_t               user_wire)
6953 {
             /*
              *      vm_map_unwire:
              *
              *      Top-level unwire entry point.  Forwards map, start, end
              *      and user_wire unchanged to vm_map_unwire_nested() and
              *      returns that routine's kern_return_t.
              *
              *      No override pmap is supplied (NULL pmap, pmap address 0).
              *      With a NULL pmap, vm_map_unwire_nested() passes map->pmap
              *      and the entry's own start address to vm_fault_unwire().
              *
              *      user_wire also selects error handling in the nested
              *      routine: when it is FALSE, a non-contiguous region or a
              *      failed entry re-lookup panics instead of being skipped.
              */
6954         return vm_map_unwire_nested(map, start, end,
6955                                     user_wire, (pmap_t)NULL, 0);
6956 }
6957
6958
6959 /*
6960  *      vm_map_entry_delete:    [ internal use only ]
6961  *
6962  *      Deallocate the given entry from the target map.
      *
      *      Preconditions (asserted, not handled): the entry's start and
      *      end are page aligned (and aligned to the map's page mask when
      *      entry->map_aligned is set), wired_count and user_wired_count
      *      are both zero, and the entry is not marked permanent.
      *
      *      The entry is unlinked from the map, map->size is reduced by
      *      the entry's length, and the entry is disposed of.  The map is
      *      then unlocked, and only after that is the entry's submap or
      *      VM object handed to vm_map_deallocate() or
      *      vm_object_deallocate().
      *
      *      NOTE: this routine calls vm_map_unlock(map) without taking the
      *      lock itself, so it returns with the map unlocked.  Presumably
      *      the caller holds the map lock on entry -- confirm at call sites.
6963  */
6964 static void
6965 vm_map_entry_delete(
6966         vm_map_t        map,
6967         vm_map_entry_t  entry)
6968 {
6969         vm_map_offset_t s, e;
6970         vm_object_t     object;
6971         vm_map_t        submap;
6972
             /* Snapshot the entry's bounds; they are needed after disposal. */
6973         s = entry->vme_start;
6974         e = entry->vme_end;
6975         assert(page_aligned(s));
6976         assert(page_aligned(e));
6977         if (entry->map_aligned == TRUE) {
6978                 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
6979                 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
6980         }
6981         assert(entry->wired_count == 0);
6982         assert(entry->user_wired_count == 0);
6983         assert(!entry->permanent);
6984
             /*
              * Remember what the entry maps.  The entry is handed to
              * vm_map_entry_dispose() below, so it is not dereferenced
              * again after that point.  Exactly one of object/submap is
              * taken from the entry; the other is set to NULL.
              */
6985         if (entry->is_sub_map) {
6986                 object = NULL;
6987                 submap = VME_SUBMAP(entry);
6988         } else {
6989                 submap = NULL;
6990                 object = VME_OBJECT(entry);
6991         }
6992
             /* Take the entry out of the map and account for its size. */
6993         vm_map_store_entry_unlink(map, entry);
6994         map->size -= e - s;
6995
6996         vm_map_entry_dispose(map, entry);
6997
             /* The map lock is dropped here and is not re-taken before return. */
6998         vm_map_unlock(map);
6999         /*
7000          *      Deallocate the object only after removing all
7001          *      pmap entries pointing to its pages.
7002          */
             /*
              * NOTE(review): "object" may be NULL here (a non-submap entry
              * with no object, or a submap entry whose VME_SUBMAP is NULL);
              * presumably vm_object_deallocate() tolerates that -- confirm.
              */
7003         if (submap)
7004                 vm_map_deallocate(submap);
7005         else
7006                 vm_object_deallocate(object);
7007
7008 }
7009
7010 void
7011 vm_map_submap_pmap_clean(
7012         vm_map_t        map,
7013         vm_map_offset_t start,
7014         vm_map_offset_t end,
7015         vm_map_t        sub_map,
7016         vm_map_offset_t offset)
7017 {
             /*
              *      vm_map_submap_pmap_clean:
              *
              *      Walk the entries of "sub_map" that cover the offsets
              *      [offset, offset + (end - start)) and remove the
              *      corresponding physical mappings.  For each entry this is
              *      done either directly on map->pmap (pmap_remove) or
              *      through the entry's VM object
              *      (vm_object_pmap_protect_options with VM_PROT_NONE and
              *      PMAP_OPTIONS_REMOVE).  Entries that are themselves
              *      submaps are handled by recursion.
              *
              *      map      map whose pmap, mapped_in_other_pmaps and
              *               ref_count fields drive the removal
              *      start    address in "map" corresponding to "offset"
              *      end      end of the range in "map"; only (end - start)
              *               is used
              *      sub_map  map whose entries are walked; it is held
              *               read-locked for the duration of the walk
              *      offset   offset in "sub_map" at which the walk begins
              */
7018         vm_map_offset_t submap_start;
7019         vm_map_offset_t submap_end;
7020         vm_map_size_t   remove_size;
7021         vm_map_entry_t  entry;
7022
7023         submap_end = offset + (end - start);
             /* NOTE: submap_start is assigned here but never read below. */
7024         submap_start = offset;
7025
7026         vm_map_lock_read(sub_map);
             /*
              * First entry: the one the lookup returns for "offset".  It
              * may begin before "offset" and/or extend past submap_end, so
              * remove_size is trimmed on both sides.
              */
7027         if(vm_map_lookup_entry(sub_map, offset, &entry)) {
7028
7029                 remove_size = (entry->vme_end - entry->vme_start);
7030                 if(offset > entry->vme_start)
7031                         remove_size -= offset - entry->vme_start;
7032
7033
7034                 if(submap_end < entry->vme_end) {
7035                         remove_size -=
7036                                 entry->vme_end - submap_end;
7037                 }
7038                 if(entry->is_sub_map) {
                             /*
                              * Nested submap: recurse with sub_map acting
                              * as "map".
                              * NOTE(review): VME_OFFSET(entry) is passed
                              * unadjusted even when offset is greater than
                              * entry->vme_start, whereas the object case
                              * below adds (offset - entry->vme_start).
                              * Confirm this is intended.
                              */
7039                         vm_map_submap_pmap_clean(
7040                                 sub_map,
7041                                 start,
7042                                 start + remove_size,
7043                                 VME_SUBMAP(entry),
7044                                 VME_OFFSET(entry));
7045                 } else {
7046
                             /*
                              * With mapped_in_other_pmaps set, a non-zero
                              * ref_count and an object present, go through
                              * the object, starting at the object offset
                              * that corresponds to "offset" in this entry.
                              * Otherwise remove the range directly from
                              * map->pmap.
                              */
7047                         if((map->mapped_in_other_pmaps) && (map->ref_count)
7048                            && (VME_OBJECT(entry) != NULL)) {
7049                                 vm_object_pmap_protect_options(
7050                                         VME_OBJECT(entry),
7051                                         (VME_OFFSET(entry) +
7052                                          offset -
7053                                          entry->vme_start),
7054                                         remove_size,
7055                                         PMAP_NULL,
7056                                         entry->vme_start,
7057                                         VM_PROT_NONE,
7058                                         PMAP_OPTIONS_REMOVE);
7059                         } else {
7060                                 pmap_remove(map->pmap,
7061                                             (addr64_t)start,
7062                                             (addr64_t)(start + remove_size));
7063                         }
7064                 }
7065         }
7066
             /*
              * Advance to the next entry.  This runs whether or not the
              * lookup succeeded; after a miss, "entry" is whatever
              * vm_map_lookup_entry() stored.  The other lookups in this
              * file likewise continue from ->vme_next after a miss.
              */
7067         entry = entry->vme_next;
7068
             /*
              * Remaining entries, until the walk reaches
              * vm_map_to_entry(sub_map) or an entry that starts at or
              * beyond submap_end.  Only the tail is trimmed here.  The
              * matching address in "map" is
              * (start + entry->vme_start) - offset.
              */
7069         while((entry != vm_map_to_entry(sub_map))
7070               && (entry->vme_start < submap_end)) {
7071                 remove_size = (entry->vme_end - entry->vme_start);
7072                 if(submap_end < entry->vme_end) {
7073                         remove_size -= entry->vme_end - submap_end;
7074                 }
7075                 if(entry->is_sub_map) {
7076                         vm_map_submap_pmap_clean(
7077                                 sub_map,
7078                                 (start + entry->vme_start) - offset,
7079                                 ((start + entry->vme_start) - offset) + remove_size,
7080                                 VME_SUBMAP(entry),
7081                                 VME_OFFSET(entry));
7082                 } else {
                             /* Same object-vs-pmap choice as for the first entry. */
7083                         if((map->mapped_in_other_pmaps) && (map->ref_count)
7084                            && (VME_OBJECT(entry) != NULL)) {
7085                                 vm_object_pmap_protect_options(
7086                                         VME_OBJECT(entry),
7087                                         VME_OFFSET(entry),
7088                                         remove_size,
7089                                         PMAP_NULL,
7090                                         entry->vme_start,
7091                                         VM_PROT_NONE,
7092                                         PMAP_OPTIONS_REMOVE);
7093                         } else {
7094                                 pmap_remove(map->pmap,
7095                                             (addr64_t)((start + entry->vme_start)
7096                                                        - offset),
7097                                             (addr64_t)(((start + entry->vme_start)
7098                                                         - offset) + remove_size));
7099                         }
7100                 }
7101                 entry = entry->vme_next;
7102         }
7103         vm_map_unlock_read(sub_map);
7104         return;
7105 }
7106
7107 /*
7108  *      vm_map_delete:  [ internal use only ]
7109  *
7110  *      Deallocates the given address range from the target map.
7111  *      Removes all user wirings. Unwires one kernel wiring if
7112  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
7113  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
7114  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7115  *
7116  *      This routine is called with map locked and leaves map locked.
7117  */
7118 static kern_return_t
7119 vm_map_delete(
7120         vm_map_t                map,
7121         vm_map_offset_t         start,
7122         vm_map_offset_t         end,
7123         int                     flags,
7124         vm_map_t                zap_map)
7125 {
7126         vm_map_entry_t          entry, next;
7127         struct   vm_map_entry   *first_entry, tmp_entry;
7128         vm_map_offset_t         s;
7129         vm_object_t             object;
7130         boolean_t               need_wakeup;
7131         unsigned int            last_timestamp = ~0; /* unlikely value */
7132         int                     interruptible;
7133
7134         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7135                 THREAD_ABORTSAFE : THREAD_UNINT;
7136
7137         /*
7138          * All our DMA I/O operations in IOKit are currently done by
7139          * wiring through the map entries of the task requesting the I/O.
7140          * Because of this, we must always wait for kernel wirings
7141          * to go away on the entries before deleting them.
7142          *
7143          * Any caller who wants to actually remove a kernel wiring
7144          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7145          * properly remove one wiring instead of blasting through
7146          * them all.
7147          */
7148         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7149
7150         while(1) {
7151                 /*
7152                  *      Find the start of the region, and clip it
7153                  */
7154                 if (vm_map_lookup_entry(map, start, &first_entry)) {
7155                         entry = first_entry;
7156                         if (map == kalloc_map &&
7157                             (entry->vme_start != start ||
7158                              entry->vme_end != end)) {
7159                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7160                                       "mismatched entry %p [0x%llx:0x%llx]\n",
7161                                       map,
7162                                       (uint64_t)start,
7163                                       (uint64_t)end,
7164                                       entry,
7165                                       (uint64_t)entry->vme_start,
7166                                       (uint64_t)entry->vme_end);
7167                         }
7168                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */                           start = SUPERPAGE_ROUND_DOWN(start);
7169                                 start = SUPERPAGE_ROUND_DOWN(start);
7170                                 continue;
7171                         }
7172                         if (start == entry->vme_start) {
7173                                 /*
7174                                  * No need to clip.  We don't want to cause
7175                                  * any unnecessary unnesting in this case...
7176                                  */
7177                         } else {
7178                                 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7179                                     entry->map_aligned &&
7180                                     !VM_MAP_PAGE_ALIGNED(
7181                                             start,
7182                                             VM_MAP_PAGE_MASK(map))) {
7183                                         /*
7184                                          * The entry will no longer be
7185                                          * map-aligned after clipping
7186                                          * and the caller said it's OK.
7187                                          */
7188                                         entry->map_aligned = FALSE;
7189                                 }
7190                                 if (map == kalloc_map) {
7191                                         panic("vm_map_delete(%p,0x%llx,0x%llx):"
7192                                               " clipping %p at 0x%llx\n",
7193                                               map,
7194                                               (uint64_t)start,
7195                                               (uint64_t)end,
7196                                               entry,
7197                                               (uint64_t)start);
7198                                 }
7199                                 vm_map_clip_start(map, entry, start);
7200                         }
7201
7202                         /*
7203                          *      Fix the lookup hint now, rather than each
7204                          *      time through the loop.
7205                          */
7206                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7207                 } else {
7208                         if (map->pmap == kernel_pmap &&
7209                             map->ref_count != 0) {
7210                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7211                                       "no map entry at 0x%llx\n",
7212                                       map,
7213                                       (uint64_t)start,
7214                                       (uint64_t)end,
7215                                       (uint64_t)start);
7216                         }
7217                         entry = first_entry->vme_next;
7218                 }
7219                 break;
7220         }
7221         if (entry->superpage_size)
7222                 end = SUPERPAGE_ROUND_UP(end);
7223
7224         need_wakeup = FALSE;
7225         /*
7226          *      Step through all entries in this region
7227          */
7228         s = entry->vme_start;
7229         while ((entry != vm_map_to_entry(map)) && (s < end)) {
7230                 /*
7231                  * At this point, we have deleted all the memory entries
7232                  * between "start" and "s".  We still need to delete
7233                  * all memory entries between "s" and "end".
7234                  * While we were blocked and the map was unlocked, some
7235                  * new memory entries could have been re-allocated between
7236                  * "start" and "s" and we don't want to mess with those.
7237                  * Some of those entries could even have been re-assembled
7238                  * with an entry after "s" (in vm_map_simplify_entry()), so
7239                  * we may have to vm_map_clip_start() again.
7240                  */
7241
7242                 if (entry->vme_start >= s) {
7243                         /*
7244                          * This entry starts on or after "s"
7245                          * so no need to clip its start.
7246                          */
7247                 } else {
7248                         /*
7249                          * This entry has been re-assembled by a
7250                          * vm_map_simplify_entry().  We need to
7251                          * re-clip its start.
7252                          */
7253                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7254                             entry->map_aligned &&
7255                             !VM_MAP_PAGE_ALIGNED(s,
7256                                                  VM_MAP_PAGE_MASK(map))) {
7257                                 /*
7258                                  * The entry will no longer be map-aligned
7259                                  * after clipping and the caller said it's OK.
7260                                  */
7261                                 entry->map_aligned = FALSE;
7262                         }
7263                         if (map == kalloc_map) {
7264                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7265                                       "clipping %p at 0x%llx\n",
7266                                       map,
7267                                       (uint64_t)start,
7268                                       (uint64_t)end,
7269                                       entry,
7270                                       (uint64_t)s);
7271                         }
7272                         vm_map_clip_start(map, entry, s);
7273                 }
7274                 if (entry->vme_end <= end) {
7275                         /*
7276                          * This entry is going away completely, so no need
7277                          * to clip and possibly cause an unnecessary unnesting.
7278                          */
7279                 } else {
7280                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7281                             entry->map_aligned &&
7282                             !VM_MAP_PAGE_ALIGNED(end,
7283                                                  VM_MAP_PAGE_MASK(map))) {
7284                                 /*
7285                                  * The entry will no longer be map-aligned
7286                                  * after clipping and the caller said it's OK.
7287                                  */
7288                                 entry->map_aligned = FALSE;
7289                         }
7290                         if (map == kalloc_map) {
7291                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7292                                       "clipping %p at 0x%llx\n",
7293                                       map,
7294                                       (uint64_t)start,
7295                                       (uint64_t)end,
7296                                       entry,
7297                                       (uint64_t)end);
7298                         }
7299                         vm_map_clip_end(map, entry, end);
7300                 }
7301
7302                 if (entry->permanent) {
7303                         if (map->pmap == kernel_pmap) {
7304                                 panic("%s(%p,0x%llx,0x%llx): "
7305                                       "attempt to remove permanent "
7306                                       "VM map entry "
7307                                       "%p [0x%llx:0x%llx]\n",
7308                                       __FUNCTION__,
7309                                       map,
7310                                       (uint64_t) start,
7311                                       (uint64_t) end,
7312                                       entry,
7313                                       (uint64_t) entry->vme_start,
7314                                       (uint64_t) entry->vme_end);
7315                         } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7316 //                              printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7317                                 entry->permanent = FALSE;
7318                         } else {
7319                                 if (!vm_map_executable_immutable_no_log) {
7320                                         printf("%d[%s] %s(0x%llx,0x%llx): "
7321                                                    "permanent entry [0x%llx:0x%llx] "
7322                                                    "prot 0x%x/0x%x\n",
7323                                                    proc_selfpid(),
7324                                                    (current_task()->bsd_info
7325                                                         ? proc_name_address(current_task()->bsd_info)
7326                                                         : "?"),
7327                                                    __FUNCTION__,
7328                                                    (uint64_t) start,
7329                                                    (uint64_t) end,
7330                                                    (uint64_t)entry->vme_start,
7331                                                    (uint64_t)entry->vme_end,
7332                                                    entry->protection,
7333                                                    entry->max_protection);
7334                                 }
7335                                 /*
7336                                  * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7337                                  */
7338                                 DTRACE_VM5(vm_map_delete_permanent,
7339                                            vm_map_offset_t, entry->vme_start,
7340                                            vm_map_offset_t, entry->vme_end,
7341                                            vm_prot_t, entry->protection,
7342                                            vm_prot_t, entry->max_protection,
7343                                            int, VME_ALIAS(entry));
7344                         }
7345                 }
7346
7347
7348                 if (entry->in_transition) {
7349                         wait_result_t wait_result;
7350
7351                         /*
7352                          * Another thread is wiring/unwiring this entry.
7353                          * Let the other thread know we are waiting.
7354                          */
7355                         assert(s == entry->vme_start);
7356                         entry->needs_wakeup = TRUE;
7357
7358                         /*
7359                          * wake up anybody waiting on entries that we have
7360                          * already unwired/deleted.
7361                          */
7362                         if (need_wakeup) {
7363                                 vm_map_entry_wakeup(map);
7364                                 need_wakeup = FALSE;
7365                         }
7366
7367                         wait_result = vm_map_entry_wait(map, interruptible);
7368
7369                         if (interruptible &&
7370                             wait_result == THREAD_INTERRUPTED) {
7371                                 /*
7372                                  * We do not clear the needs_wakeup flag,
7373                                  * since we cannot tell if we were the only one.
7374                                  */
7375                                 return KERN_ABORTED;
7376                         }
7377
7378                         /*
7379                          * The entry could have been clipped or it
7380                          * may not exist anymore.  Look it up again.
7381                          */
7382                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
7383                                 /*
7384                                  * User: use the next entry
7385                                  */
7386                                 entry = first_entry->vme_next;
7387                                 s = entry->vme_start;
7388                         } else {
7389                                 entry = first_entry;
7390                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7391                         }
7392                         last_timestamp = map->timestamp;
7393                         continue;
7394                 } /* end in_transition */
7395
7396                 if (entry->wired_count) {
7397                         boolean_t       user_wire;
7398
7399                         user_wire = entry->user_wired_count > 0;
7400
7401                         /*
7402                          *      Remove a kernel wiring if requested
7403                          */
7404                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
7405                                 entry->wired_count--;
7406                         }
7407
7408                         /*
7409                          *      Remove all user wirings for proper accounting
7410                          */
7411                         if (entry->user_wired_count > 0) {
7412                                 while (entry->user_wired_count)
7413                                         subtract_wire_counts(map, entry, user_wire);
7414                         }
7415
7416                         if (entry->wired_count != 0) {
7417                                 assert(map != kernel_map);
7418                                 /*
7419                                  * Cannot continue.  Typical case is when
7420                                  * a user thread has physical io pending on
7421                                  * this page.  Either wait for the
7422                                  * kernel wiring to go away or return an
7423                                  * error.
7424                                  */
7425                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7426                                         wait_result_t wait_result;
7427
7428                                         assert(s == entry->vme_start);
7429                                         entry->needs_wakeup = TRUE;
7430                                         wait_result = vm_map_entry_wait(map,
7431                                                                         interruptible);
7432
7433                                         if (interruptible &&
7434                                             wait_result == THREAD_INTERRUPTED) {
7435                                                 /*
7436                                                  * We do not clear the
7437                                                  * needs_wakeup flag, since we
7438                                                  * cannot tell if we were the
7439                                                  * only one.
7440                                                  */
7441                                                 return KERN_ABORTED;
7442                                         }
7443
7444                                         /*
7445                                          * The entry could have been clipped or
7446                                          * it may not exist anymore.  Look it
7447                                          * up again.
7448                                          */
7449                                         if (!vm_map_lookup_entry(map, s,
7450                                                                  &first_entry)) {
7451                                                 assert(map != kernel_map);
7452                                                 /*
7453                                                  * User: use the next entry
7454                                                  */
7455                                                 entry = first_entry->vme_next;
7456                                                 s = entry->vme_start;
7457                                         } else {
7458                                                 entry = first_entry;
7459                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7460                                         }
7461                                         last_timestamp = map->timestamp;
7462                                         continue;
7463                                 }
7464                                 else {
7465                                         return KERN_FAILURE;
7466                                 }
7467                         }
7468
7469                         entry->in_transition = TRUE;
7470                         /*
7471                          * copy current entry.  see comment in vm_map_wire()
7472                          */
7473                         tmp_entry = *entry;
7474                         assert(s == entry->vme_start);
7475
7476                         /*
7477                          * We can unlock the map now. The in_transition
7478                          * state guarantees existence of the entry.
7479                          */
7480                         vm_map_unlock(map);
7481
7482                         if (tmp_entry.is_sub_map) {
7483                                 vm_map_t sub_map;
7484                                 vm_map_offset_t sub_start, sub_end;
7485                                 pmap_t pmap;
7486                                 vm_map_offset_t pmap_addr;
7487
7488
7489                                 sub_map = VME_SUBMAP(&tmp_entry);
7490                                 sub_start = VME_OFFSET(&tmp_entry);
7491                                 sub_end = sub_start + (tmp_entry.vme_end -
7492                                                        tmp_entry.vme_start);
7493                                 if (tmp_entry.use_pmap) {
7494                                         pmap = sub_map->pmap;
7495                                         pmap_addr = tmp_entry.vme_start;
7496                                 } else {
7497                                         pmap = map->pmap;
7498                                         pmap_addr = tmp_entry.vme_start;
7499                                 }
7500                                 (void) vm_map_unwire_nested(sub_map,
7501                                                             sub_start, sub_end,
7502                                                             user_wire,
7503                                                             pmap, pmap_addr);
7504                         } else {
7505
7506                                 if (VME_OBJECT(&tmp_entry) == kernel_object) {
7507                                         pmap_protect_options(
7508                                                 map->pmap,
7509                                                 tmp_entry.vme_start,
7510                                                 tmp_entry.vme_end,
7511                                                 VM_PROT_NONE,
7512                                                 PMAP_OPTIONS_REMOVE,
7513                                                 NULL);
7514                                 }
7515                                 vm_fault_unwire(map, &tmp_entry,
7516                                                 VME_OBJECT(&tmp_entry) == kernel_object,
7517                                                 map->pmap, tmp_entry.vme_start);
7518                         }
7519
7520                         vm_map_lock(map);
7521
7522                         if (last_timestamp+1 != map->timestamp) {
7523                                 /*
7524                                  * Find the entry again.  It could have
7525                                  * been clipped after we unlocked the map.
7526                                  */
7527                                 if (!vm_map_lookup_entry(map, s, &first_entry)){
7528                                         assert((map != kernel_map) &&
7529                                                (!entry->is_sub_map));
7530                                         first_entry = first_entry->vme_next;
7531                                         s = first_entry->vme_start;
7532                                 } else {
7533                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7534                                 }
7535                         } else {
7536                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7537                                 first_entry = entry;
7538                         }
7539
7540                         last_timestamp = map->timestamp;
7541
7542                         entry = first_entry;
7543                         while ((entry != vm_map_to_entry(map)) &&
7544                                (entry->vme_start < tmp_entry.vme_end)) {
7545                                 assert(entry->in_transition);
7546                                 entry->in_transition = FALSE;
7547                                 if (entry->needs_wakeup) {
7548                                         entry->needs_wakeup = FALSE;
7549                                         need_wakeup = TRUE;
7550                                 }
7551                                 entry = entry->vme_next;
7552                         }
7553                         /*
7554                          * We have unwired the entry(s).  Go back and
7555                          * delete them.
7556                          */
7557                         entry = first_entry;
7558                         continue;
7559                 }
7560
7561                 /* entry is unwired */
7562                 assert(entry->wired_count == 0);
7563                 assert(entry->user_wired_count == 0);
7564
7565                 assert(s == entry->vme_start);
7566
7567                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
7568                         /*
7569                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
7570                          * vm_map_delete(), some map entries might have been
7571                          * transferred to a "zap_map", which doesn't have a
7572                          * pmap.  The original pmap has already been flushed
7573                          * in the vm_map_delete() call targeting the original
7574                          * map, but when we get to destroying the "zap_map",
7575                          * we don't have any pmap to flush, so let's just skip
7576                          * all this.
7577                          */
7578                 } else if (entry->is_sub_map) {
7579                         if (entry->use_pmap) {
7580 #ifndef NO_NESTED_PMAP
7581                                 int pmap_flags;
7582
7583                                 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
7584                                         /*
7585                                          * This is the final cleanup of the
7586                                          * address space being terminated.
7587                                          * No new mappings are expected and
7588                                          * we don't really need to unnest the
7589                                          * shared region (and lose the "global"
7590                                          * pmap mappings, if applicable).
7591                                          *
7592                                          * Tell the pmap layer that we're
7593                                          * "clean" wrt nesting.
7594                                          */
7595                                         pmap_flags = PMAP_UNNEST_CLEAN;
7596                                 } else {
7597                                         /*
7598                                          * We're unmapping part of the nested
7599                                          * shared region, so we can't keep the
7600                                          * nested pmap.
7601                                          */
7602                                         pmap_flags = 0;
7603                                 }
7604                                 pmap_unnest_options(
7605                                         map->pmap,
7606                                         (addr64_t)entry->vme_start,
7607                                         entry->vme_end - entry->vme_start,
7608                                         pmap_flags);
7609 #endif  /* NO_NESTED_PMAP */
7610                                 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7611                                         /* clean up parent map/maps */
7612                                         vm_map_submap_pmap_clean(
7613                                                 map, entry->vme_start,
7614                                                 entry->vme_end,
7615                                                 VME_SUBMAP(entry),
7616                                                 VME_OFFSET(entry));
7617                                 }
7618                         } else {
7619                                 vm_map_submap_pmap_clean(
7620                                         map, entry->vme_start, entry->vme_end,
7621                                         VME_SUBMAP(entry),
7622                                         VME_OFFSET(entry));
7623                         }
7624                 } else if (VME_OBJECT(entry) != kernel_object &&
7625                            VME_OBJECT(entry) != compressor_object) {
7626                         object = VME_OBJECT(entry);
7627                         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7628                                 vm_object_pmap_protect_options(
7629                                         object, VME_OFFSET(entry),
7630                                         entry->vme_end - entry->vme_start,
7631                                         PMAP_NULL,
7632                                         entry->vme_start,
7633                                         VM_PROT_NONE,
7634                                         PMAP_OPTIONS_REMOVE);
7635                         } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
7636                                    (map->pmap == kernel_pmap)) {
7637                                 /* Remove translations associated
7638                                  * with this range unless the entry
7639                                  * does not have an object, or
7640                                  * it's the kernel map or a descendant
7641                                  * since the platform could potentially
7642                                  * create "backdoor" mappings invisible
7643                                  * to the VM. It is expected that
7644                                  * objectless, non-kernel ranges
7645                                  * do not have such VM invisible
7646                                  * translations.
7647                                  */
7648                                 pmap_remove_options(map->pmap,
7649                                                     (addr64_t)entry->vme_start,
7650                                                     (addr64_t)entry->vme_end,
7651                                                     PMAP_OPTIONS_REMOVE);
7652                         }
7653                 }
7654
7655                 if (entry->iokit_acct) {
7656                         /* alternate accounting */
7657                         DTRACE_VM4(vm_map_iokit_unmapped_region,
7658                                    vm_map_t, map,
7659                                    vm_map_offset_t, entry->vme_start,
7660                                    vm_map_offset_t, entry->vme_end,
7661                                    int, VME_ALIAS(entry));
7662                         vm_map_iokit_unmapped_region(map,
7663                                                      (entry->vme_end -
7664                                                       entry->vme_start));
7665                         entry->iokit_acct = FALSE;
7666                         entry->use_pmap = FALSE;
7667                 }
7668
7669                 /*
7670                  * All pmap mappings for this map entry must have been
7671                  * cleared by now.
7672                  */
7673 #if DEBUG
7674                 assert(vm_map_pmap_is_empty(map,
7675                                             entry->vme_start,
7676                                             entry->vme_end));
7677 #endif /* DEBUG */
7678
7679                 next = entry->vme_next;
7680
7681                 if (map->pmap == kernel_pmap &&
7682                     map->ref_count != 0 &&
7683                     entry->vme_end < end &&
7684                     (next == vm_map_to_entry(map) ||
7685                      next->vme_start != entry->vme_end)) {
7686                         panic("vm_map_delete(%p,0x%llx,0x%llx): "
7687                               "hole after %p at 0x%llx\n",
7688                               map,
7689                               (uint64_t)start,
7690                               (uint64_t)end,
7691                               entry,
7692                               (uint64_t)entry->vme_end);
7693                 }
7694
7695                 s = next->vme_start;
7696                 last_timestamp = map->timestamp;
7697
7698                 if (entry->permanent) {
7699                         /*
7700                          * A permanent entry can not be removed, so leave it
7701                          * in place but remove all access permissions.
7702                          */
7703                         entry->protection = VM_PROT_NONE;
7704                         entry->max_protection = VM_PROT_NONE;
7705                 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
7706                            zap_map != VM_MAP_NULL) {
7707                         vm_map_size_t entry_size;
7708                         /*
7709                          * The caller wants to save the affected VM map entries
7710                          * into the "zap_map".  The caller will take care of
7711                          * these entries.
7712                          */
7713                         /* unlink the entry from "map" ... */
7714                         vm_map_store_entry_unlink(map, entry);
7715                         /* ... and add it to the end of the "zap_map" */
7716                         vm_map_store_entry_link(zap_map,
7717                                           vm_map_last_entry(zap_map),
7718                                           entry);
7719                         entry_size = entry->vme_end - entry->vme_start;
7720                         map->size -= entry_size;
7721                         zap_map->size += entry_size;
7722                         /* we didn't unlock the map, so no timestamp increase */
7723                         last_timestamp--;
7724                 } else {
7725                         vm_map_entry_delete(map, entry);
7726                         /* vm_map_entry_delete unlocks the map */
7727                         vm_map_lock(map);
7728                 }
7729
7730                 entry = next;
7731
7732                 if(entry == vm_map_to_entry(map)) {
7733                         break;
7734                 }
7735                 if (last_timestamp+1 != map->timestamp) {
7736                         /*
7737                          * we are responsible for deleting everything
7738                          * from the give space, if someone has interfered
7739                          * we pick up where we left off, back fills should
7740                          * be all right for anyone except map_delete and
7741                          * we have to assume that the task has been fully
7742                          * disabled before we get here
7743                          */
7744                         if (!vm_map_lookup_entry(map, s, &entry)){
7745                                 entry = entry->vme_next;
7746                                 s = entry->vme_start;
7747                         } else {
7748                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7749                         }
7750                         /*
7751                          * others can not only allocate behind us, we can
7752                          * also see coalesce while we don't have the map lock
7753                          */
7754                         if(entry == vm_map_to_entry(map)) {
7755                                 break;
7756                         }
7757                 }
7758                 last_timestamp = map->timestamp;
7759         }
7760
7761         if (map->wait_for_space)
7762                 thread_wakeup((event_t) map);
7763         /*
7764          * wake up anybody waiting on entries that we have already deleted.
7765          */
7766         if (need_wakeup)
7767                 vm_map_entry_wakeup(map);
7768
7769         return KERN_SUCCESS;
7770 }
7771
7772 /*
7773  *      vm_map_remove:
7774  *
7775  *      Remove the given address range from the target map.
7776  *      This is the exported form of vm_map_delete.
      *
      *      The map lock is taken here and released again before
      *      returning, so the caller presumably must not already hold it.
      *
      *      Parameters:
      *              map     map to remove the range from
      *              start   start of the range
      *              end     end of the range
      *              flags   handed to vm_map_delete() untouched; although
      *                      typed boolean_t, vm_map_delete() tests it as a
      *                      mask of VM_MAP_REMOVE_* bits
      *
      *      Returns whatever vm_map_delete() returns.  No "zap map" is
      *      supplied (VM_MAP_NULL), so removed entries are never saved
      *      on the caller's behalf.
7777  */
7778 kern_return_t
7779 vm_map_remove(
7780         vm_map_t        map,
7781         vm_map_offset_t start,
7782         vm_map_offset_t end,
7783          boolean_t      flags)
7784 {
7785         kern_return_t   result;
7786
7787         vm_map_lock(map);
             /*
              * VM_MAP_RANGE_CHECK is a macro defined elsewhere; judging by
              * the note below it can adjust start/end in place and may
              * leave an empty range (start == end) -- confirm against its
              * definition.
              */
7788         VM_MAP_RANGE_CHECK(map, start, end);
7789         /*
7790          * For the zone_map, the kernel controls the allocation/freeing of memory.
7791          * Any free to the zone_map should be within the bounds of the map and
7792          * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
7793          * free to the zone_map into a no-op, there is a problem and we should
7794          * panic.
7795          */
7796         if ((map == zone_map) && (start == end))
7797                 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
7798         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7799         vm_map_unlock(map);
7800
7801         return(result);
7802 }
7803
7804 /*
7805  *      vm_map_remove_locked:
7806  *
7807  *      Remove the given address range from the target locked map.
7808  *      Same work as vm_map_remove(), minus the locking: this routine
      *      neither takes nor releases the map lock, so the caller is
      *      expected to hold it across the call.  Note that
      *      vm_map_delete() may drop and retake the map lock while it
      *      works (for instance around vm_map_entry_delete()), so the
      *      map is not guaranteed to stay untouched by other threads.
      *
      *      Unlike vm_map_remove(), there is no zone_map check for an
      *      empty range after VM_MAP_RANGE_CHECK().
      *
      *      Parameters:
      *              map     locked map to remove the range from
      *              start   start of the range
      *              end     end of the range
      *              flags   handed to vm_map_delete() untouched
      *
      *      Returns whatever vm_map_delete() returns; no "zap map" is
      *      supplied (VM_MAP_NULL).
7809  */
7810 kern_return_t
7811 vm_map_remove_locked(
7812         vm_map_t        map,
7813         vm_map_offset_t start,
7814         vm_map_offset_t end,
7815         boolean_t       flags)
7816 {
7817         kern_return_t   result;
7818
             /* macro defined elsewhere; may adjust start/end in place -- confirm */
7819         VM_MAP_RANGE_CHECK(map, start, end);
7820         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7821         return(result);
7822 }
7823
7824
7825 /*
7826  *      Routine:        vm_map_copy_discard
7827  *
7828  *      Description:
7829  *              Dispose of a map copy object (returned by
7830  *              vm_map_copyin).
      *
      *              A null copy is accepted and ignored.  What gets
      *              released depends on copy->type:
      *
      *              VM_MAP_COPY_ENTRY_LIST
      *                      every entry is unlinked, its submap or object
      *                      reference is dropped, and the entry is
      *                      disposed of; the copy header then goes back
      *                      to vm_map_copy_zone.
      *              VM_MAP_COPY_OBJECT
      *                      the object reference is dropped and the copy
      *                      header goes back to vm_map_copy_zone.
      *              VM_MAP_COPY_KERNEL_BUFFER
      *                      the copy is released with kfree() instead of
      *                      zfree(); see the comment in that case.
      *
      *              There is no default case: any other type value falls
      *              straight through to the zfree() at the bottom.
7831  */
7832 void
7833 vm_map_copy_discard(
7834         vm_map_copy_t   copy)
7835 {
7836         if (copy == VM_MAP_COPY_NULL)
7837                 return;
7838
7839         switch (copy->type) {
7840         case VM_MAP_COPY_ENTRY_LIST:
                     /* pop entries off the front until only the header sentinel is left */
7841                 while (vm_map_copy_first_entry(copy) !=
7842                        vm_map_copy_to_entry(copy)) {
7843                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
7844
7845                         vm_map_copy_entry_unlink(copy, entry);
                             /* drop the reference the entry held on its backing submap/object */
7846                         if (entry->is_sub_map) {
7847                                 vm_map_deallocate(VME_SUBMAP(entry));
7848                         } else {
7849                                 vm_object_deallocate(VME_OBJECT(entry));
7850                         }
7851                         vm_map_copy_entry_dispose(copy, entry);
7852                 }
7853                 break;
7854         case VM_MAP_COPY_OBJECT:
                     /*
                      * cpy_object can be VM_OBJECT_NULL here (vm_map_copy_copy()
                      * leaves the old copy in that state); presumably
                      * vm_object_deallocate() tolerates that -- confirm.
                      */
7855                 vm_object_deallocate(copy->cpy_object);
7856                 break;
7857         case VM_MAP_COPY_KERNEL_BUFFER:
7858
7859                 /*
7860                  * The vm_map_copy_t and possibly the data buffer were
7861                  * allocated by a single call to kalloc(), i.e. the
7862                  * vm_map_copy_t was not allocated out of the zone.
7863                  */
                     /* sanity check: size must not exceed msg_ool_size_small and offset must be 0 */
7864                 if (copy->size > msg_ool_size_small || copy->offset)
7865                         panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
7866                               (long long)copy->size, (long long)copy->offset);
                     /* free header and inline data in one go; return so the zfree() below is skipped */
7867                 kfree(copy, copy->size + cpy_kdata_hdr_sz);
7868                 return;
7869         }
             /* entry-list and object copies: the header itself came from the zone */
7870         zfree(vm_map_copy_zone, copy);
7871 }
7872
7873 /*
7874  *      Routine:        vm_map_copy_copy
7875  *
7876  *      Description:
7877  *                      Move the information in a map copy object to
7878  *                      a new map copy object, leaving the old one
7879  *                      empty.
7880  *
7881  *                      This is used by kernel routines that need
7882  *                      to look at out-of-line data (in copyin form)
7883  *                      before deciding whether to return SUCCESS.
7884  *                      If the routine returns FAILURE, the original
7885  *                      copy object will be deallocated; therefore,
7886  *                      these routines must make a copy of the copy
7887  *                      object and leave the original empty so that
7888  *                      deallocation will not fail.
      *
      *                      A null copy yields VM_MAP_COPY_NULL.  Otherwise
      *                      the returned header is freshly allocated from
      *                      vm_map_copy_zone, and the old copy is turned
      *                      into a VM_MAP_COPY_OBJECT with a null object.
      *
      *                      NOTE(review): the header is duplicated with a
      *                      plain structure assignment and always comes
      *                      from the zone, whereas vm_map_copy_discard()
      *                      releases VM_MAP_COPY_KERNEL_BUFFER copies with
      *                      kfree().  Presumably callers only pass
      *                      entry-list (or object) copies -- confirm.
7889  */
7890 vm_map_copy_t
7891 vm_map_copy_copy(
7892         vm_map_copy_t   copy)
7893 {
7894         vm_map_copy_t   new_copy;
7895
7896         if (copy == VM_MAP_COPY_NULL)
7897                 return VM_MAP_COPY_NULL;
7898
7899         /*
7900          * Allocate a new copy object, and copy the information
7901          * from the old one into it.
7902          */
7903
             /*
              * The zalloc() result is used without a null check; presumably
              * this zone allocation cannot fail here -- confirm.
              */
7904         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
             /*
              * NOTE(review): this store is overwritten by the whole-structure
              * assignment on the next line, so new_copy ends up with whatever
              * rbh_root value "copy" carries.
              */
7905         new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7906         *new_copy = *copy;
7907
7908         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
7909                 /*
7910                  * The links in the entry chain must be
7911                  * changed to point to the new copy object.
7912                  */
                     /*
                      * Only the two ends of the chain refer to the header, so
                      * re-aiming the first entry's vme_prev and the last entry's
                      * vme_next is enough.  "copy" still holds the old first/last
                      * pointers at this point, which is why it is used to find
                      * them.
                      */
7913                 vm_map_copy_first_entry(copy)->vme_prev
7914                         = vm_map_copy_to_entry(new_copy);
7915                 vm_map_copy_last_entry(copy)->vme_next
7916                         = vm_map_copy_to_entry(new_copy);
7917         }
7918
7919         /*
7920          * Change the old copy object into one that contains
7921          * nothing to be deallocated.
7922          */
             /*
              * With this type, vm_map_copy_discard() only drops cpy_object
              * (null here) and frees the header; it never walks the entry
              * chain, which now belongs to new_copy.
              */
7923         copy->type = VM_MAP_COPY_OBJECT;
7924         copy->cpy_object = VM_OBJECT_NULL;
7925
7926         /*
7927          * Return the new object.
7928          */
7929         return new_copy;
7930 }
7931
     /*
      *      Routine:        vm_map_overwrite_submap_recurse
      *
      *      Description:
      *              Check whether the range starting at dst_addr and
      *              spanning dst_size bytes (end rounded up to a page of
      *              dst_map) may be overwritten.  The entries covering
      *              the range are walked in order; submap entries are
      *              checked by recursing into the submap.
      *
      *              The map lock is taken here and is released on every
      *              return path.  It is also dropped around each
      *              recursive call.
      *
      *      Returns:
      *              KERN_SUCCESS            the whole range passed the checks
      *              KERN_INVALID_ADDRESS    no entry at dst_addr (or at the
      *                                      end of a submap entry), or the
      *                                      range is not contiguous
      *              KERN_PROTECTION_FAILURE an entry is not writeable
      *              KERN_FAILURE            after a submap has been seen, an
      *                                      entry is backed by an object that
      *                                      is not internal or is true_share
      *              any failure returned by the recursive call
      */
7932 static kern_return_t
7933 vm_map_overwrite_submap_recurse(
7934         vm_map_t        dst_map,
7935         vm_map_offset_t dst_addr,
7936         vm_map_size_t   dst_size)
7937 {
7938         vm_map_offset_t dst_end;
7939         vm_map_entry_t  tmp_entry;
7940         vm_map_entry_t  entry;
7941         kern_return_t   result;
7942         boolean_t       encountered_sub_map = FALSE;
7943
7944
7945
7946         /*
7947          *      Verify that the destination is all writeable
7948          *      initially.  We have to trunc the destination
7949          *      address and round the copy size or we'll end up
7950          *      splitting entries in strange ways.
7951          */
7952
7953         dst_end = vm_map_round_page(dst_addr + dst_size,
7954                                     VM_MAP_PAGE_MASK(dst_map));
7955         vm_map_lock(dst_map);
7956
             /*
              * Restart point used after waiting on an in-transition entry.
              * vm_map_entry_wait() is defined elsewhere; jumping back here
              * assumes it returns with the map lock held -- confirm.
              */
7957 start_pass_1:
7958         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7959                 vm_map_unlock(dst_map);
7960                 return(KERN_INVALID_ADDRESS);
7961         }
7962
7963         vm_map_clip_start(dst_map,
7964                           tmp_entry,
7965                           vm_map_trunc_page(dst_addr,
7966                                             VM_MAP_PAGE_MASK(dst_map)));
7967         if (tmp_entry->is_sub_map) {
7968                 /* clipping did unnest if needed */
7969                 assert(!tmp_entry->use_pmap);
7970         }
7971
7972         for (entry = tmp_entry;;) {
7973                 vm_map_entry_t  next;
7974
7975                 next = entry->vme_next;
7976                 while(entry->is_sub_map) {
                             vm_map_t        sub_map;
7977                         vm_map_offset_t sub_start;
7978                         vm_map_offset_t sub_end;
7979                         vm_map_offset_t local_end;
7980
7981                         if (entry->in_transition) {
7982                                 /*
7983                                  * Say that we are waiting, and wait for entry.
7984                                  */
7985                                 entry->needs_wakeup = TRUE;
7986                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
7987
7988                                 goto start_pass_1;
7989                         }
7990
7991                         encountered_sub_map = TRUE;
                             /*
                              * Translate the part of this entry that lies below
                              * dst_end into the submap's address space:
                              * [VME_OFFSET, VME_OFFSET + covered length).
                              */
7992                         sub_start = VME_OFFSET(entry);
7993
7994                         if(entry->vme_end < dst_end)
7995                                 sub_end = entry->vme_end;
7996                         else
7997                                 sub_end = dst_end;
7998                         sub_end -= entry->vme_start;
7999                         sub_end += VME_OFFSET(entry);
8000                         local_end = entry->vme_end;
                             /*
                              * Capture everything needed from "entry" while the
                              * map is still locked.  Once the lock is dropped the
                              * entry may be clipped or removed by another thread,
                              * so it must not be dereferenced again until it has
                              * been looked up afresh below.
                              *
                              * NOTE(review): no extra reference is taken on
                              * sub_map, exactly as before; its lifetime across
                              * the unlocked window presumably relies on the
                              * caller -- confirm.
                              */
                             sub_map = VME_SUBMAP(entry);
8001                         vm_map_unlock(dst_map);
8002
8003                         result = vm_map_overwrite_submap_recurse(
8004                                 sub_map,
8005                                 sub_start,
8006                                 sub_end - sub_start);
8007
8008                         if(result != KERN_SUCCESS)
8009                                 return result;
                             /*
                              * Compare against the end address saved under the
                              * lock, the same value the lookup below resumes
                              * from, rather than re-reading the unlocked entry.
                              */
8010                         if (dst_end <= local_end)
8011                                 return KERN_SUCCESS;
8012                         vm_map_lock(dst_map);
8013                         if(!vm_map_lookup_entry(dst_map, local_end,
8014                                                 &tmp_entry)) {
8015                                 vm_map_unlock(dst_map);
8016                                 return(KERN_INVALID_ADDRESS);
8017                         }
8018                         entry = tmp_entry;
8019                         next = entry->vme_next;
8020                 }
8021
8022                 if ( ! (entry->protection & VM_PROT_WRITE)) {
8023                         vm_map_unlock(dst_map);
8024                         return(KERN_PROTECTION_FAILURE);
8025                 }
8026
8027                 /*
8028                  *      If the entry is in transition, we must wait
8029                  *      for it to exit that state.  Anything could happen
8030                  *      when we unlock the map, so start over.
8031                  */
8032                 if (entry->in_transition) {
8033
8034                         /*
8035                          * Say that we are waiting, and wait for entry.
8036                          */
8037                         entry->needs_wakeup = TRUE;
8038                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8039
8040                         goto start_pass_1;
8041                 }
8042
8043 /*
8044  *              our range is contained completely within this map entry
8045  */
8046                 if (dst_end <= entry->vme_end) {
8047                         vm_map_unlock(dst_map);
8048                         return KERN_SUCCESS;
8049                 }
8050 /*
8051  *              check that range specified is contiguous region
8052  */
8053                 if ((next == vm_map_to_entry(dst_map)) ||
8054                     (next->vme_start != entry->vme_end)) {
8055                         vm_map_unlock(dst_map);
8056                         return(KERN_INVALID_ADDRESS);
8057                 }
8058
8059                 /*
8060                  *      Check for permanent objects in the destination.
8061                  */
                     /*
                      * Such an object only causes a failure once a submap has
                      * been encountered earlier in this walk; otherwise the
                      * entry is accepted.
                      */
8062                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8063                     ((!VME_OBJECT(entry)->internal) ||
8064                      (VME_OBJECT(entry)->true_share))) {
8065                         if(encountered_sub_map) {
8066                                 vm_map_unlock(dst_map);
8067                                 return(KERN_FAILURE);
8068                         }
8069                 }
8070
8071
8072                 entry = next;
8073         }/* for */
             /*
              * Not reached: the loop above has no break, every way out of
              * it is a return or a jump back to start_pass_1.
              */
8074         vm_map_unlock(dst_map);
8075         return(KERN_SUCCESS);
8076 }
8077
8078 /*
8079  *      Routine:        vm_map_copy_overwrite
8080  *
8081  *      Description:
8082  *              Copy the memory described by the map copy
8083  *              object (copy; returned by vm_map_copyin) onto
8084  *              the specified destination region (dst_map, dst_addr).
8085  *              The destination must be writeable.
8086  *
8087  *              Unlike vm_map_copyout, this routine actually
8088  *              writes over previously-mapped memory.  If the
8089  *              previous mapping was to a permanent (user-supplied)
8090  *              memory object, it is preserved.
8091  *
8092  *              The attributes (protection and inheritance) of the
8093  *              destination region are preserved.
8094  *
8095  *              If successful, consumes the copy object.
8096  *              Otherwise, the caller is responsible for it.
8097  *
8098  *      Implementation notes:
8099  *              To overwrite aligned temporary virtual memory, it is
8100  *              sufficient to remove the previous mapping and insert
8101  *              the new copy.  This replacement is done either on
8102  *              the whole region (if no permanent virtual memory
8103  *              objects are embedded in the destination region) or
8104  *              in individual map entries.
8105  *
8106  *              To overwrite permanent virtual memory, it is necessary
8107  *              to copy each page, as the external memory management
8108  *              interface currently does not provide any optimizations.
8109  *
8110  *              Unaligned memory also has to be copied.  It is possible
8111  *              to use 'vm_trickery' to copy the aligned data.  This is
8112  *              not done but not hard to implement.
8113  *
8114  *              Once a page of permanent memory has been overwritten,
8115  *              it is impossible to interrupt this function; otherwise,
8116  *              the call would be neither atomic nor location-independent.
8117  *              The kernel-state portion of a user thread must be
8118  *              interruptible.
8119  *
8120  *              It may be expensive to forward all requests that might
8121  *              overwrite permanent memory (vm_write, vm_copy) to
8122  *              uninterruptible kernel threads.  This routine may be
8123  *              called by interruptible threads; however, success is
8124  *              not guaranteed -- if the request cannot be performed
8125  *              atomically and interruptibly, an error indication is
8126  *              returned.
8127  */
8128
8129 static kern_return_t
8130 vm_map_copy_overwrite_nested(
8131         vm_map_t                dst_map,
8132         vm_map_address_t        dst_addr,
8133         vm_map_copy_t           copy,
8134         boolean_t               interruptible,
8135         pmap_t                  pmap,
8136         boolean_t               discard_on_success)
8137 {
8138         vm_map_offset_t         dst_end;
8139         vm_map_entry_t          tmp_entry;
8140         vm_map_entry_t          entry;
8141         kern_return_t           kr;
8142         boolean_t               aligned = TRUE;
8143         boolean_t               contains_permanent_objects = FALSE;
8144         boolean_t               encountered_sub_map = FALSE;
8145         vm_map_offset_t         base_addr;
8146         vm_map_size_t           copy_size;
8147         vm_map_size_t           total_size;
8148
8149
8150         /*
8151          *      Check for null copy object.
8152          */
8153
8154         if (copy == VM_MAP_COPY_NULL)
8155                 return(KERN_SUCCESS);
8156
8157         /*
8158          *      Check for special kernel buffer allocated
8159          *      by new_ipc_kmsg_copyin.
8160          */
8161
8162         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8163                 return(vm_map_copyout_kernel_buffer(
8164                                dst_map, &dst_addr,
8165                                copy, copy->size, TRUE, discard_on_success));
8166         }
8167
8168         /*
8169          *      Only works for entry lists at the moment.  Will
8170          *      support page lists later.
8171          */
8172
8173         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8174
8175         if (copy->size == 0) {
8176                 if (discard_on_success)
8177                         vm_map_copy_discard(copy);
8178                 return(KERN_SUCCESS);
8179         }
8180
8181         /*
8182          *      Verify that the destination is all writeable
8183          *      initially.  We have to trunc the destination
8184          *      address and round the copy size or we'll end up
8185          *      splitting entries in strange ways.
8186          */
8187
8188         if (!VM_MAP_PAGE_ALIGNED(copy->size,
8189                                  VM_MAP_PAGE_MASK(dst_map)) ||
8190             !VM_MAP_PAGE_ALIGNED(copy->offset,
8191                                  VM_MAP_PAGE_MASK(dst_map)) ||
8192             !VM_MAP_PAGE_ALIGNED(dst_addr,
8193                                  VM_MAP_PAGE_MASK(dst_map)))
8194         {
8195                 aligned = FALSE;
8196                 dst_end = vm_map_round_page(dst_addr + copy->size,
8197                                             VM_MAP_PAGE_MASK(dst_map));
8198         } else {
8199                 dst_end = dst_addr + copy->size;
8200         }
8201
8202         vm_map_lock(dst_map);
8203
8204         /* LP64todo - remove this check when vm_map_commpage64()
8205          * no longer has to stuff in a map_entry for the commpage
8206          * above the map's max_offset.
8207          */
8208         if (dst_addr >= dst_map->max_offset) {
8209                 vm_map_unlock(dst_map);
8210                 return(KERN_INVALID_ADDRESS);
8211         }
8212
8213 start_pass_1:
8214         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8215                 vm_map_unlock(dst_map);
8216                 return(KERN_INVALID_ADDRESS);
8217         }
8218         vm_map_clip_start(dst_map,
8219                           tmp_entry,
8220                           vm_map_trunc_page(dst_addr,
8221                                             VM_MAP_PAGE_MASK(dst_map)));
8222         for (entry = tmp_entry;;) {
8223                 vm_map_entry_t  next = entry->vme_next;
8224
8225                 while(entry->is_sub_map) {
8226                         vm_map_offset_t sub_start;
8227                         vm_map_offset_t sub_end;
8228                         vm_map_offset_t local_end;
8229
8230                         if (entry->in_transition) {
8231
8232                                 /*
8233                                  * Say that we are waiting, and wait for entry.
8234                                  */
8235                                 entry->needs_wakeup = TRUE;
8236                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8237
8238                                 goto start_pass_1;
8239                         }
8240
8241                         local_end = entry->vme_end;
8242                         if (!(entry->needs_copy)) {
8243                                 /* if needs_copy we are a COW submap */
8244                                 /* in such a case we just replace so */
8245                                 /* there is no need for the follow-  */
8246                                 /* ing check.                        */
8247                                 encountered_sub_map = TRUE;
8248                                 sub_start = VME_OFFSET(entry);
8249
8250                                 if(entry->vme_end < dst_end)
8251                                         sub_end = entry->vme_end;
8252                                 else
8253                                         sub_end = dst_end;
8254                                 sub_end -= entry->vme_start;
8255                                 sub_end += VME_OFFSET(entry);
8256                                 vm_map_unlock(dst_map);
8257
8258                                 kr = vm_map_overwrite_submap_recurse(
8259                                         VME_SUBMAP(entry),
8260                                         sub_start,
8261                                         sub_end - sub_start);
8262                                 if(kr != KERN_SUCCESS)
8263                                         return kr;
8264                                 vm_map_lock(dst_map);
8265                         }
8266
8267                         if (dst_end <= entry->vme_end)
8268                                 goto start_overwrite;
8269                         if(!vm_map_lookup_entry(dst_map, local_end,
8270                                                 &entry)) {
8271                                 vm_map_unlock(dst_map);
8272                                 return(KERN_INVALID_ADDRESS);
8273                         }
8274                         next = entry->vme_next;
8275                 }
8276
8277                 if ( ! (entry->protection & VM_PROT_WRITE)) {
8278                         vm_map_unlock(dst_map);
8279                         return(KERN_PROTECTION_FAILURE);
8280                 }
8281
8282                 /*
8283                  *      If the entry is in transition, we must wait
8284                  *      for it to exit that state.  Anything could happen
8285                  *      when we unlock the map, so start over.
8286                  */
8287                 if (entry->in_transition) {
8288
8289                         /*
8290                          * Say that we are waiting, and wait for entry.
8291                          */
8292                         entry->needs_wakeup = TRUE;
8293                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8294
8295                         goto start_pass_1;
8296                 }
8297
8298 /*
8299  *              our range is contained completely within this map entry
8300  */
8301                 if (dst_end <= entry->vme_end)
8302                         break;
8303 /*
8304  *              check that range specified is contiguous region
8305  */
8306                 if ((next == vm_map_to_entry(dst_map)) ||
8307                     (next->vme_start != entry->vme_end)) {
8308                         vm_map_unlock(dst_map);
8309                         return(KERN_INVALID_ADDRESS);
8310                 }
8311
8312
8313                 /*
8314                  *      Check for permanent objects in the destination.
8315                  */
8316                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8317                     ((!VME_OBJECT(entry)->internal) ||
8318                      (VME_OBJECT(entry)->true_share))) {
8319                         contains_permanent_objects = TRUE;
8320                 }
8321
8322                 entry = next;
8323         }/* for */
8324
8325 start_overwrite:
8326         /*
8327          *      If there are permanent objects in the destination, then
8328          *      the copy cannot be interrupted.
8329          */
8330
8331         if (interruptible && contains_permanent_objects) {
8332                 vm_map_unlock(dst_map);
8333                 return(KERN_FAILURE);   /* XXX */
8334         }
8335
8336         /*
8337          *
8338          *      Make a second pass, overwriting the data
8339          *      At the beginning of each loop iteration,
8340          *      the next entry to be overwritten is "tmp_entry"
8341          *      (initially, the value returned from the lookup above),
8342          *      and the starting address expected in that entry
8343          *      is "start".
8344          */
8345
8346         total_size = copy->size;
8347         if(encountered_sub_map) {
8348                 copy_size = 0;
8349                 /* re-calculate tmp_entry since we've had the map */
8350                 /* unlocked */
8351                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8352                         vm_map_unlock(dst_map);
8353                         return(KERN_INVALID_ADDRESS);
8354                 }
8355         } else {
8356                 copy_size = copy->size;
8357         }
8358
8359         base_addr = dst_addr;
8360         while(TRUE) {
8361                 /* deconstruct the copy object and do in parts */
8362                 /* only in sub_map, interruptible case */
8363                 vm_map_entry_t  copy_entry;
8364                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
8365                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
8366                 int             nentries;
8367                 int             remaining_entries = 0;
8368                 vm_map_offset_t new_offset = 0;
8369
8370                 for (entry = tmp_entry; copy_size == 0;) {
8371                         vm_map_entry_t  next;
8372
8373                         next = entry->vme_next;
8374
8375                         /* tmp_entry and base address are moved along */
8376                         /* each time we encounter a sub-map.  Otherwise */
8377                         /* entry can outpace tmp_entry, and the copy_size */
8378                         /* may reflect the distance between them */
8379                         /* if the current entry is found to be in transition */
8380                         /* we will start over at the beginning or the last */
8381                         /* encounter of a submap as dictated by base_addr */
8382                         /* we will zero copy_size accordingly. */
8383                         if (entry->in_transition) {
8384                                 /*
8385                                  * Say that we are waiting, and wait for entry.
8386                                  */
8387                                 entry->needs_wakeup = TRUE;
8388                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8389
8390                                 if(!vm_map_lookup_entry(dst_map, base_addr,
8391                                                         &tmp_entry)) {
8392                                         vm_map_unlock(dst_map);
8393                                         return(KERN_INVALID_ADDRESS);
8394                                 }
8395                                 copy_size = 0;
8396                                 entry = tmp_entry;
8397                                 continue;
8398                         }
8399                         if (entry->is_sub_map) {
8400                                 vm_map_offset_t sub_start;
8401                                 vm_map_offset_t sub_end;
8402                                 vm_map_offset_t local_end;
8403
8404                                 if (entry->needs_copy) {
8405                                         /* if this is a COW submap */
8406                                         /* just back the range with an */
8407                                         /* anonymous entry */
8408                                         if(entry->vme_end < dst_end)
8409                                                 sub_end = entry->vme_end;
8410                                         else
8411                                                 sub_end = dst_end;
8412                                         if(entry->vme_start < base_addr)
8413                                                 sub_start = base_addr;
8414                                         else
8415                                                 sub_start = entry->vme_start;
8416                                         vm_map_clip_end(
8417                                                 dst_map, entry, sub_end);
8418                                         vm_map_clip_start(
8419                                                 dst_map, entry, sub_start);
8420                                         assert(!entry->use_pmap);
8421                                         assert(!entry->iokit_acct);
8422                                         entry->use_pmap = TRUE;
8423                                         entry->is_sub_map = FALSE;
8424                                         vm_map_deallocate(
8425                                                 VME_SUBMAP(entry));
8426                                         VME_OBJECT_SET(entry, NULL);
8427                                         VME_OFFSET_SET(entry, 0);
8428                                         entry->is_shared = FALSE;
8429                                         entry->needs_copy = FALSE;
8430                                         entry->protection = VM_PROT_DEFAULT;
8431                                         entry->max_protection = VM_PROT_ALL;
8432                                         entry->wired_count = 0;
8433                                         entry->user_wired_count = 0;
8434                                         if(entry->inheritance
8435                                            == VM_INHERIT_SHARE)
8436                                                 entry->inheritance = VM_INHERIT_COPY;
8437                                         continue;
8438                                 }
8439                                 /* first take care of any non-sub_map */
8440                                 /* entries to send */
8441                                 if(base_addr < entry->vme_start) {
8442                                         /* stuff to send */
8443                                         copy_size =
8444                                                 entry->vme_start - base_addr;
8445                                         break;
8446                                 }
8447                                 sub_start = VME_OFFSET(entry);
8448
8449                                 if(entry->vme_end < dst_end)
8450                                         sub_end = entry->vme_end;
8451                                 else
8452                                         sub_end = dst_end;
8453                                 sub_end -= entry->vme_start;
8454                                 sub_end += VME_OFFSET(entry);
8455                                 local_end = entry->vme_end;
8456                                 vm_map_unlock(dst_map);
8457                                 copy_size = sub_end - sub_start;
8458
8459                                 /* adjust the copy object */
8460                                 if (total_size > copy_size) {
8461                                         vm_map_size_t   local_size = 0;
8462                                         vm_map_size_t   entry_size;
8463
8464                                         nentries = 1;
8465                                         new_offset = copy->offset;
8466                                         copy_entry = vm_map_copy_first_entry(copy);
8467                                         while(copy_entry !=
8468                                               vm_map_copy_to_entry(copy)){
8469                                                 entry_size = copy_entry->vme_end -
8470                                                         copy_entry->vme_start;
8471                                                 if((local_size < copy_size) &&
8472                                                    ((local_size + entry_size)
8473                                                     >= copy_size)) {
8474                                                         vm_map_copy_clip_end(copy,
8475                                                                              copy_entry,
8476                                                                              copy_entry->vme_start +
8477                                                                              (copy_size - local_size));
8478                                                         entry_size = copy_entry->vme_end -
8479                                                                 copy_entry->vme_start;
8480                                                         local_size += entry_size;
8481                                                         new_offset += entry_size;
8482                                                 }
8483                                                 if(local_size >= copy_size) {
8484                                                         next_copy = copy_entry->vme_next;
8485                                                         copy_entry->vme_next =
8486                                                                 vm_map_copy_to_entry(copy);
8487                                                         previous_prev =
8488                                                                 copy->cpy_hdr.links.prev;
8489                                                         copy->cpy_hdr.links.prev = copy_entry;
8490                                                         copy->size = copy_size;
8491                                                         remaining_entries =
8492                                                                 copy->cpy_hdr.nentries;
8493                                                         remaining_entries -= nentries;
8494                                                         copy->cpy_hdr.nentries = nentries;
8495                                                         break;
8496                                                 } else {
8497                                                         local_size += entry_size;
8498                                                         new_offset += entry_size;
8499                                                         nentries++;
8500                                                 }
8501                                                 copy_entry = copy_entry->vme_next;
8502                                         }
8503                                 }
8504
8505                                 if((entry->use_pmap) && (pmap == NULL)) {
8506                                         kr = vm_map_copy_overwrite_nested(
8507                                                 VME_SUBMAP(entry),
8508                                                 sub_start,
8509                                                 copy,
8510                                                 interruptible,
8511                                                 VME_SUBMAP(entry)->pmap,
8512                                                 TRUE);
8513                                 } else if (pmap != NULL) {
8514                                         kr = vm_map_copy_overwrite_nested(
8515                                                 VME_SUBMAP(entry),
8516                                                 sub_start,
8517                                                 copy,
8518                                                 interruptible, pmap,
8519                                                 TRUE);
8520                                 } else {
8521                                         kr = vm_map_copy_overwrite_nested(
8522                                                 VME_SUBMAP(entry),
8523                                                 sub_start,
8524                                                 copy,
8525                                                 interruptible,
8526                                                 dst_map->pmap,
8527                                                 TRUE);
8528                                 }
8529                                 if(kr != KERN_SUCCESS) {
8530                                         if(next_copy != NULL) {
8531                                                 copy->cpy_hdr.nentries +=
8532                                                         remaining_entries;
8533                                                 copy->cpy_hdr.links.prev->vme_next =
8534                                                         next_copy;
8535                                                 copy->cpy_hdr.links.prev
8536                                                         = previous_prev;
8537                                                 copy->size = total_size;
8538                                         }
8539                                         return kr;
8540                                 }
8541                                 if (dst_end <= local_end) {
8542                                         return(KERN_SUCCESS);
8543                                 }
8544                                 /* otherwise copy no longer exists, it was */
8545                                 /* destroyed after successful copy_overwrite */
8546                                 copy = (vm_map_copy_t)
8547                                         zalloc(vm_map_copy_zone);
8548                                 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8549                                 vm_map_copy_first_entry(copy) =
8550                                         vm_map_copy_last_entry(copy) =
8551                                         vm_map_copy_to_entry(copy);
8552                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
8553                                 copy->offset = new_offset;
8554
8555                                 /*
8556                                  * XXX FBDP
8557                                  * this does not seem to deal with
8558                                  * the VM map store (R&B tree)
8559                                  */
8560
8561                                 total_size -= copy_size;
8562                                 copy_size = 0;
8563                                 /* put back remainder of copy in container */
8564                                 if(next_copy != NULL) {
8565                                         copy->cpy_hdr.nentries = remaining_entries;
8566                                         copy->cpy_hdr.links.next = next_copy;
8567                                         copy->cpy_hdr.links.prev = previous_prev;
8568                                         copy->size = total_size;
8569                                         next_copy->vme_prev =
8570                                                 vm_map_copy_to_entry(copy);
8571                                         next_copy = NULL;
8572                                 }
8573                                 base_addr = local_end;
8574                                 vm_map_lock(dst_map);
8575                                 if(!vm_map_lookup_entry(dst_map,
8576                                                         local_end, &tmp_entry)) {
8577                                         vm_map_unlock(dst_map);
8578                                         return(KERN_INVALID_ADDRESS);
8579                                 }
8580                                 entry = tmp_entry;
8581                                 continue;
8582                         }
8583                         if (dst_end <= entry->vme_end) {
8584                                 copy_size = dst_end - base_addr;
8585                                 break;
8586                         }
8587
8588                         if ((next == vm_map_to_entry(dst_map)) ||
8589                             (next->vme_start != entry->vme_end)) {
8590                                 vm_map_unlock(dst_map);
8591                                 return(KERN_INVALID_ADDRESS);
8592                         }
8593
8594                         entry = next;
8595                 }/* for */
8596
8597                 next_copy = NULL;
8598                 nentries = 1;
8599
8600                 /* adjust the copy object */
8601                 if (total_size > copy_size) {
8602                         vm_map_size_t   local_size = 0;
8603                         vm_map_size_t   entry_size;
8604
8605                         new_offset = copy->offset;
8606                         copy_entry = vm_map_copy_first_entry(copy);
8607                         while(copy_entry != vm_map_copy_to_entry(copy)) {
8608                                 entry_size = copy_entry->vme_end -
8609                                         copy_entry->vme_start;
8610                                 if((local_size < copy_size) &&
8611                                    ((local_size + entry_size)
8612                                     >= copy_size)) {
8613                                         vm_map_copy_clip_end(copy, copy_entry,
8614                                                              copy_entry->vme_start +
8615                                                              (copy_size - local_size));
8616                                         entry_size = copy_entry->vme_end -
8617                                                 copy_entry->vme_start;
8618                                         local_size += entry_size;
8619                                         new_offset += entry_size;
8620                                 }
8621                                 if(local_size >= copy_size) {
8622                                         next_copy = copy_entry->vme_next;
8623                                         copy_entry->vme_next =
8624                                                 vm_map_copy_to_entry(copy);
8625                                         previous_prev =
8626                                                 copy->cpy_hdr.links.prev;
8627                                         copy->cpy_hdr.links.prev = copy_entry;
8628                                         copy->size = copy_size;
8629                                         remaining_entries =
8630                                                 copy->cpy_hdr.nentries;
8631                                         remaining_entries -= nentries;
8632                                         copy->cpy_hdr.nentries = nentries;
8633                                         break;
8634                                 } else {
8635                                         local_size += entry_size;
8636                                         new_offset += entry_size;
8637                                         nentries++;
8638                                 }
8639                                 copy_entry = copy_entry->vme_next;
8640                         }
8641                 }
8642
8643                 if (aligned) {
8644                         pmap_t  local_pmap;
8645
8646                         if(pmap)
8647                                 local_pmap = pmap;
8648                         else
8649                                 local_pmap = dst_map->pmap;
8650
8651                         if ((kr =  vm_map_copy_overwrite_aligned(
8652                                      dst_map, tmp_entry, copy,
8653                                      base_addr, local_pmap)) != KERN_SUCCESS) {
8654                                 if(next_copy != NULL) {
8655                                         copy->cpy_hdr.nentries +=
8656                                                 remaining_entries;
8657                                         copy->cpy_hdr.links.prev->vme_next =
8658                                                 next_copy;
8659                                         copy->cpy_hdr.links.prev =
8660                                                 previous_prev;
8661                                         copy->size += copy_size;
8662                                 }
8663                                 return kr;
8664                         }
8665                         vm_map_unlock(dst_map);
8666                 } else {
8667                         /*
8668                          * Performance gain:
8669                          *
8670                          * if the copy and dst address are misaligned but the same
8671                          * offset within the page we can copy_not_aligned the
8672                          * misaligned parts and copy aligned the rest.  If they are
8673                          * aligned but len is unaligned we simply need to copy
8674                          * the end bit unaligned.  We'll need to split the misaligned
8675                          * bits of the region in this case !
8676                          */
8677                         /* ALWAYS UNLOCKS THE dst_map MAP */
8678                         kr = vm_map_copy_overwrite_unaligned(
8679                                 dst_map,
8680                                 tmp_entry,
8681                                 copy,
8682                                 base_addr,
8683                                 discard_on_success);
8684                         if (kr != KERN_SUCCESS) {
8685                                 if(next_copy != NULL) {
8686                                         copy->cpy_hdr.nentries +=
8687                                                 remaining_entries;
8688                                         copy->cpy_hdr.links.prev->vme_next =
8689                                                 next_copy;
8690                                         copy->cpy_hdr.links.prev =
8691                                                 previous_prev;
8692                                         copy->size += copy_size;
8693                                 }
8694                                 return kr;
8695                         }
8696                 }
8697                 total_size -= copy_size;
8698                 if(total_size == 0)
8699                         break;
8700                 base_addr += copy_size;
8701                 copy_size = 0;
8702                 copy->offset = new_offset;
8703                 if(next_copy != NULL) {
8704                         copy->cpy_hdr.nentries = remaining_entries;
8705                         copy->cpy_hdr.links.next = next_copy;
8706                         copy->cpy_hdr.links.prev = previous_prev;
8707                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
8708                         copy->size = total_size;
8709                 }
8710                 vm_map_lock(dst_map);
8711                 while(TRUE) {
8712                         if (!vm_map_lookup_entry(dst_map,
8713                                                  base_addr, &tmp_entry)) {
8714                                 vm_map_unlock(dst_map);
8715                                 return(KERN_INVALID_ADDRESS);
8716                         }
8717                         if (tmp_entry->in_transition) {
8718                                 entry->needs_wakeup = TRUE;
8719                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8720                         } else {
8721                                 break;
8722                         }
8723                 }
8724                 vm_map_clip_start(dst_map,
8725                                   tmp_entry,
8726                                   vm_map_trunc_page(base_addr,
8727                                                     VM_MAP_PAGE_MASK(dst_map)));
8728
8729                 entry = tmp_entry;
8730         } /* while */
8731
8732         /*
8733          *      Throw away the vm_map_copy object
8734          */
8735         if (discard_on_success)
8736                 vm_map_copy_discard(copy);
8737
8738         return(KERN_SUCCESS);
8739 }/* vm_map_copy_overwrite_nested */
8740
8741 kern_return_t
8742 vm_map_copy_overwrite(
8743         vm_map_t        dst_map,
8744         vm_map_offset_t dst_addr,
8745         vm_map_copy_t   copy,
8746         boolean_t       interruptible)
8747 {
8748         vm_map_size_t   head_size, tail_size;
8749         vm_map_copy_t   head_copy, tail_copy;
8750         vm_map_offset_t head_addr, tail_addr;
8751         vm_map_entry_t  entry;
8752         kern_return_t   kr;
8753         vm_map_offset_t effective_page_mask, effective_page_size;
8754
8755         head_size = 0;
8756         tail_size = 0;
8757         head_copy = NULL;
8758         tail_copy = NULL;
8759         head_addr = 0;
8760         tail_addr = 0;
8761
8762         if (interruptible ||
8763             copy == VM_MAP_COPY_NULL ||
8764             copy->type != VM_MAP_COPY_ENTRY_LIST) {
8765                 /*
8766                  * We can't split the "copy" map if we're interruptible
8767                  * or if we don't have a "copy" map...
8768                  */
8769         blunt_copy:
8770                 return vm_map_copy_overwrite_nested(dst_map,
8771                                                     dst_addr,
8772                                                     copy,
8773                                                     interruptible,
8774                                                     (pmap_t) NULL,
8775                                                     TRUE);
8776         }
8777
8778         effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
8779         effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
8780                                   effective_page_mask);
8781         effective_page_size = effective_page_mask + 1;
8782
8783         if (copy->size < 3 * effective_page_size) {
8784                 /*
8785                  * Too small to bother with optimizing...
8786                  */
8787                 goto blunt_copy;
8788         }
8789
8790         if ((dst_addr & effective_page_mask) !=
8791             (copy->offset & effective_page_mask)) {
8792                 /*
8793                  * Incompatible mis-alignment of source and destination...
8794                  */
8795                 goto blunt_copy;
8796         }
8797
8798         /*
8799          * Proper alignment or identical mis-alignment at the beginning.
8800          * Let's try and do a small unaligned copy first (if needed)
8801          * and then an aligned copy for the rest.
8802          */
8803         if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
8804                 head_addr = dst_addr;
8805                 head_size = (effective_page_size -
8806                              (copy->offset & effective_page_mask));
8807                 head_size = MIN(head_size, copy->size);
8808         }
8809         if (!vm_map_page_aligned(copy->offset + copy->size,
8810                                   effective_page_mask)) {
8811                 /*
8812                  * Mis-alignment at the end.
8813                  * Do an aligned copy up to the last page and
8814                  * then an unaligned copy for the remaining bytes.
8815                  */
8816                 tail_size = ((copy->offset + copy->size) &
8817                              effective_page_mask);
8818                 tail_size = MIN(tail_size, copy->size);
8819                 tail_addr = dst_addr + copy->size - tail_size;
8820                 assert(tail_addr >= head_addr + head_size);
8821         }
8822         assert(head_size + tail_size <= copy->size);
8823
8824         if (head_size + tail_size == copy->size) {
8825                 /*
8826                  * It's all unaligned, no optimization possible...
8827                  */
8828                 goto blunt_copy;
8829         }
8830
8831         /*
8832          * Can't optimize if there are any submaps in the
8833          * destination due to the way we free the "copy" map
8834          * progressively in vm_map_copy_overwrite_nested()
8835          * in that case.
8836          */
8837         vm_map_lock_read(dst_map);
8838         if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
8839                 vm_map_unlock_read(dst_map);
8840                 goto blunt_copy;
8841         }
8842         for (;
8843              (entry != vm_map_copy_to_entry(copy) &&
8844               entry->vme_start < dst_addr + copy->size);
8845              entry = entry->vme_next) {
8846                 if (entry->is_sub_map) {
8847                         vm_map_unlock_read(dst_map);
8848                         goto blunt_copy;
8849                 }
8850         }
8851         vm_map_unlock_read(dst_map);
8852
8853         if (head_size) {
8854                 /*
8855                  * Unaligned copy of the first "head_size" bytes, to reach
8856                  * a page boundary.
8857                  */
8858
8859                 /*
8860                  * Extract "head_copy" out of "copy".
8861                  */
8862                 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8863                 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8864                 vm_map_copy_first_entry(head_copy) =
8865                         vm_map_copy_to_entry(head_copy);
8866                 vm_map_copy_last_entry(head_copy) =
8867                         vm_map_copy_to_entry(head_copy);
8868                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
8869                 head_copy->cpy_hdr.nentries = 0;
8870                 head_copy->cpy_hdr.entries_pageable =
8871                         copy->cpy_hdr.entries_pageable;
8872                 vm_map_store_init(&head_copy->cpy_hdr);
8873
8874                 entry = vm_map_copy_first_entry(copy);
8875                 if (entry->vme_end < copy->offset + head_size) {
8876                         head_size = entry->vme_end - copy->offset;
8877                 }
8878
8879                 head_copy->offset = copy->offset;
8880                 head_copy->size = head_size;
8881                 copy->offset += head_size;
8882                 copy->size -= head_size;
8883
8884                 vm_map_copy_clip_end(copy, entry, copy->offset);
8885                 vm_map_copy_entry_unlink(copy, entry);
8886                 vm_map_copy_entry_link(head_copy,
8887                                        vm_map_copy_to_entry(head_copy),
8888                                        entry);
8889
8890                 /*
8891                  * Do the unaligned copy.
8892                  */
8893                 kr = vm_map_copy_overwrite_nested(dst_map,
8894                                                   head_addr,
8895                                                   head_copy,
8896                                                   interruptible,
8897                                                   (pmap_t) NULL,
8898                                                   FALSE);
8899                 if (kr != KERN_SUCCESS)
8900                         goto done;
8901         }
8902
8903         if (tail_size) {
8904                 /*
8905                  * Extract "tail_copy" out of "copy".
8906                  */
8907                 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8908                 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8909                 vm_map_copy_first_entry(tail_copy) =
8910                         vm_map_copy_to_entry(tail_copy);
8911                 vm_map_copy_last_entry(tail_copy) =
8912                         vm_map_copy_to_entry(tail_copy);
8913                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
8914                 tail_copy->cpy_hdr.nentries = 0;
8915                 tail_copy->cpy_hdr.entries_pageable =
8916                         copy->cpy_hdr.entries_pageable;
8917                 vm_map_store_init(&tail_copy->cpy_hdr);
8918
8919                 tail_copy->offset = copy->offset + copy->size - tail_size;
8920                 tail_copy->size = tail_size;
8921
8922                 copy->size -= tail_size;
8923
8924                 entry = vm_map_copy_last_entry(copy);
8925                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
8926                 entry = vm_map_copy_last_entry(copy);
8927                 vm_map_copy_entry_unlink(copy, entry);
8928                 vm_map_copy_entry_link(tail_copy,
8929                                        vm_map_copy_last_entry(tail_copy),
8930                                        entry);
8931         }
8932
8933         /*
8934          * Copy most (or possibly all) of the data.
8935          */
8936         kr = vm_map_copy_overwrite_nested(dst_map,
8937                                           dst_addr + head_size,
8938                                           copy,
8939                                           interruptible,
8940                                           (pmap_t) NULL,
8941                                           FALSE);
8942         if (kr != KERN_SUCCESS) {
8943                 goto done;
8944         }
8945
8946         if (tail_size) {
8947                 kr = vm_map_copy_overwrite_nested(dst_map,
8948                                                   tail_addr,
8949                                                   tail_copy,
8950                                                   interruptible,
8951                                                   (pmap_t) NULL,
8952                                                   FALSE);
8953         }
8954
8955 done:
8956         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8957         if (kr == KERN_SUCCESS) {
8958                 /*
8959                  * Discard all the copy maps.
8960                  */
8961                 if (head_copy) {
8962                         vm_map_copy_discard(head_copy);
8963                         head_copy = NULL;
8964                 }
8965                 vm_map_copy_discard(copy);
8966                 if (tail_copy) {
8967                         vm_map_copy_discard(tail_copy);
8968                         tail_copy = NULL;
8969                 }
8970         } else {
8971                 /*
8972                  * Re-assemble the original copy map.
8973                  */
8974                 if (head_copy) {
8975                         entry = vm_map_copy_first_entry(head_copy);
8976                         vm_map_copy_entry_unlink(head_copy, entry);
8977                         vm_map_copy_entry_link(copy,
8978                                                vm_map_copy_to_entry(copy),
8979                                                entry);
8980                         copy->offset -= head_size;
8981                         copy->size += head_size;
8982                         vm_map_copy_discard(head_copy);
8983                         head_copy = NULL;
8984                 }
8985                 if (tail_copy) {
8986                         entry = vm_map_copy_last_entry(tail_copy);
8987                         vm_map_copy_entry_unlink(tail_copy, entry);
8988                         vm_map_copy_entry_link(copy,
8989                                                vm_map_copy_last_entry(copy),
8990                                                entry);
8991                         copy->size += tail_size;
8992                         vm_map_copy_discard(tail_copy);
8993                         tail_copy = NULL;
8994                 }
8995         }
8996         return kr;
8997 }
8998
8999
9000 /*
9001  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
9002  *
9003  *      Description:
9004  *      Physically copy unaligned data
9005  *
9006  *      Implementation:
9007  *      Unaligned parts of pages have to be physically copied.  We use
9008  *      a modified form of vm_fault_copy (which understands non-aligned
9009  *      page offsets and sizes) to do the copy.  We attempt to copy as
9010  *      much memory in one go as possible, however vm_fault_copy copies
9011  *      within 1 memory object so we have to find the smallest of "amount left",
9012  *      "source object data size" and "target object data size".  With
9013  *      unaligned data we don't need to split regions, therefore the source
9014  *      (copy) object should be one map entry, the target range may be split
9015  *      over multiple map entries however.  In any event we are pessimistic
9016  *      about these assumptions.
9017  *
9018  *      Assumptions:
9019  *      dst_map is write-locked on entry.  It is returned unlocked on success
9020  *      and on error alike, except that a zero-size copy leaves it read-locked.
9021  */
9022
9023 static kern_return_t       /* KERN_SUCCESS, KERN_INVALID_ADDRESS or the error from vm_fault_copy() */
9024 vm_map_copy_overwrite_unaligned(
9025         vm_map_t        dst_map,        /* destination map (see the locking notes above) */
9026         vm_map_entry_t  entry,          /* dst_map entry that contains "start" */
9027         vm_map_copy_t   copy,           /* source data, walked as a list of copy entries */
9028         vm_map_offset_t start,          /* destination address of the first byte to write */
9029         boolean_t       discard_on_success)     /* if set, dispose of each copy entry once consumed */
9030 {
9031         vm_map_entry_t          copy_entry;
9032         vm_map_entry_t          copy_entry_next;
9033         vm_map_version_t        version;
9034         vm_object_t             dst_object;
9035         vm_object_offset_t      dst_offset;     /* offset of "start" from entry->vme_start */
9036         vm_object_offset_t      src_offset;     /* offset of the data from copy_entry->vme_start */
9037         vm_object_offset_t      entry_offset;   /* VME_OFFSET(entry), saved before unlocking */
9038         vm_map_offset_t         entry_end;      /* entry->vme_end, saved before unlocking */
9039         vm_map_size_t           src_size,
9040                                 dst_size,
9041                                 copy_size,
9042                                 amount_left;
9043         kern_return_t           kr = KERN_SUCCESS;
9044
9045
9046         copy_entry = vm_map_copy_first_entry(copy);
9047
9048         vm_map_lock_write_to_read(dst_map);     /* the loop below runs under a read lock */
9049
9050         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9051         amount_left = copy->size;
9052 /*
9053  *      unaligned, so the first copy entry was never clipped: src_offset (the
9054  *      sub-page part of copy->offset) locates the data within that entry.
9055  */
9056         while (amount_left > 0) {
9057
9058                 if (entry == vm_map_to_entry(dst_map)) {
9059                         vm_map_unlock_read(dst_map);
9060                         return KERN_INVALID_ADDRESS;
9061                 }
9062
9063                 /* "start" must be within the current map entry */
9064                 assert ((start>=entry->vme_start) && (start<entry->vme_end));
9065
9066                 dst_offset = start - entry->vme_start;
9067
9068                 dst_size = entry->vme_end - start;
9069
9070                 src_size = copy_entry->vme_end -
9071                         (copy_entry->vme_start + src_offset);
9072
9073                 if (dst_size < src_size) {
9074 /*
9075  *                      we can only copy dst_size bytes before
9076  *                      we have to get the next destination entry
9077  */
9078                         copy_size = dst_size;
9079                 } else {
9080 /*
9081  *                      we can only copy src_size bytes before
9082  *                      we have to get the next source copy entry
9083  */
9084                         copy_size = src_size;
9085                 }
9086
9087                 if (copy_size > amount_left) {
9088                         copy_size = amount_left;
9089                 }
9090 /*
9091  *              Entry needs copy, create a shadow object for the
9092  *              copy-on-write region (done under the write lock).
9093  */
9094                 if (entry->needs_copy &&
9095                     ((entry->protection & VM_PROT_WRITE) != 0))
9096                 {
9097                         if (vm_map_lock_read_to_write(dst_map)) {
9098                                 vm_map_lock_read(dst_map);
9099                                 goto RetryLookup;       /* upgrade failed: look "start" up again */
9100                         }
9101                         VME_OBJECT_SHADOW(entry,
9102                                           (vm_map_size_t)(entry->vme_end
9103                                                           - entry->vme_start));
9104                         entry->needs_copy = FALSE;
9105                         vm_map_lock_write_to_read(dst_map);
9106                 }
9107                 dst_object = VME_OBJECT(entry);
9108 /*
9109  *              unlike with the virtual (aligned) copy we're going
9110  *              to fault on it therefore we need a target object.
9111  */
9112                 if (dst_object == VM_OBJECT_NULL) {
9113                         if (vm_map_lock_read_to_write(dst_map)) {
9114                                 vm_map_lock_read(dst_map);
9115                                 goto RetryLookup;       /* upgrade failed: look "start" up again */
9116                         }
9117                         dst_object = vm_object_allocate((vm_map_size_t)
9118                                                         entry->vme_end - entry->vme_start);
9119                         VME_OBJECT(entry) = dst_object;
9120                         VME_OFFSET_SET(entry, 0);
9121                         assert(entry->use_pmap);
9122                         vm_map_lock_write_to_read(dst_map);
9123                 }
9124 /*
9125  *              Take an object reference and unlock map. The "entry" may
9126  *              disappear or change when the map is unlocked.
9127  */
9128                 vm_object_reference(dst_object);
9129                 version.main_timestamp = dst_map->timestamp;    /* compared after relocking to detect map changes */
9130                 entry_offset = VME_OFFSET(entry);
9131                 entry_end = entry->vme_end;
9132                 vm_map_unlock_read(dst_map);
9133 /*
9134  *              Copy as much as possible in one pass
9135  */
9136                 kr = vm_fault_copy(
9137                         VME_OBJECT(copy_entry),
9138                         VME_OFFSET(copy_entry) + src_offset,
9139                         &copy_size,
9140                         dst_object,
9141                         entry_offset + dst_offset,
9142                         dst_map,
9143                         &version,
9144                         THREAD_UNINT );
9145
9146                 start += copy_size;     /* NOTE(review): copy_size is in/out -- presumably the amount actually copied; confirm */
9147                 src_offset += copy_size;
9148                 amount_left -= copy_size;
9149 /*
9150  *              Release the object reference
9151  */
9152                 vm_object_deallocate(dst_object);
9153 /*
9154  *              If a hard error occurred, return it now (dst_map is unlocked)
9155  */
9156                 if (kr != KERN_SUCCESS)
9157                         return kr;
9158
9159                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9160                     || amount_left == 0)
9161                 {
9162 /*
9163  *                      all done with this copy entry, dispose.
9164  */
9165                         copy_entry_next = copy_entry->vme_next;
9166
9167                         if (discard_on_success) {
9168                                 vm_map_copy_entry_unlink(copy, copy_entry);
9169                                 assert(!copy_entry->is_sub_map);
9170                                 vm_object_deallocate(VME_OBJECT(copy_entry));
9171                                 vm_map_copy_entry_dispose(copy, copy_entry);
9172                         }
9173
9174                         if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9175                             amount_left) {
9176 /*
9177  *                              not finished copying but run out of source
9178  */
9179                                 return KERN_INVALID_ADDRESS;    /* dst_map is unlocked here */
9180                         }
9181
9182                         copy_entry = copy_entry_next;
9183
9184                         src_offset = 0;
9185                 }
9186
9187                 if (amount_left == 0)
9188                         return KERN_SUCCESS;    /* dst_map is left unlocked */
9189
9190                 vm_map_lock_read(dst_map);
9191                 if (version.main_timestamp == dst_map->timestamp) {
9192                         if (start == entry_end) {
9193 /*
9194  *                              destination region is split.  Use the version
9195  *                              information to avoid a lookup in the normal
9196  *                              case.
9197  */
9198                                 entry = entry->vme_next;
9199 /*
9200  *                              should be contiguous. Fail if we encounter
9201  *                              a hole in the destination.
9202  */
9203                                 if (start != entry->vme_start) {
9204                                         vm_map_unlock_read(dst_map);
9205                                         return KERN_INVALID_ADDRESS ;
9206                                 }
9207                         }
9208                 } else {
9209 /*
9210  *                      Map version check failed.
9211  *                      we must lookup the entry because somebody
9212  *                      might have changed the map behind our backs.
9213  */
9214                 RetryLookup:    /* also the target of the failed lock upgrades above */
9215                         if (!vm_map_lookup_entry(dst_map, start, &entry))
9216                         {
9217                                 vm_map_unlock_read(dst_map);
9218                                 return KERN_INVALID_ADDRESS ;
9219                         }
9220                 }
9221         }/* while */
9222
9223         return KERN_SUCCESS;    /* only reached when copy->size was 0; dst_map is still read-locked */
9224 }/* vm_map_copy_overwrite_unaligned */
9225
9226 /*
9227  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
9228  *
9229  *      Description:
9230  *      Does all the vm_trickery possible for whole pages.
9231  *
9232  *      Implementation:
9233  *
9234  *      If there are no permanent objects in the destination,
9235  *      and the source and destination map entry zones match,
9236  *      and the destination map entry is not shared,
9237  *      then the map entries can be deleted and replaced
9238  *      with those from the copy.  The following code is the
9239  *      basic idea of what to do, but there are lots of annoying
9240  *      little details about getting protection and inheritance
9241  *      right.  Should add protection, inheritance, and sharing checks
9242  *      to the above pass and make sure that no wiring is involved.
9243  */
9244
9245 int vm_map_copy_overwrite_aligned_src_not_internal = 0;        /* bumped in vm_map_copy_overwrite_aligned() before "goto slow_copy": malloc-tagged destination, source backed by a non-internal object */
9246 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;       /* same path (!CONFIG_EMBEDDED only): source object is true_share or its copy_strategy is not MEMORY_OBJECT_COPY_SYMMETRIC */
9247 int vm_map_copy_overwrite_aligned_src_large = 0;       /* bumped before "goto slow_copy" (!CONFIG_EMBEDDED only): small copy (<= 128 KB) out of a large (>= 64 MB) source object */
9248
9249 static kern_return_t
9250 vm_map_copy_overwrite_aligned(
9251         vm_map_t        dst_map,
9252         vm_map_entry_t  tmp_entry,
9253         vm_map_copy_t   copy,
9254         vm_map_offset_t start,
9255         __unused pmap_t pmap)
9256 {
9257         vm_object_t     object;
9258         vm_map_entry_t  copy_entry;
9259         vm_map_size_t   copy_size;
9260         vm_map_size_t   size;
9261         vm_map_entry_t  entry;
9262
9263         while ((copy_entry = vm_map_copy_first_entry(copy))
9264                != vm_map_copy_to_entry(copy))
9265         {
9266                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9267
9268                 entry = tmp_entry;
9269                 if (entry->is_sub_map) {
9270                         /* unnested when clipped earlier */
9271                         assert(!entry->use_pmap);
9272                 }
9273                 if (entry == vm_map_to_entry(dst_map)) {
9274                         vm_map_unlock(dst_map);
9275                         return KERN_INVALID_ADDRESS;
9276                 }
9277                 size = (entry->vme_end - entry->vme_start);
9278                 /*
9279                  *      Make sure that no holes popped up in the
9280                  *      address map, and that the protection is
9281                  *      still valid, in case the map was unlocked
9282                  *      earlier.
9283                  */
9284
9285                 if ((entry->vme_start != start) || ((entry->is_sub_map)
9286                                                     && !entry->needs_copy)) {
9287                         vm_map_unlock(dst_map);
9288                         return(KERN_INVALID_ADDRESS);
9289                 }
9290                 assert(entry != vm_map_to_entry(dst_map));
9291
9292                 /*
9293                  *      Check protection again
9294                  */
9295
9296                 if ( ! (entry->protection & VM_PROT_WRITE)) {
9297                         vm_map_unlock(dst_map);
9298                         return(KERN_PROTECTION_FAILURE);
9299                 }
9300
9301                 /*
9302                  *      Adjust to source size first
9303                  */
9304
9305                 if (copy_size < size) {
9306                         if (entry->map_aligned &&
9307                             !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9308                                                  VM_MAP_PAGE_MASK(dst_map))) {
9309                                 /* no longer map-aligned */
9310                                 entry->map_aligned = FALSE;
9311                         }
9312                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9313                         size = copy_size;
9314                 }
9315
9316                 /*
9317                  *      Adjust to destination size
9318                  */
9319
9320                 if (size < copy_size) {
9321                         vm_map_copy_clip_end(copy, copy_entry,
9322                                              copy_entry->vme_start + size);
9323                         copy_size = size;
9324                 }
9325
9326                 assert((entry->vme_end - entry->vme_start) == size);
9327                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9328                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9329
9330                 /*
9331                  *      If the destination contains temporary unshared memory,
9332                  *      we can perform the copy by throwing it away and
9333                  *      installing the source data.
9334                  */
9335
9336                 object = VME_OBJECT(entry);
9337                 if ((!entry->is_shared &&
9338                      ((object == VM_OBJECT_NULL) ||
9339                       (object->internal && !object->true_share))) ||
9340                     entry->needs_copy) {
9341                         vm_object_t     old_object = VME_OBJECT(entry);
9342                         vm_object_offset_t      old_offset = VME_OFFSET(entry);
9343                         vm_object_offset_t      offset;
9344
9345                         /*
9346                          * Ensure that the source and destination aren't
9347                          * identical
9348                          */
9349                         if (old_object == VME_OBJECT(copy_entry) &&
9350                             old_offset == VME_OFFSET(copy_entry)) {
9351                                 vm_map_copy_entry_unlink(copy, copy_entry);
9352                                 vm_map_copy_entry_dispose(copy, copy_entry);
9353
9354                                 if (old_object != VM_OBJECT_NULL)
9355                                         vm_object_deallocate(old_object);
9356
9357                                 start = tmp_entry->vme_end;
9358                                 tmp_entry = tmp_entry->vme_next;
9359                                 continue;
9360                         }
9361
9362 #if !CONFIG_EMBEDDED
9363 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9364 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
9365                         if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9366                             VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9367                             copy_size <= __TRADEOFF1_COPY_SIZE) {
9368                                 /*
9369                                  * Virtual vs. Physical copy tradeoff #1.
9370                                  *
9371                                  * Copying only a few pages out of a large
9372                                  * object:  do a physical copy instead of
9373                                  * a virtual copy, to avoid possibly keeping
9374                                  * the entire large object alive because of
9375                                  * those few copy-on-write pages.
9376                                  */
9377                                 vm_map_copy_overwrite_aligned_src_large++;
9378                                 goto slow_copy;
9379                         }
9380 #endif /* !CONFIG_EMBEDDED */
9381
9382                         if ((dst_map->pmap != kernel_pmap) &&
9383                             (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9384                             (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
9385                                 vm_object_t new_object, new_shadow;
9386
9387                                 /*
9388                                  * We're about to map something over a mapping
9389                                  * established by malloc()...
9390                                  */
9391                                 new_object = VME_OBJECT(copy_entry);
9392                                 if (new_object != VM_OBJECT_NULL) {
9393                                         vm_object_lock_shared(new_object);
9394                                 }
9395                                 while (new_object != VM_OBJECT_NULL &&
9396 #if !CONFIG_EMBEDDED
9397                                        !new_object->true_share &&
9398                                        new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9399 #endif /* !CONFIG_EMBEDDED */
9400                                        new_object->internal) {
9401                                         new_shadow = new_object->shadow;
9402                                         if (new_shadow == VM_OBJECT_NULL) {
9403                                                 break;
9404                                         }
9405                                         vm_object_lock_shared(new_shadow);
9406                                         vm_object_unlock(new_object);
9407                                         new_object = new_shadow;
9408                                 }
9409                                 if (new_object != VM_OBJECT_NULL) {
9410                                         if (!new_object->internal) {
9411                                                 /*
9412                                                  * The new mapping is backed
9413                                                  * by an external object.  We
9414                                                  * don't want malloc'ed memory
9415                                                  * to be replaced with such a
9416                                                  * non-anonymous mapping, so
9417                                                  * let's go off the optimized
9418                                                  * path...
9419                                                  */
9420                                                 vm_map_copy_overwrite_aligned_src_not_internal++;
9421                                                 vm_object_unlock(new_object);
9422                                                 goto slow_copy;
9423                                         }
9424 #if !CONFIG_EMBEDDED
9425                                         if (new_object->true_share ||
9426                                             new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9427                                                 /*
9428                                                  * Same if there's a "true_share"
9429                                                  * object in the shadow chain, or
9430                                                  * an object with a non-default
9431                                                  * (SYMMETRIC) copy strategy.
9432                                                  */
9433                                                 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9434                                                 vm_object_unlock(new_object);
9435                                                 goto slow_copy;
9436                                         }
9437 #endif /* !CONFIG_EMBEDDED */
9438                                         vm_object_unlock(new_object);
9439                                 }
9440                                 /*
9441                                  * The new mapping is still backed by
9442                                  * anonymous (internal) memory, so it's
9443                                  * OK to substitute it for the original
9444                                  * malloc() mapping.
9445                                  */
9446                         }
9447
9448                         if (old_object != VM_OBJECT_NULL) {
9449                                 if(entry->is_sub_map) {
9450                                         if(entry->use_pmap) {
9451 #ifndef NO_NESTED_PMAP
9452                                                 pmap_unnest(dst_map->pmap,
9453                                                             (addr64_t)entry->vme_start,
9454                                                             entry->vme_end - entry->vme_start);
9455 #endif  /* NO_NESTED_PMAP */
9456                                                 if(dst_map->mapped_in_other_pmaps) {
9457                                                         /* clean up parent */
9458                                                         /* map/maps */
9459                                                         vm_map_submap_pmap_clean(
9460                                                                 dst_map, entry->vme_start,
9461                                                                 entry->vme_end,
9462                                                                 VME_SUBMAP(entry),
9463                                                                 VME_OFFSET(entry));
9464                                                 }
9465                                         } else {
9466                                                 vm_map_submap_pmap_clean(
9467                                                         dst_map, entry->vme_start,
9468                                                         entry->vme_end,
9469                                                         VME_SUBMAP(entry),
9470                                                         VME_OFFSET(entry));
9471                                         }
9472                                         vm_map_deallocate(VME_SUBMAP(entry));
9473                                 } else {
9474                                         if(dst_map->mapped_in_other_pmaps) {
9475                                                 vm_object_pmap_protect_options(
9476                                                         VME_OBJECT(entry),
9477                                                         VME_OFFSET(entry),
9478                                                         entry->vme_end
9479                                                         - entry->vme_start,
9480                                                         PMAP_NULL,
9481                                                         entry->vme_start,
9482                                                         VM_PROT_NONE,
9483                                                         PMAP_OPTIONS_REMOVE);
9484                                         } else {
9485                                                 pmap_remove_options(
9486                                                         dst_map->pmap,
9487                                                         (addr64_t)(entry->vme_start),
9488                                                         (addr64_t)(entry->vme_end),
9489                                                         PMAP_OPTIONS_REMOVE);
9490                                         }
9491                                         vm_object_deallocate(old_object);
9492                                 }
9493                         }
9494
9495                         if (entry->iokit_acct) {
9496                                 /* keep using iokit accounting */
9497                                 entry->use_pmap = FALSE;
9498                         } else {
9499                                 /* use pmap accounting */
9500                                 entry->use_pmap = TRUE;
9501                         }
9502                         entry->is_sub_map = FALSE;
9503                         VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9504                         object = VME_OBJECT(entry);
9505                         entry->needs_copy = copy_entry->needs_copy;
9506                         entry->wired_count = 0;
9507                         entry->user_wired_count = 0;
9508                         offset = VME_OFFSET(copy_entry);
9509                         VME_OFFSET_SET(entry, offset);
9510
9511                         vm_map_copy_entry_unlink(copy, copy_entry);
9512                         vm_map_copy_entry_dispose(copy, copy_entry);
9513
9514                         /*
9515                          * we could try to push pages into the pmap at this point, BUT
9516                          * this optimization only saved on average 2 us per page if ALL
9517                          * the pages in the source were currently mapped
9518                          * and ALL the pages in the dest were touched, if there were fewer
9519                          * than 2/3 of the pages touched, this optimization actually cost more cycles
9520                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
9521                          */
9522
9523                         /*
9524                          *      Set up for the next iteration.  The map
9525                          *      has not been unlocked, so the next
9526                          *      address should be at the end of this
9527                          *      entry, and the next map entry should be
9528                          *      the one following it.
9529                          */
9530
9531                         start = tmp_entry->vme_end;
9532                         tmp_entry = tmp_entry->vme_next;
9533                 } else {
9534                         vm_map_version_t        version;
9535                         vm_object_t             dst_object;
9536                         vm_object_offset_t      dst_offset;
9537                         kern_return_t           r;
9538
9539                 slow_copy:
9540                         if (entry->needs_copy) {
9541                                 VME_OBJECT_SHADOW(entry,
9542                                                   (entry->vme_end -
9543                                                    entry->vme_start));
9544                                 entry->needs_copy = FALSE;
9545                         }
9546
9547                         dst_object = VME_OBJECT(entry);
9548                         dst_offset = VME_OFFSET(entry);
9549
9550                         /*
9551                          *      Take an object reference, and record
9552                          *      the map version information so that the
9553                          *      map can be safely unlocked.
9554                          */
9555
9556                         if (dst_object == VM_OBJECT_NULL) {
9557                                 /*
9558                                  * We would usually have just taken the
9559                                  * optimized path above if the destination
9560                                  * object has not been allocated yet.  But we
9561                                  * now disable that optimization if the copy
9562                                  * entry's object is not backed by anonymous
9563                                  * memory to avoid replacing malloc'ed
9564                                  * (i.e. re-usable) anonymous memory with a
9565                                  * not-so-anonymous mapping.
9566                                  * So we have to handle this case here and
9567                                  * allocate a new VM object for this map entry.
9568                                  */
9569                                 dst_object = vm_object_allocate(
9570                                         entry->vme_end - entry->vme_start);
9571                                 dst_offset = 0;
9572                                 VME_OBJECT_SET(entry, dst_object);
9573                                 VME_OFFSET_SET(entry, dst_offset);
9574                                 assert(entry->use_pmap);
9575
9576                         }
9577
9578                         vm_object_reference(dst_object);
9579
9580                         /* account for unlock bumping up timestamp */
9581                         version.main_timestamp = dst_map->timestamp + 1;
9582
9583                         vm_map_unlock(dst_map);
9584
9585                         /*
9586                          *      Copy as much as possible in one pass
9587                          */
9588
9589                         copy_size = size;
9590                         r = vm_fault_copy(
9591                                 VME_OBJECT(copy_entry),
9592                                 VME_OFFSET(copy_entry),
9593                                 &copy_size,
9594                                 dst_object,
9595                                 dst_offset,
9596                                 dst_map,
9597                                 &version,
9598                                 THREAD_UNINT );
9599
9600                         /*
9601                          *      Release the object reference
9602                          */
9603
9604                         vm_object_deallocate(dst_object);
9605
9606                         /*
9607                          *      If a hard error occurred, return it now
9608                          */
9609
9610                         if (r != KERN_SUCCESS)
9611                                 return(r);
9612
9613                         if (copy_size != 0) {
9614                                 /*
9615                                  *      Dispose of the copied region
9616                                  */
9617
9618                                 vm_map_copy_clip_end(copy, copy_entry,
9619                                                      copy_entry->vme_start + copy_size);
9620                                 vm_map_copy_entry_unlink(copy, copy_entry);
9621                                 vm_object_deallocate(VME_OBJECT(copy_entry));
9622                                 vm_map_copy_entry_dispose(copy, copy_entry);
9623                         }
9624
9625                         /*
9626                          *      Pick up in the destination map where we left off.
9627                          *
9628                          *      Use the version information to avoid a lookup
9629                          *      in the normal case.
9630                          */
9631
9632                         start += copy_size;
9633                         vm_map_lock(dst_map);
9634                         if (version.main_timestamp == dst_map->timestamp &&
9635                             copy_size != 0) {
9636                                 /* We can safely use saved tmp_entry value */
9637
9638                                 if (tmp_entry->map_aligned &&
9639                                     !VM_MAP_PAGE_ALIGNED(
9640                                             start,
9641                                             VM_MAP_PAGE_MASK(dst_map))) {
9642                                         /* no longer map-aligned */
9643                                         tmp_entry->map_aligned = FALSE;
9644                                 }
9645                                 vm_map_clip_end(dst_map, tmp_entry, start);
9646                                 tmp_entry = tmp_entry->vme_next;
9647                         } else {
9648                                 /* Must do lookup of tmp_entry */
9649
9650                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
9651                                         vm_map_unlock(dst_map);
9652                                         return(KERN_INVALID_ADDRESS);
9653                                 }
9654                                 if (tmp_entry->map_aligned &&
9655                                     !VM_MAP_PAGE_ALIGNED(
9656                                             start,
9657                                             VM_MAP_PAGE_MASK(dst_map))) {
9658                                         /* no longer map-aligned */
9659                                         tmp_entry->map_aligned = FALSE;
9660                                 }
9661                                 vm_map_clip_start(dst_map, tmp_entry, start);
9662                         }
9663                 }
9664         }/* while */
9665
9666         return(KERN_SUCCESS);
9667 }/* vm_map_copy_overwrite_aligned */
9668
9669 /*
9670  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
9671  *
9672  *      Description:
9673  *              Copy in data to a kernel buffer from space in the
9674  *              source map. The original space may be optionally
9675  *              deallocated.
9676  *
9677  *              If successful, returns a new copy object.
9678  */
9679 static kern_return_t
9680 vm_map_copyin_kernel_buffer(
9681         vm_map_t        src_map,
9682         vm_map_offset_t src_addr,
9683         vm_map_size_t   len,
9684         boolean_t       src_destroy,
9685         vm_map_copy_t   *copy_result)
9686 {
9687         kern_return_t kr;
9688         vm_map_copy_t copy;
9689         vm_size_t kalloc_size;
9690
9691         if (len > msg_ool_size_small)
9692                 return KERN_INVALID_ARGUMENT;
9693
9694         kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
9695
9696         copy = (vm_map_copy_t)kalloc(kalloc_size);
9697         if (copy == VM_MAP_COPY_NULL)
9698                 return KERN_RESOURCE_SHORTAGE;
9699         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
9700         copy->size = len;
9701         copy->offset = 0;
9702
9703         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
9704         if (kr != KERN_SUCCESS) {
9705                 kfree(copy, kalloc_size);
9706                 return kr;
9707         }
9708         if (src_destroy) {
9709                 (void) vm_map_remove(
9710                         src_map,
9711                         vm_map_trunc_page(src_addr,
9712                                           VM_MAP_PAGE_MASK(src_map)),
9713                         vm_map_round_page(src_addr + len,
9714                                           VM_MAP_PAGE_MASK(src_map)),
9715                         (VM_MAP_REMOVE_INTERRUPTIBLE |
9716                          VM_MAP_REMOVE_WAIT_FOR_KWIRE |
9717                          ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
9718         }
9719         *copy_result = copy;
9720         return KERN_SUCCESS;
9721 }
9722
9723 /*
9724  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
9725  *
9726  *      Description:
9727  *              Copy out data from a kernel buffer into space in the
9728  *              destination map. The space may be otpionally dynamically
9729  *              allocated.
9730  *
9731  *              If successful, consumes the copy object.
9732  *              Otherwise, the caller is responsible for it.
9733  */
9734 static int vm_map_copyout_kernel_buffer_failures = 0;
9735 static kern_return_t
9736 vm_map_copyout_kernel_buffer(
9737         vm_map_t                map,
9738         vm_map_address_t        *addr,  /* IN/OUT */
9739         vm_map_copy_t           copy,
9740         vm_map_size_t           copy_size,
9741         boolean_t               overwrite,
9742         boolean_t               consume_on_success)
9743 {
9744         kern_return_t kr = KERN_SUCCESS;
9745         thread_t thread = current_thread();
9746
9747         assert(copy->size == copy_size);
9748
9749         /*
9750          * check for corrupted vm_map_copy structure
9751          */
9752         if (copy_size > msg_ool_size_small || copy->offset)
9753                 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
9754                       (long long)copy->size, (long long)copy->offset);
9755
9756         if (!overwrite) {
9757
9758                 /*
9759                  * Allocate space in the target map for the data
9760                  */
9761                 *addr = 0;
9762                 kr = vm_map_enter(map,
9763                                   addr,
9764                                   vm_map_round_page(copy_size,
9765                                                     VM_MAP_PAGE_MASK(map)),
9766                                   (vm_map_offset_t) 0,
9767                                   VM_FLAGS_ANYWHERE,
9768                                   VM_MAP_KERNEL_FLAGS_NONE,
9769                                   VM_KERN_MEMORY_NONE,
9770                                   VM_OBJECT_NULL,
9771                                   (vm_object_offset_t) 0,
9772                                   FALSE,
9773                                   VM_PROT_DEFAULT,
9774                                   VM_PROT_ALL,
9775                                   VM_INHERIT_DEFAULT);
9776                 if (kr != KERN_SUCCESS)
9777                         return kr;
9778 #if KASAN
9779                 if (map->pmap == kernel_pmap) {
9780                         kasan_notify_address(*addr, copy->size);
9781                 }
9782 #endif
9783         }
9784
9785         /*
9786          * Copyout the data from the kernel buffer to the target map.
9787          */
9788         if (thread->map == map) {
9789
9790                 /*
9791                  * If the target map is the current map, just do
9792                  * the copy.
9793                  */
9794                 assert((vm_size_t)copy_size == copy_size);
9795                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
9796                         kr = KERN_INVALID_ADDRESS;
9797                 }
9798         }
9799         else {
9800                 vm_map_t oldmap;
9801
9802                 /*
9803                  * If the target map is another map, assume the
9804                  * target's address space identity for the duration
9805                  * of the copy.
9806                  */
9807                 vm_map_reference(map);
9808                 oldmap = vm_map_switch(map);
9809
9810                 assert((vm_size_t)copy_size == copy_size);
9811                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
9812                         vm_map_copyout_kernel_buffer_failures++;
9813                         kr = KERN_INVALID_ADDRESS;
9814                 }
9815
9816                 (void) vm_map_switch(oldmap);
9817                 vm_map_deallocate(map);
9818         }
9819
9820         if (kr != KERN_SUCCESS) {
9821                 /* the copy failed, clean up */
9822                 if (!overwrite) {
9823                         /*
9824                          * Deallocate the space we allocated in the target map.
9825                          */
9826                         (void) vm_map_remove(
9827                                 map,
9828                                 vm_map_trunc_page(*addr,
9829                                                   VM_MAP_PAGE_MASK(map)),
9830                                 vm_map_round_page((*addr +
9831                                                    vm_map_round_page(copy_size,
9832                                                                      VM_MAP_PAGE_MASK(map))),
9833                                                   VM_MAP_PAGE_MASK(map)),
9834                                 VM_MAP_NO_FLAGS);
9835                         *addr = 0;
9836                 }
9837         } else {
9838                 /* copy was successful, dicard the copy structure */
9839                 if (consume_on_success) {
9840                         kfree(copy, copy_size + cpy_kdata_hdr_sz);
9841                 }
9842         }
9843
9844         return kr;
9845 }
9846
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Splice the entries of a copy chain ("copy") into
 *		"map", immediately after the entry "where".
 *	Side effects:
 *		The copy header is returned to vm_map_copy_zone, so
 *		"copy" must not be used afterwards.
 *	Warning:
 *		"copy" is evaluated more than once; do not pass an
 *		expression with side effects.
 */
#define vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_store_copy_insert((map), (where), (copy));		\
	zfree(vm_map_copy_zone, (copy));				\
MACRO_END
9863
9864 void
9865 vm_map_copy_remap(
9866         vm_map_t        map,
9867         vm_map_entry_t  where,
9868         vm_map_copy_t   copy,
9869         vm_map_offset_t adjustment,
9870         vm_prot_t       cur_prot,
9871         vm_prot_t       max_prot,
9872         vm_inherit_t    inheritance)
9873 {
9874         vm_map_entry_t  copy_entry, new_entry;
9875
9876         for (copy_entry = vm_map_copy_first_entry(copy);
9877              copy_entry != vm_map_copy_to_entry(copy);
9878              copy_entry = copy_entry->vme_next) {
9879                 /* get a new VM map entry for the map */
9880                 new_entry = vm_map_entry_create(map,
9881                                                 !map->hdr.entries_pageable);
9882                 /* copy the "copy entry" to the new entry */
9883                 vm_map_entry_copy(new_entry, copy_entry);
9884                 /* adjust "start" and "end" */
9885                 new_entry->vme_start += adjustment;
9886                 new_entry->vme_end += adjustment;
9887                 /* clear some attributes */
9888                 new_entry->inheritance = inheritance;
9889                 new_entry->protection = cur_prot;
9890                 new_entry->max_protection = max_prot;
9891                 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
9892                 /* take an extra reference on the entry's "object" */
9893                 if (new_entry->is_sub_map) {
9894                         assert(!new_entry->use_pmap); /* not nested */
9895                         vm_map_lock(VME_SUBMAP(new_entry));
9896                         vm_map_reference(VME_SUBMAP(new_entry));
9897                         vm_map_unlock(VME_SUBMAP(new_entry));
9898                 } else {
9899                         vm_object_reference(VME_OBJECT(new_entry));
9900                 }
9901                 /* insert the new entry in the map */
9902                 vm_map_store_entry_link(map, where, new_entry);
9903                 /* continue inserting the "copy entries" after the new entry */
9904                 where = new_entry;
9905         }
9906 }
9907
9908
9909 /*
9910  * Returns true if *size matches (or is in the range of) copy->size.
9911  * Upon returning true, the *size field is updated with the actual size of the
9912  * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
9913  */
9914 boolean_t
9915 vm_map_copy_validate_size(
9916         vm_map_t                dst_map,
9917         vm_map_copy_t           copy,
9918         vm_map_size_t           *size)
9919 {
9920         if (copy == VM_MAP_COPY_NULL)
9921                 return FALSE;
9922         vm_map_size_t copy_sz = copy->size;
9923         vm_map_size_t sz = *size;
9924         switch (copy->type) {
9925         case VM_MAP_COPY_OBJECT:
9926         case VM_MAP_COPY_KERNEL_BUFFER:
9927                 if (sz == copy_sz)
9928                         return TRUE;
9929                 break;
9930         case VM_MAP_COPY_ENTRY_LIST:
9931                 /*
9932                  * potential page-size rounding prevents us from exactly
9933                  * validating this flavor of vm_map_copy, but we can at least
9934                  * assert that it's within a range.
9935                  */
9936                 if (copy_sz >= sz &&
9937                     copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
9938                         *size = copy_sz;
9939                         return TRUE;
9940                 }
9941                 break;
9942         default:
9943                 break;
9944         }
9945         return FALSE;
9946 }
9947
9948 /*
9949  *      Routine:        vm_map_copyout_size
9950  *
9951  *      Description:
9952  *              Copy out a copy chain ("copy") into newly-allocated
9953  *              space in the destination map. Uses a prevalidated
9954  *              size for the copy object (vm_map_copy_validate_size).
9955  *
9956  *              If successful, consumes the copy object.
9957  *              Otherwise, the caller is responsible for it.
9958  */
9959 kern_return_t
9960 vm_map_copyout_size(
9961         vm_map_t                dst_map,
9962         vm_map_address_t        *dst_addr,      /* OUT */
9963         vm_map_copy_t           copy,
9964         vm_map_size_t           copy_size)
9965 {
9966         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
9967                                        TRUE, /* consume_on_success */
9968                                        VM_PROT_DEFAULT,
9969                                        VM_PROT_ALL,
9970                                        VM_INHERIT_DEFAULT);
9971 }
9972
9973 /*
9974  *      Routine:        vm_map_copyout
9975  *
9976  *      Description:
9977  *              Copy out a copy chain ("copy") into newly-allocated
9978  *              space in the destination map.
9979  *
9980  *              If successful, consumes the copy object.
9981  *              Otherwise, the caller is responsible for it.
9982  */
9983 kern_return_t
9984 vm_map_copyout(
9985         vm_map_t                dst_map,
9986         vm_map_address_t        *dst_addr,      /* OUT */
9987         vm_map_copy_t           copy)
9988 {
9989         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
9990                                        TRUE, /* consume_on_success */
9991                                        VM_PROT_DEFAULT,
9992                                        VM_PROT_ALL,
9993                                        VM_INHERIT_DEFAULT);
9994 }
9995
9996 kern_return_t
9997 vm_map_copyout_internal(
9998         vm_map_t                dst_map,
9999         vm_map_address_t        *dst_addr,      /* OUT */
10000         vm_map_copy_t           copy,
10001         vm_map_size_t           copy_size,
10002         boolean_t               consume_on_success,
10003         vm_prot_t               cur_protection,
10004         vm_prot_t               max_protection,
10005         vm_inherit_t            inheritance)
10006 {
10007         vm_map_size_t           size;
10008         vm_map_size_t           adjustment;
10009         vm_map_offset_t         start;
10010         vm_object_offset_t      vm_copy_start;
10011         vm_map_entry_t          last;
10012         vm_map_entry_t          entry;
10013         vm_map_entry_t          hole_entry;
10014
10015         /*
10016          *      Check for null copy object.
10017          */
10018
10019         if (copy == VM_MAP_COPY_NULL) {
10020                 *dst_addr = 0;
10021                 return(KERN_SUCCESS);
10022         }
10023
10024         if (copy->size != copy_size) {
10025                 *dst_addr = 0;
10026                 return KERN_FAILURE;
10027         }
10028
10029         /*
10030          *      Check for special copy object, created
10031          *      by vm_map_copyin_object.
10032          */
10033
10034         if (copy->type == VM_MAP_COPY_OBJECT) {
10035                 vm_object_t             object = copy->cpy_object;
10036                 kern_return_t           kr;
10037                 vm_object_offset_t      offset;
10038
10039                 offset = vm_object_trunc_page(copy->offset);
10040                 size = vm_map_round_page((copy_size +
10041                                           (vm_map_size_t)(copy->offset -
10042                                                           offset)),
10043                                          VM_MAP_PAGE_MASK(dst_map));
10044                 *dst_addr = 0;
10045                 kr = vm_map_enter(dst_map, dst_addr, size,
10046                                   (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10047                                   VM_MAP_KERNEL_FLAGS_NONE,
10048                                   VM_KERN_MEMORY_NONE,
10049                                   object, offset, FALSE,
10050                                   VM_PROT_DEFAULT, VM_PROT_ALL,
10051                                   VM_INHERIT_DEFAULT);
10052                 if (kr != KERN_SUCCESS)
10053                         return(kr);
10054                 /* Account for non-pagealigned copy object */
10055                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10056                 if (consume_on_success)
10057                         zfree(vm_map_copy_zone, copy);
10058                 return(KERN_SUCCESS);
10059         }
10060
10061         /*
10062          *      Check for special kernel buffer allocated
10063          *      by new_ipc_kmsg_copyin.
10064          */
10065
10066         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10067                 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10068                                                     copy, copy_size, FALSE,
10069                                                     consume_on_success);
10070         }
10071
10072
10073         /*
10074          *      Find space for the data
10075          */
10076
10077         vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10078                                           VM_MAP_COPY_PAGE_MASK(copy));
10079         size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10080                                  VM_MAP_COPY_PAGE_MASK(copy))
10081                 - vm_copy_start;
10082
10083
10084 StartAgain: ;
10085
10086         vm_map_lock(dst_map);
10087         if( dst_map->disable_vmentry_reuse == TRUE) {
10088                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10089                 last = entry;
10090         } else {
10091                 if (dst_map->holelistenabled) {
10092                         hole_entry = (vm_map_entry_t)dst_map->holes_list;
10093
10094                         if (hole_entry == NULL) {
10095                                 /*
10096                                  * No more space in the map?
10097                                  */
10098                                 vm_map_unlock(dst_map);
10099                                 return(KERN_NO_SPACE);
10100                         }
10101
10102                         last = hole_entry;
10103                         start = last->vme_start;
10104                 } else {
10105                         assert(first_free_is_valid(dst_map));
10106                         start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10107                         vm_map_min(dst_map) : last->vme_end;
10108                 }
10109                 start = vm_map_round_page(start,
10110                                           VM_MAP_PAGE_MASK(dst_map));
10111         }
10112
10113         while (TRUE) {
10114                 vm_map_entry_t  next = last->vme_next;
10115                 vm_map_offset_t end = start + size;
10116
10117                 if ((end > dst_map->max_offset) || (end < start)) {
10118                         if (dst_map->wait_for_space) {
10119                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10120                                         assert_wait((event_t) dst_map,
10121                                                     THREAD_INTERRUPTIBLE);
10122                                         vm_map_unlock(dst_map);
10123                                         thread_block(THREAD_CONTINUE_NULL);
10124                                         goto StartAgain;
10125                                 }
10126                         }
10127                         vm_map_unlock(dst_map);
10128                         return(KERN_NO_SPACE);
10129                 }
10130
10131                 if (dst_map->holelistenabled) {
10132                         if (last->vme_end >= end)
10133                                 break;
10134                 } else {
10135                         /*
10136                          *      If there are no more entries, we must win.
10137                          *
10138                          *      OR
10139                          *
10140                          *      If there is another entry, it must be
10141                          *      after the end of the potential new region.
10142                          */
10143
10144                         if (next == vm_map_to_entry(dst_map))
10145                                 break;
10146
10147                         if (next->vme_start >= end)
10148                                 break;
10149                 }
10150
10151                 last = next;
10152
10153                 if (dst_map->holelistenabled) {
10154                         if (last == (vm_map_entry_t) dst_map->holes_list) {
10155                                 /*
10156                                  * Wrapped around
10157                                  */
10158                                 vm_map_unlock(dst_map);
10159                                 return(KERN_NO_SPACE);
10160                         }
10161                         start = last->vme_start;
10162                 } else {
10163                         start = last->vme_end;
10164                 }
10165                 start = vm_map_round_page(start,
10166                                           VM_MAP_PAGE_MASK(dst_map));
10167         }
10168
10169         if (dst_map->holelistenabled) {
10170                 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10171                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10172                 }
10173         }
10174
10175
10176         adjustment = start - vm_copy_start;
10177         if (! consume_on_success) {
10178                 /*
10179                  * We're not allowed to consume "copy", so we'll have to
10180                  * copy its map entries into the destination map below.
10181                  * No need to re-allocate map entries from the correct
10182                  * (pageable or not) zone, since we'll get new map entries
10183                  * during the transfer.
10184                  * We'll also adjust the map entries's "start" and "end"
10185                  * during the transfer, to keep "copy"'s entries consistent
10186                  * with its "offset".
10187                  */
10188                 goto after_adjustments;
10189         }
10190
10191         /*
10192          *      Since we're going to just drop the map
10193          *      entries from the copy into the destination
10194          *      map, they must come from the same pool.
10195          */
10196
10197         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10198                 /*
10199                  * Mismatches occur when dealing with the default
10200                  * pager.
10201                  */
10202                 zone_t          old_zone;
10203                 vm_map_entry_t  next, new;
10204
10205                 /*
10206                  * Find the zone that the copies were allocated from
10207                  */
10208
10209                 entry = vm_map_copy_first_entry(copy);
10210
10211                 /*
10212                  * Reinitialize the copy so that vm_map_copy_entry_link
10213                  * will work.
10214                  */
10215                 vm_map_store_copy_reset(copy, entry);
10216                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10217
10218                 /*
10219                  * Copy each entry.
10220                  */
10221                 while (entry != vm_map_copy_to_entry(copy)) {
10222                         new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10223                         vm_map_entry_copy_full(new, entry);
10224                         assert(!new->iokit_acct);
10225                         if (new->is_sub_map) {
10226                                 /* clr address space specifics */
10227                                 new->use_pmap = FALSE;
10228                         }
10229                         vm_map_copy_entry_link(copy,
10230                                                vm_map_copy_last_entry(copy),
10231                                                new);
10232                         next = entry->vme_next;
10233                         old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10234                         zfree(old_zone, entry);
10235                         entry = next;
10236                 }
10237         }
10238
10239         /*
10240          *      Adjust the addresses in the copy chain, and
10241          *      reset the region attributes.
10242          */
10243
10244         for (entry = vm_map_copy_first_entry(copy);
10245              entry != vm_map_copy_to_entry(copy);
10246              entry = entry->vme_next) {
10247                 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10248                         /*
10249                          * We're injecting this copy entry into a map that
10250                          * has the standard page alignment, so clear
10251                          * "map_aligned" (which might have been inherited
10252                          * from the original map entry).
10253                          */
10254                         entry->map_aligned = FALSE;
10255                 }
10256
10257                 entry->vme_start += adjustment;
10258                 entry->vme_end += adjustment;
10259
10260                 if (entry->map_aligned) {
10261                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10262                                                    VM_MAP_PAGE_MASK(dst_map)));
10263                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10264                                                    VM_MAP_PAGE_MASK(dst_map)));
10265                 }
10266
10267                 entry->inheritance = VM_INHERIT_DEFAULT;
10268                 entry->protection = VM_PROT_DEFAULT;
10269                 entry->max_protection = VM_PROT_ALL;
10270                 entry->behavior = VM_BEHAVIOR_DEFAULT;
10271
10272                 /*
10273                  * If the entry is now wired,
10274                  * map the pages into the destination map.
10275                  */
10276                 if (entry->wired_count != 0) {
10277                         vm_map_offset_t va;
10278                         vm_object_offset_t       offset;
10279                         vm_object_t object;
10280                         vm_prot_t prot;
10281                         int     type_of_fault;
10282
10283                         object = VME_OBJECT(entry);
10284                         offset = VME_OFFSET(entry);
10285                         va = entry->vme_start;
10286
10287                         pmap_pageable(dst_map->pmap,
10288                                       entry->vme_start,
10289                                       entry->vme_end,
10290                                       TRUE);
10291
10292                         while (va < entry->vme_end) {
10293                                 vm_page_t       m;
10294
10295                                 /*
10296                                  * Look up the page in the object.
10297                                  * Assert that the page will be found in the
10298                                  * top object:
10299                                  * either
10300                                  *      the object was newly created by
10301                                  *      vm_object_copy_slowly, and has
10302                                  *      copies of all of the pages from
10303                                  *      the source object
10304                                  * or
10305                                  *      the object was moved from the old
10306                                  *      map entry; because the old map
10307                                  *      entry was wired, all of the pages
10308                                  *      were in the top-level object.
10309                                  *      (XXX not true if we wire pages for
10310                                  *       reading)
10311                                  */
10312                                 vm_object_lock(object);
10313
10314                                 m = vm_page_lookup(object, offset);
10315                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10316                                     m->absent)
10317                                         panic("vm_map_copyout: wiring %p", m);
10318
10319                                 prot = entry->protection;
10320
10321                                 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10322                                     prot)
10323                                         prot |= VM_PROT_EXECUTE;
10324
10325                                 type_of_fault = DBG_CACHE_HIT_FAULT;
10326
10327                                 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
10328                                                                 VM_PAGE_WIRED(m),
10329                                                                 FALSE, /* change_wiring */
10330                                                                 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10331                                                                 FALSE, /* no_cache */
10332                                                                 FALSE, /* cs_bypass */
10333                                                                 VME_ALIAS(entry),
10334                                                                 ((entry->iokit_acct ||
10335                                                                  (!entry->is_sub_map &&
10336                                                                   !entry->use_pmap))
10337                                                                 ? PMAP_OPTIONS_ALT_ACCT
10338                                                                 : 0),  /* pmap_options */
10339                                                                 NULL,  /* need_retry */
10340                                                                 &type_of_fault);
10341
10342                                 vm_object_unlock(object);
10343
10344                                 offset += PAGE_SIZE_64;
10345                                 va += PAGE_SIZE;
10346                         }
10347                 }
10348         }
10349
10350 after_adjustments:
10351
10352         /*
10353          *      Correct the page alignment for the result
10354          */
10355
10356         *dst_addr = start + (copy->offset - vm_copy_start);
10357
10358 #if KASAN
10359         kasan_notify_address(*dst_addr, size);
10360 #endif
10361
10362         /*
10363          *      Update the hints and the map size
10364          */
10365
10366         if (consume_on_success) {
10367                 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10368         } else {
10369                 SAVE_HINT_MAP_WRITE(dst_map, last);
10370         }
10371
10372         dst_map->size += size;
10373
10374         /*
10375          *      Link in the copy
10376          */
10377
10378         if (consume_on_success) {
10379                 vm_map_copy_insert(dst_map, last, copy);
10380         } else {
10381                 vm_map_copy_remap(dst_map, last, copy, adjustment,
10382                                   cur_protection, max_protection,
10383                                   inheritance);
10384         }
10385
10386         vm_map_unlock(dst_map);
10387
10388         /*
10389          * XXX  If wiring_required, call vm_map_pageable
10390          */
10391
10392         return(KERN_SUCCESS);
10393 }
10394
10395 /*
10396  *      Routine:        vm_map_copyin
10397  *
10398  *      Description:
10399  *              see vm_map_copyin_common.  Exported via Unsupported.exports.
10400  *
10401  */
10402
10403 #undef vm_map_copyin
10404
10405 kern_return_t
10406 vm_map_copyin(
10407         vm_map_t                src_map,
10408         vm_map_address_t        src_addr,
10409         vm_map_size_t           len,
10410         boolean_t               src_destroy,
10411         vm_map_copy_t           *copy_result)   /* OUT */
10412 {
10413         return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10414                 FALSE /* src_volatile */, copy_result, FALSE /* use_maxprot */);
10415 }
10416
10417 /*
10418  *      Routine:        vm_map_copyin_common
10419  *
10420  *      Description:
10421  *              Copy the specified region (src_addr, len) from the
10422  *              source address space (src_map), possibly removing
10423  *              the region from the source address space (src_destroy).
10424  *
10425  *      Returns:
10426  *              A vm_map_copy_t object (copy_result), suitable for
10427  *              insertion into another address space (using vm_map_copyout),
10428  *              or copying over another address space region (using
10429  *              vm_map_copy_overwrite).  If the copy is unused, it
10430  *              should be destroyed (using vm_map_copy_discard).
10431  *
10432  *      In/out conditions:
10433  *              The source map should not be locked on entry.
10434  */
10435
10436 typedef struct submap_map {     /* one record per submap level descended by vm_map_copyin_internal() */
10437         vm_map_t        parent_map;     /* map that was being walked when the submap entry was found */
10438         vm_map_offset_t base_start;     /* "src_start" in parent_map, saved before translating into the submap */
10439         vm_map_offset_t base_end;       /* "src_end" in parent_map, saved before translating into the submap */
10440         vm_map_size_t   base_len;       /* bytes covered via the submap entry, capped at src_end - src_start */
10441         struct submap_map *next;        /* previously pushed record; newest record is the list head */
10442 } submap_map_t;
10443
10444 kern_return_t
10445 vm_map_copyin_common(
10446         vm_map_t                src_map,
10447         vm_map_address_t        src_addr,
10448         vm_map_size_t           len,
10449         boolean_t               src_destroy,
10450         __unused boolean_t      src_volatile,
10451         vm_map_copy_t           *copy_result,   /* OUT */
10452         boolean_t               use_maxprot)
10453 {
10454         /*
10455          * Fold the boolean options into the VM_MAP_COPYIN_* flag
10456          * word taken by vm_map_copyin_internal(), which does the
10457          * actual work.  "src_volatile" is unused and contributes
10458          * no flag.
10459          */
10460         int copyin_flags = 0;
10461
10462         copyin_flags |= src_destroy ? VM_MAP_COPYIN_SRC_DESTROY : 0;
10463         copyin_flags |= use_maxprot ? VM_MAP_COPYIN_USE_MAXPROT : 0;
10464
10465         return vm_map_copyin_internal(src_map, src_addr, len,
10466                                       copyin_flags,
10467                                       copy_result);
10468 }
10469 kern_return_t
10470 vm_map_copyin_internal(
10471         vm_map_t        src_map,
10472         vm_map_address_t src_addr,
10473         vm_map_size_t   len,
10474         int             flags,
10475         vm_map_copy_t   *copy_result)   /* OUT */
10476 {
10477         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
10478                                          * in multi-level lookup, this
10479                                          * entry contains the actual
10480                                          * vm_object/offset.
10481                                          */
10482         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
10483
10484         vm_map_offset_t src_start;      /* Start of current entry --
10485                                          * where copy is taking place now
10486                                          */
10487         vm_map_offset_t src_end;        /* End of entire region to be
10488                                          * copied */
10489         vm_map_offset_t src_base;
10490         vm_map_t        base_map = src_map;
10491         boolean_t       map_share=FALSE;
10492         submap_map_t    *parent_maps = NULL;
10493
10494         vm_map_copy_t   copy;           /* Resulting copy */
10495         vm_map_address_t copy_addr;
10496         vm_map_size_t   copy_size;
10497         boolean_t       src_destroy;
10498         boolean_t       use_maxprot;
10499         boolean_t       preserve_purgeable;
10500         boolean_t       entry_was_shared;
10501         vm_map_entry_t  saved_src_entry;
10502
10503         if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
10504                 return KERN_INVALID_ARGUMENT;
10505         }
10506
10507         src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
10508         use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
10509         preserve_purgeable =
10510                 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
10511
10512         /*
10513          *      Check for copies of zero bytes.
10514          */
10515
10516         if (len == 0) {
10517                 *copy_result = VM_MAP_COPY_NULL;
10518                 return(KERN_SUCCESS);
10519         }
10520
10521         /*
10522          *      Check that the end address doesn't overflow
10523          */
10524         src_end = src_addr + len;
10525         if (src_end < src_addr)
10526                 return KERN_INVALID_ADDRESS;
10527
10528         /*
10529          *      Compute (page aligned) start and end of region
10530          */
10531         src_start = vm_map_trunc_page(src_addr,
10532                                       VM_MAP_PAGE_MASK(src_map));
10533         src_end = vm_map_round_page(src_end,
10534                                     VM_MAP_PAGE_MASK(src_map));
10535
10536         /*
10537          * If the copy is sufficiently small, use a kernel buffer instead
10538          * of making a virtual copy.  The theory being that the cost of
10539          * setting up VM (and taking C-O-W faults) dominates the copy costs
10540          * for small regions.
10541          */
10542         if ((len < msg_ool_size_small) &&
10543             !use_maxprot &&
10544             !preserve_purgeable &&
10545             !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
10546             /*
10547              * Since the "msg_ool_size_small" threshold was increased and
10548              * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10549              * address space limits, we revert to doing a virtual copy if the
10550              * copied range goes beyond those limits.  Otherwise, mach_vm_read()
10551              * of the commpage would now fail when it used to work.
10552              */
10553             (src_start >= vm_map_min(src_map) &&
10554              src_start < vm_map_max(src_map) &&
10555              src_end >= vm_map_min(src_map) &&
10556              src_end < vm_map_max(src_map)))
10557                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
10558                                                    src_destroy, copy_result);
10559
10560         XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
10561
10562         /*
10563          *      Allocate a header element for the list.
10564          *
10565          *      Use the start and end in the header to
10566          *      remember the endpoints prior to rounding.
10567          */
10568
10569         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10570         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10571         vm_map_copy_first_entry(copy) =
10572                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10573         copy->type = VM_MAP_COPY_ENTRY_LIST;
10574         copy->cpy_hdr.nentries = 0;
10575         copy->cpy_hdr.entries_pageable = TRUE;
10576 #if 00
10577         copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
10578 #else
10579         /*
10580          * The copy entries can be broken down for a variety of reasons,
10581          * so we can't guarantee that they will remain map-aligned...
10582          * Will need to adjust the first copy_entry's "vme_start" and
10583          * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10584          * rather than the original map's alignment.
10585          */
10586         copy->cpy_hdr.page_shift = PAGE_SHIFT;
10587 #endif
10588
10589         vm_map_store_init( &(copy->cpy_hdr) );
10590
10591         copy->offset = src_addr;
10592         copy->size = len;
10593
10594         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10595
10596 #define RETURN(x)                                               \
10597         MACRO_BEGIN                                             \
10598         vm_map_unlock(src_map);                                 \
10599         if(src_map != base_map)                                 \
10600                 vm_map_deallocate(src_map);                     \
10601         if (new_entry != VM_MAP_ENTRY_NULL)                     \
10602                 vm_map_copy_entry_dispose(copy,new_entry);      \
10603         vm_map_copy_discard(copy);                              \
10604         {                                                       \
10605                 submap_map_t    *_ptr;                          \
10606                                                                 \
10607                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
10608                         parent_maps=parent_maps->next;          \
10609                         if (_ptr->parent_map != base_map)       \
10610                                 vm_map_deallocate(_ptr->parent_map);    \
10611                         kfree(_ptr, sizeof(submap_map_t));      \
10612                 }                                               \
10613         }                                                       \
10614         MACRO_RETURN(x);                                        \
10615         MACRO_END
10616
10617         /*
10618          *      Find the beginning of the region.
10619          */
10620
10621         vm_map_lock(src_map);
10622
10623         /*
10624          * Lookup the original "src_addr" rather than the truncated
10625          * "src_start", in case "src_start" falls in a non-map-aligned
10626          * map entry *before* the map entry that contains "src_addr"...
10627          */
10628         if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
10629                 RETURN(KERN_INVALID_ADDRESS);
10630         if(!tmp_entry->is_sub_map) {
10631                 /*
10632                  * ... but clip to the map-rounded "src_start" rather than
10633                  * "src_addr" to preserve map-alignment.  We'll adjust the
10634                  * first copy entry at the end, if needed.
10635                  */
10636                 vm_map_clip_start(src_map, tmp_entry, src_start);
10637         }
10638         if (src_start < tmp_entry->vme_start) {
10639                 /*
10640                  * Move "src_start" up to the start of the
10641                  * first map entry to copy.
10642                  */
10643                 src_start = tmp_entry->vme_start;
10644         }
10645         /* set for later submap fix-up */
10646         copy_addr = src_start;
10647
10648         /*
10649          *      Go through entries until we get to the end.
10650          */
10651
10652         while (TRUE) {
10653                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
10654                 vm_map_size_t   src_size;               /* Size of source
10655                                                          * map entry (in both
10656                                                          * maps)
10657                                                          */
10658
10659                 vm_object_t             src_object;     /* Object to copy */
10660                 vm_object_offset_t      src_offset;
10661
10662                 boolean_t       src_needs_copy;         /* Should source map
10663                                                          * be made read-only
10664                                                          * for copy-on-write?
10665                                                          */
10666
10667                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
10668
10669                 boolean_t       was_wired;              /* Was source wired? */
10670                 vm_map_version_t version;               /* Version before locks
10671                                                          * dropped to make copy
10672                                                          */
10673                 kern_return_t   result;                 /* Return value from
10674                                                          * copy_strategically.
10675                                                          */
10676                 while(tmp_entry->is_sub_map) {
10677                         vm_map_size_t submap_len;
10678                         submap_map_t *ptr;
10679
10680                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
10681                         ptr->next = parent_maps;
10682                         parent_maps = ptr;
10683                         ptr->parent_map = src_map;
10684                         ptr->base_start = src_start;
10685                         ptr->base_end = src_end;
10686                         submap_len = tmp_entry->vme_end - src_start;
10687                         if(submap_len > (src_end-src_start))
10688                                 submap_len = src_end-src_start;
10689                         ptr->base_len = submap_len;
10690
10691                         src_start -= tmp_entry->vme_start;
10692                         src_start += VME_OFFSET(tmp_entry);
10693                         src_end = src_start + submap_len;
10694                         src_map = VME_SUBMAP(tmp_entry);
10695                         vm_map_lock(src_map);
10696                         /* keep an outstanding reference for all maps in */
10697                         /* the parents tree except the base map */
10698                         vm_map_reference(src_map);
10699                         vm_map_unlock(ptr->parent_map);
10700                         if (!vm_map_lookup_entry(
10701                                     src_map, src_start, &tmp_entry))
10702                                 RETURN(KERN_INVALID_ADDRESS);
10703                         map_share = TRUE;
10704                         if(!tmp_entry->is_sub_map)
10705                                 vm_map_clip_start(src_map, tmp_entry, src_start);
10706                         src_entry = tmp_entry;
10707                 }
10708                 /* we are now in the lowest level submap... */
10709
10710                 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
10711                     (VME_OBJECT(tmp_entry)->phys_contiguous)) {
10712                         /* This is not, supported for now.In future */
10713                         /* we will need to detect the phys_contig   */
10714                         /* condition and then upgrade copy_slowly   */
10715                         /* to do physical copy from the device mem  */
10716                         /* based object. We can piggy-back off of   */
10717                         /* the was wired boolean to set-up the      */
10718                         /* proper handling */
10719                         RETURN(KERN_PROTECTION_FAILURE);
10720                 }
10721                 /*
10722                  *      Create a new address map entry to hold the result.
10723                  *      Fill in the fields from the appropriate source entries.
10724                  *      We must unlock the source map to do this if we need
10725                  *      to allocate a map entry.
10726                  */
10727                 if (new_entry == VM_MAP_ENTRY_NULL) {
10728                         version.main_timestamp = src_map->timestamp;
10729                         vm_map_unlock(src_map);
10730
10731                         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10732
10733                         vm_map_lock(src_map);
10734                         if ((version.main_timestamp + 1) != src_map->timestamp) {
10735                                 if (!vm_map_lookup_entry(src_map, src_start,
10736                                                          &tmp_entry)) {
10737                                         RETURN(KERN_INVALID_ADDRESS);
10738                                 }
10739                                 if (!tmp_entry->is_sub_map)
10740                                         vm_map_clip_start(src_map, tmp_entry, src_start);
10741                                 continue; /* restart w/ new tmp_entry */
10742                         }
10743                 }
10744
10745                 /*
10746                  *      Verify that the region can be read.
10747                  */
10748                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
10749                      !use_maxprot) ||
10750                     (src_entry->max_protection & VM_PROT_READ) == 0)
10751                         RETURN(KERN_PROTECTION_FAILURE);
10752
10753                 /*
10754                  *      Clip against the endpoints of the entire region.
10755                  */
10756
10757                 vm_map_clip_end(src_map, src_entry, src_end);
10758
10759                 src_size = src_entry->vme_end - src_start;
10760                 src_object = VME_OBJECT(src_entry);
10761                 src_offset = VME_OFFSET(src_entry);
10762                 was_wired = (src_entry->wired_count != 0);
10763
10764                 vm_map_entry_copy(new_entry, src_entry);
10765                 if (new_entry->is_sub_map) {
10766                         /* clr address space specifics */
10767                         new_entry->use_pmap = FALSE;
10768                 } else {
10769                         /*
10770                          * We're dealing with a copy-on-write operation,
10771                          * so the resulting mapping should not inherit the
10772                          * original mapping's accounting settings.
10773                          * "iokit_acct" should have been cleared in
10774                          * vm_map_entry_copy().
10775                          * "use_pmap" should be reset to its default (TRUE)
10776                          * so that the new mapping gets accounted for in
10777                          * the task's memory footprint.
10778                          */
10779                         assert(!new_entry->iokit_acct);
10780                         new_entry->use_pmap = TRUE;
10781                 }
10782
10783                 /*
10784                  *      Attempt non-blocking copy-on-write optimizations.
10785                  */
10786
10787                 if (src_destroy &&
10788                     (src_object == VM_OBJECT_NULL ||
10789                      (src_object->internal &&
10790                       src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10791                       !map_share))) {
10792                         /*
10793                          * If we are destroying the source, and the object
10794                          * is internal, we can move the object reference
10795                          * from the source to the copy.  The copy is
10796                          * copy-on-write only if the source is.
10797                          * We make another reference to the object, because
10798                          * destroying the source entry will deallocate it.
10799                          */
10800                         vm_object_reference(src_object);
10801
10802                         /*
10803                          * Copy is always unwired.  vm_map_copy_entry
10804                          * set its wired count to zero.
10805                          */
10806
10807                         goto CopySuccessful;
10808                 }
10809
10810
10811         RestartCopy:
10812                 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
10813                     src_object, new_entry, VME_OBJECT(new_entry),
10814                     was_wired, 0);
10815                 if ((src_object == VM_OBJECT_NULL ||
10816                      (!was_wired && !map_share && !tmp_entry->is_shared)) &&
10817                     vm_object_copy_quickly(
10818                             &VME_OBJECT(new_entry),
10819                             src_offset,
10820                             src_size,
10821                             &src_needs_copy,
10822                             &new_entry_needs_copy)) {
10823
10824                         new_entry->needs_copy = new_entry_needs_copy;
10825
10826                         /*
10827                          *      Handle copy-on-write obligations
10828                          */
10829
10830                         if (src_needs_copy && !tmp_entry->needs_copy) {
10831                                 vm_prot_t prot;
10832
10833                                 prot = src_entry->protection & ~VM_PROT_WRITE;
10834
10835                                 if (override_nx(src_map, VME_ALIAS(src_entry))
10836                                     && prot)
10837                                         prot |= VM_PROT_EXECUTE;
10838
10839                                 vm_object_pmap_protect(
10840                                         src_object,
10841                                         src_offset,
10842                                         src_size,
10843                                         (src_entry->is_shared ?
10844                                          PMAP_NULL
10845                                          : src_map->pmap),
10846                                         src_entry->vme_start,
10847                                         prot);
10848
10849                                 assert(tmp_entry->wired_count == 0);
10850                                 tmp_entry->needs_copy = TRUE;
10851                         }
10852
10853                         /*
10854                          *      The map has never been unlocked, so it's safe
10855                          *      to move to the next entry rather than doing
10856                          *      another lookup.
10857                          */
10858
10859                         goto CopySuccessful;
10860                 }
10861
10862                 entry_was_shared = tmp_entry->is_shared;
10863
10864                 /*
10865                  *      Take an object reference, so that we may
10866                  *      release the map lock(s).
10867                  */
10868
10869                 assert(src_object != VM_OBJECT_NULL);
10870                 vm_object_reference(src_object);
10871
10872                 /*
10873                  *      Record the timestamp for later verification.
10874                  *      Unlock the map.
10875                  */
10876
10877                 version.main_timestamp = src_map->timestamp;
10878                 vm_map_unlock(src_map); /* Increments timestamp once! */
10879                 saved_src_entry = src_entry;
10880                 tmp_entry = VM_MAP_ENTRY_NULL;
10881                 src_entry = VM_MAP_ENTRY_NULL;
10882
10883                 /*
10884                  *      Perform the copy
10885                  */
10886
10887                 if (was_wired) {
10888                 CopySlowly:
10889                         vm_object_lock(src_object);
10890                         result = vm_object_copy_slowly(
10891                                 src_object,
10892                                 src_offset,
10893                                 src_size,
10894                                 THREAD_UNINT,
10895                                 &VME_OBJECT(new_entry));
10896                         VME_OFFSET_SET(new_entry, 0);
10897                         new_entry->needs_copy = FALSE;
10898                 }
10899                 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10900                          (entry_was_shared  || map_share)) {
10901                         vm_object_t new_object;
10902
10903                         vm_object_lock_shared(src_object);
10904                         new_object = vm_object_copy_delayed(
10905                                 src_object,
10906                                 src_offset,
10907                                 src_size,
10908                                 TRUE);
10909                         if (new_object == VM_OBJECT_NULL)
10910                                 goto CopySlowly;
10911
10912                         VME_OBJECT_SET(new_entry, new_object);
10913                         assert(new_entry->wired_count == 0);
10914                         new_entry->needs_copy = TRUE;
10915                         assert(!new_entry->iokit_acct);
10916                         assert(new_object->purgable == VM_PURGABLE_DENY);
10917                         assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
10918                         result = KERN_SUCCESS;
10919
10920                 } else {
10921                         vm_object_offset_t new_offset;
10922                         new_offset = VME_OFFSET(new_entry);
10923                         result = vm_object_copy_strategically(src_object,
10924                                                               src_offset,
10925                                                               src_size,
10926                                                               &VME_OBJECT(new_entry),
10927                                                               &new_offset,
10928                                                               &new_entry_needs_copy);
10929                         if (new_offset != VME_OFFSET(new_entry)) {
10930                                 VME_OFFSET_SET(new_entry, new_offset);
10931                         }
10932
10933                         new_entry->needs_copy = new_entry_needs_copy;
10934                 }
10935
10936                 if (result == KERN_SUCCESS &&
10937                     preserve_purgeable &&
10938                     src_object->purgable != VM_PURGABLE_DENY) {
10939                         vm_object_t     new_object;
10940
10941                         new_object = VME_OBJECT(new_entry);
10942                         assert(new_object != src_object);
10943                         vm_object_lock(new_object);
10944                         assert(new_object->ref_count == 1);
10945                         assert(new_object->shadow == VM_OBJECT_NULL);
10946                         assert(new_object->copy == VM_OBJECT_NULL);
10947                         assert(new_object->vo_purgeable_owner == NULL);
10948
10949                         new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
10950                         new_object->true_share = TRUE;
10951                         /* start as non-volatile with no owner... */
10952                         new_object->purgable = VM_PURGABLE_NONVOLATILE;
10953                         vm_purgeable_nonvolatile_enqueue(new_object, NULL);
10954                         /* ... and move to src_object's purgeable state */
10955                         if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
10956                                 int state;
10957                                 state = src_object->purgable;
10958                                 vm_object_purgable_control(
10959                                         new_object,
10960                                         VM_PURGABLE_SET_STATE_FROM_KERNEL,
10961                                         &state);
10962                         }
10963                         vm_object_unlock(new_object);
10964                         new_object = VM_OBJECT_NULL;
10965                         /* no pmap accounting for purgeable objects */
10966                         new_entry->use_pmap = FALSE;
10967                 }
10968
10969                 if (result != KERN_SUCCESS &&
10970                     result != KERN_MEMORY_RESTART_COPY) {
10971                         vm_map_lock(src_map);
10972                         RETURN(result);
10973                 }
10974
10975                 /*
10976                  *      Throw away the extra reference
10977                  */
10978
10979                 vm_object_deallocate(src_object);
10980
10981                 /*
10982                  *      Verify that the map has not substantially
10983                  *      changed while the copy was being made.
10984                  */
10985
10986                 vm_map_lock(src_map);
10987
10988                 if ((version.main_timestamp + 1) == src_map->timestamp) {
10989                         /* src_map hasn't changed: src_entry is still valid */
10990                         src_entry = saved_src_entry;
10991                         goto VerificationSuccessful;
10992                 }
10993
10994                 /*
10995                  *      Simple version comparison failed.
10996                  *
10997                  *      Retry the lookup and verify that the
10998                  *      same object/offset are still present.
10999                  *
11000                  *      [Note: a memory manager that colludes with
11001                  *      the calling task can detect that we have
11002                  *      cheated.  While the map was unlocked, the
11003                  *      mapping could have been changed and restored.]
11004                  */
11005
11006                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11007                         if (result != KERN_MEMORY_RESTART_COPY) {
11008                                 vm_object_deallocate(VME_OBJECT(new_entry));
11009                                 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11010                                 /* reset accounting state */
11011                                 new_entry->iokit_acct = FALSE;
11012                                 new_entry->use_pmap = TRUE;
11013                         }
11014                         RETURN(KERN_INVALID_ADDRESS);
11015                 }
11016
11017                 src_entry = tmp_entry;
11018                 vm_map_clip_start(src_map, src_entry, src_start);
11019
11020                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11021                      !use_maxprot) ||
11022                     ((src_entry->max_protection & VM_PROT_READ) == 0))
11023                         goto VerificationFailed;
11024
11025                 if (src_entry->vme_end < new_entry->vme_end) {
11026                         /*
11027                          * This entry might have been shortened
11028                          * (vm_map_clip_end) or been replaced with
11029                          * an entry that ends closer to "src_start"
11030                          * than before.
11031                          * Adjust "new_entry" accordingly; copying
11032                          * less memory would be correct but we also
11033                          * redo the copy (see below) if the new entry
11034                          * no longer points at the same object/offset.
11035                          */
11036                         assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11037                                                    VM_MAP_COPY_PAGE_MASK(copy)));
11038                         new_entry->vme_end = src_entry->vme_end;
11039                         src_size = new_entry->vme_end - src_start;
11040                 } else if (src_entry->vme_end > new_entry->vme_end) {
11041                         /*
11042                          * This entry might have been extended
11043                          * (vm_map_entry_simplify() or coalesce)
11044                          * or been replaced with an entry that ends farther
11045                          * from "src_start" than before.
11046                          *
11047                          * We've called vm_object_copy_*() only on
11048                          * the previous <start:end> range, so we can't
11049                          * just extend new_entry.  We have to re-do
11050                          * the copy based on the new entry as if it was
11051                          * pointing at a different object/offset (see
11052                          * "Verification failed" below).
11053                          */
11054                 }
11055
11056                 if ((VME_OBJECT(src_entry) != src_object) ||
11057                     (VME_OFFSET(src_entry) != src_offset) ||
11058                     (src_entry->vme_end > new_entry->vme_end)) {
11059
11060                         /*
11061                          *      Verification failed.
11062                          *
11063                          *      Start over with this top-level entry.
11064                          */
11065
11066                 VerificationFailed: ;
11067
11068                         vm_object_deallocate(VME_OBJECT(new_entry));
11069                         tmp_entry = src_entry;
11070                         continue;
11071                 }
11072
11073                 /*
11074                  *      Verification succeeded.
11075                  */
11076
11077         VerificationSuccessful: ;
11078
11079                 if (result == KERN_MEMORY_RESTART_COPY)
11080                         goto RestartCopy;
11081
11082                 /*
11083                  *      Copy succeeded.
11084                  */
11085
11086         CopySuccessful: ;
11087
11088                 /*
11089                  *      Link in the new copy entry.
11090                  */
11091
11092                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11093                                        new_entry);
11094
11095                 /*
11096                  *      Determine whether the entire region
11097                  *      has been copied.
11098                  */
11099                 src_base = src_start;
11100                 src_start = new_entry->vme_end;
11101                 new_entry = VM_MAP_ENTRY_NULL;
11102                 while ((src_start >= src_end) && (src_end != 0)) {
11103                         submap_map_t    *ptr;
11104
11105                         if (src_map == base_map) {
11106                                 /* back to the top */
11107                                 break;
11108                         }
11109
11110                         ptr = parent_maps;
11111                         assert(ptr != NULL);
11112                         parent_maps = parent_maps->next;
11113
11114                         /* fix up the damage we did in that submap */
11115                         vm_map_simplify_range(src_map,
11116                                               src_base,
11117                                               src_end);
11118
11119                         vm_map_unlock(src_map);
11120                         vm_map_deallocate(src_map);
11121                         vm_map_lock(ptr->parent_map);
11122                         src_map = ptr->parent_map;
11123                         src_base = ptr->base_start;
11124                         src_start = ptr->base_start + ptr->base_len;
11125                         src_end = ptr->base_end;
11126                         if (!vm_map_lookup_entry(src_map,
11127                                                  src_start,
11128                                                  &tmp_entry) &&
11129                             (src_end > src_start)) {
11130                                 RETURN(KERN_INVALID_ADDRESS);
11131                         }
11132                         kfree(ptr, sizeof(submap_map_t));
11133                         if (parent_maps == NULL)
11134                                 map_share = FALSE;
11135                         src_entry = tmp_entry->vme_prev;
11136                 }
11137
11138                 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11139                     (src_start >= src_addr + len) &&
11140                     (src_addr + len != 0)) {
11141                         /*
11142                          * Stop copying now, even though we haven't reached
11143                          * "src_end".  We'll adjust the end of the last copy
11144                          * entry at the end, if needed.
11145                          *
11146                          * If src_map's alignment is different from the
11147                          * system's page-alignment, there could be
11148                          * extra non-map-aligned map entries between
11149                          * the original (non-rounded) "src_addr + len"
11150                          * and the rounded "src_end".
11151                          * We do not want to copy those map entries since
11152                          * they're not part of the copied range.
11153                          */
11154                         break;
11155                 }
11156
11157                 if ((src_start >= src_end) && (src_end != 0))
11158                         break;
11159
11160                 /*
11161                  *      Verify that there are no gaps in the region
11162                  */
11163
11164                 tmp_entry = src_entry->vme_next;
11165                 if ((tmp_entry->vme_start != src_start) ||
11166                     (tmp_entry == vm_map_to_entry(src_map))) {
11167                         RETURN(KERN_INVALID_ADDRESS);
11168                 }
11169         }
11170
11171         /*
11172          * If the source should be destroyed, do it now, since the
11173          * copy was successful.
11174          */
11175         if (src_destroy) {
11176                 (void) vm_map_delete(
11177                         src_map,
11178                         vm_map_trunc_page(src_addr,
11179                                           VM_MAP_PAGE_MASK(src_map)),
11180                         src_end,
11181                         ((src_map == kernel_map) ?
11182                          VM_MAP_REMOVE_KUNWIRE :
11183                          VM_MAP_NO_FLAGS),
11184                         VM_MAP_NULL);
11185         } else {
11186                 /* fix up the damage we did in the base map */
11187                 vm_map_simplify_range(
11188                         src_map,
11189                         vm_map_trunc_page(src_addr,
11190                                           VM_MAP_PAGE_MASK(src_map)),
11191                         vm_map_round_page(src_end,
11192                                           VM_MAP_PAGE_MASK(src_map)));
11193         }
11194
11195         vm_map_unlock(src_map);
11196         tmp_entry = VM_MAP_ENTRY_NULL;
11197
11198         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11199                 vm_map_offset_t original_start, original_offset, original_end;
11200
11201                 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11202
11203                 /* adjust alignment of first copy_entry's "vme_start" */
11204                 tmp_entry = vm_map_copy_first_entry(copy);
11205                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11206                         vm_map_offset_t adjustment;
11207
11208                         original_start = tmp_entry->vme_start;
11209                         original_offset = VME_OFFSET(tmp_entry);
11210
11211                         /* map-align the start of the first copy entry... */
11212                         adjustment = (tmp_entry->vme_start -
11213                                       vm_map_trunc_page(
11214                                               tmp_entry->vme_start,
11215                                               VM_MAP_PAGE_MASK(src_map)));
11216                         tmp_entry->vme_start -= adjustment;
11217                         VME_OFFSET_SET(tmp_entry,
11218                                        VME_OFFSET(tmp_entry) - adjustment);
11219                         copy_addr -= adjustment;
11220                         assert(tmp_entry->vme_start < tmp_entry->vme_end);
11221                         /* ... adjust for mis-aligned start of copy range */
11222                         adjustment =
11223                                 (vm_map_trunc_page(copy->offset,
11224                                                    PAGE_MASK) -
11225                                  vm_map_trunc_page(copy->offset,
11226                                                    VM_MAP_PAGE_MASK(src_map)));
11227                         if (adjustment) {
11228                                 assert(page_aligned(adjustment));
11229                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11230                                 tmp_entry->vme_start += adjustment;
11231                                 VME_OFFSET_SET(tmp_entry,
11232                                                (VME_OFFSET(tmp_entry) +
11233                                                 adjustment));
11234                                 copy_addr += adjustment;
11235                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11236                         }
11237
11238                         /*
11239                          * Assert that the adjustments haven't exposed
11240                          * more than was originally copied...
11241                          */
11242                         assert(tmp_entry->vme_start >= original_start);
11243                         assert(VME_OFFSET(tmp_entry) >= original_offset);
11244                         /*
11245                          * ... and that it did not adjust outside of a
11246                          * single 16K page.
11247                          */
11248                         assert(vm_map_trunc_page(tmp_entry->vme_start,
11249                                                  VM_MAP_PAGE_MASK(src_map)) ==
11250                                vm_map_trunc_page(original_start,
11251                                                  VM_MAP_PAGE_MASK(src_map)));
11252                 }
11253
11254                 /* adjust alignment of last copy_entry's "vme_end" */
11255                 tmp_entry = vm_map_copy_last_entry(copy);
11256                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11257                         vm_map_offset_t adjustment;
11258
11259                         original_end = tmp_entry->vme_end;
11260
11261                         /* map-align the end of the last copy entry... */
11262                         tmp_entry->vme_end =
11263                                 vm_map_round_page(tmp_entry->vme_end,
11264                                                   VM_MAP_PAGE_MASK(src_map));
11265                         /* ... adjust for mis-aligned end of copy range */
11266                         adjustment =
11267                                 (vm_map_round_page((copy->offset +
11268                                                     copy->size),
11269                                                    VM_MAP_PAGE_MASK(src_map)) -
11270                                  vm_map_round_page((copy->offset +
11271                                                     copy->size),
11272                                                    PAGE_MASK));
11273                         if (adjustment) {
11274                                 assert(page_aligned(adjustment));
11275                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11276                                 tmp_entry->vme_end -= adjustment;
11277                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11278                         }
11279
11280                         /*
11281                          * Assert that the adjustments haven't exposed
11282                          * more than was originally copied...
11283                          */
11284                         assert(tmp_entry->vme_end <= original_end);
11285                         /*
11286                          * ... and that it did not adjust outside of a
11287                          * a single 16K page.
11288                          * single 16K page.
11289                         assert(vm_map_round_page(tmp_entry->vme_end,
11290                                                  VM_MAP_PAGE_MASK(src_map)) ==
11291                                vm_map_round_page(original_end,
11292                                                  VM_MAP_PAGE_MASK(src_map)));
11293                 }
11294         }
11295
11296         /* Fix-up start and end points in copy.  This is necessary */
11297         /* when the various entries in the copy object were picked */
11298         /* up from different sub-maps */
11299
11300         tmp_entry = vm_map_copy_first_entry(copy);
11301         copy_size = 0; /* compute actual size */
11302         while (tmp_entry != vm_map_copy_to_entry(copy)) {
11303                 assert(VM_MAP_PAGE_ALIGNED(
11304                                copy_addr + (tmp_entry->vme_end -
11305                                             tmp_entry->vme_start),
11306                                VM_MAP_COPY_PAGE_MASK(copy)));
11307                 assert(VM_MAP_PAGE_ALIGNED(
11308                                copy_addr,
11309                                VM_MAP_COPY_PAGE_MASK(copy)));
11310
11311                 /*
11312                  * The copy_entries will be injected directly into the
11313                  * destination map and might not be "map aligned" there...
11314                  */
11315                 tmp_entry->map_aligned = FALSE;
11316
11317                 tmp_entry->vme_end = copy_addr +
11318                         (tmp_entry->vme_end - tmp_entry->vme_start);
11319                 tmp_entry->vme_start = copy_addr;
11320                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11321                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11322                 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11323                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11324         }
11325
11326         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11327             copy_size < copy->size) {
11328                 /*
11329                  * The actual size of the VM map copy is smaller than what
11330                  * was requested by the caller.  This must be because some
11331                  * PAGE_SIZE-sized pages are missing at the end of the last
11332                  * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11333                  * The caller might not have been aware of those missing
11334                  * pages and might not want to be aware of it, which is
11335                  * fine as long as they don't try to access (and crash on)
11336                  * those missing pages.
11337                  * Let's adjust the size of the "copy", to avoid failing
11338                  * in vm_map_copyout() or vm_map_copy_overwrite().
11339                  */
11340                 assert(vm_map_round_page(copy_size,
11341                                          VM_MAP_PAGE_MASK(src_map)) ==
11342                        vm_map_round_page(copy->size,
11343                                          VM_MAP_PAGE_MASK(src_map)));
11344                 copy->size = copy_size;
11345         }
11346
11347         *copy_result = copy;
11348         return(KERN_SUCCESS);
11349
11350 #undef  RETURN
11351 }
11352
      /*
       *      vm_map_copy_extract:
       *
       *      Build an entry-list copy object (VM_MAP_COPY_ENTRY_LIST) for
       *      the range [src_addr, src_addr + len) of "src_map".  The
       *      entries are produced by vm_map_remap_extract(), called with
       *      copy == FALSE, VM_INHERIT_SHARE, pageable == TRUE and
       *      same_map == FALSE.
       *
       *      src_map         map to extract from
       *      src_addr, len   range to extract; both are handed to
       *                      vm_map_remap_extract() unrounded
       *      copy_result     OUT: the new copy object, or VM_MAP_COPY_NULL
       *                      when "len" is 0; not written on failure
       *      cur_prot,       forwarded to vm_map_remap_extract(); never
       *      max_prot        written by this function itself (so they are
       *                      untouched when "len" is 0)
       *
       *      Returns KERN_SUCCESS, KERN_INVALID_ADDRESS when
       *      src_addr + len wraps around, or whatever error
       *      vm_map_remap_extract() reports.
       */
11353 kern_return_t
11354 vm_map_copy_extract(
11355         vm_map_t                src_map,
11356         vm_map_address_t        src_addr,
11357         vm_map_size_t           len,
11358         vm_map_copy_t           *copy_result,   /* OUT */
11359         vm_prot_t               *cur_prot,      /* OUT */
11360         vm_prot_t               *max_prot)
11361 {
11362         vm_map_offset_t src_start, src_end;
11363         vm_map_copy_t   copy;
11364         kern_return_t   kr;
11365
11366         /*
11367          *      Check for copies of zero bytes.
               *      An empty request succeeds with a null copy object.
11368          */
11369
11370         if (len == 0) {
11371                 *copy_result = VM_MAP_COPY_NULL;
11372                 return(KERN_SUCCESS);
11373         }
11374
11375         /*
11376          *      Check that the end address doesn't overflow
11377          */
11378         src_end = src_addr + len;
11379         if (src_end < src_addr)
11380                 return KERN_INVALID_ADDRESS;
11381
11382         /*
11383          *      Compute (page aligned) start and end of region
               *
               *      NOTE(review): neither rounded value is read again in
               *      this function; vm_map_remap_extract() below is given
               *      the unrounded "src_addr" and "len".
11384          */
11385         src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11386         src_end = vm_map_round_page(src_end, PAGE_MASK);
11387
11388         /*
11389          *      Allocate a header element for the list.
11390          *
11391          *      The header records offset 0 and the caller's
11392          *      unrounded "len" (set further down).
               *
               *      NOTE(review): the zalloc() result is dereferenced
               *      without a NULL check; presumably allocations from
               *      this zone cannot fail here -- confirm.
11393          */
11394
11395         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
              /*
               * SKIP_RB_TREE is stored as a marker in the RB-tree root;
               * presumably it tells the map-store code not to maintain a
               * red-black tree for this header -- confirm.
               */
11396         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
              /*
               * Empty entry list: first and last are both set to
               * vm_map_copy_to_entry(copy), which is also what entry-list
               * walks in this file test against for end-of-list.
               */
11397         vm_map_copy_first_entry(copy) =
11398                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
11399         copy->type = VM_MAP_COPY_ENTRY_LIST;
11400         copy->cpy_hdr.nentries = 0;
11401         copy->cpy_hdr.entries_pageable = TRUE;
11402
11403         vm_map_store_init(&copy->cpy_hdr);
11404
11405         copy->offset = 0;
11406         copy->size = len;
11407
              /* Fill the copy's header with entries for the requested range. */
11408         kr = vm_map_remap_extract(src_map,
11409                                   src_addr,
11410                                   len,
11411                                   FALSE, /* copy */
11412                                   &copy->cpy_hdr,
11413                                   cur_prot,
11414                                   max_prot,
11415                                   VM_INHERIT_SHARE,
11416                                   TRUE, /* pageable */
11417                                   FALSE, /* same_map */
11418                                   VM_MAP_KERNEL_FLAGS_NONE);
11419         if (kr != KERN_SUCCESS) {
                      /* Extraction failed: drop the copy; *copy_result stays unwritten. */
11420                 vm_map_copy_discard(copy);
11421                 return kr;
11422         }
11423
11424         *copy_result = copy;
11425         return KERN_SUCCESS;
11426 }
11427
11428 /*
11429  *      vm_map_copyin_object:
11430  *
11431  *      Create a copy object from an object.
11432  *      Our caller donates an object reference.
       *
       *      The result is a VM_MAP_COPY_OBJECT-type copy that stores
       *      "object", "offset" and "size" directly.  No additional
       *      object reference is taken here; the copy simply keeps the
       *      pointer it was given.
       *
       *      object          object to wrap
       *      offset          offset of the region in the object
       *      size            size of the region in the object
       *      copy_result     OUT: the newly allocated copy object
       *
       *      Always returns KERN_SUCCESS.
11433  */
11434
11435 kern_return_t
11436 vm_map_copyin_object(
11437         vm_object_t             object,
11438         vm_object_offset_t      offset, /* offset of region in object */
11439         vm_object_size_t        size,   /* size of region in object */
11440         vm_map_copy_t   *copy_result)   /* OUT */
11441 {
11442         vm_map_copy_t   copy;           /* Resulting copy */
11443
11444         /*
11445          *      We drop the object into a special copy object
11446          *      that contains the object directly.
               *
               *      NOTE(review): the zalloc() result is dereferenced
               *      without a NULL check; presumably allocations from
               *      this zone cannot fail here -- confirm.
11447          */
11448
11449         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
              /*
               * SKIP_RB_TREE is stored as a marker in the RB-tree root;
               * presumably it tells the map-store code not to maintain a
               * red-black tree for this header -- confirm.
               */
11450         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
11451         copy->type = VM_MAP_COPY_OBJECT;
11452         copy->cpy_object = object;      /* keeps the caller's donated reference */
11453         copy->offset = offset;
11454         copy->size = size;
11455
11456         *copy_result = copy;
11457         return(KERN_SUCCESS);
11458 }
11459
11460 static void
11461 vm_map_fork_share(
11462         vm_map_t        old_map,
11463         vm_map_entry_t  old_entry,
11464         vm_map_t        new_map)
11465 {
11466         vm_object_t     object;
11467         vm_map_entry_t  new_entry;
11468
11469         /*
11470          *      New sharing code.  New map entry
11471          *      references original object.  Internal
11472          *      objects use asynchronous copy algorithm for
11473          *      future copies.  First make sure we have
11474          *      the right object.  If we need a shadow,
11475          *      or someone else already has one, then
11476          *      make a new shadow and share it.
11477          */
11478
11479         object = VME_OBJECT(old_entry);
11480         if (old_entry->is_sub_map) {
11481                 assert(old_entry->wired_count == 0);
11482 #ifndef NO_NESTED_PMAP
11483                 if(old_entry->use_pmap) {
11484                         kern_return_t   result;
11485
11486                         result = pmap_nest(new_map->pmap,
11487                                            (VME_SUBMAP(old_entry))->pmap,
11488                                            (addr64_t)old_entry->vme_start,
11489                                            (addr64_t)old_entry->vme_start,
11490                                            (uint64_t)(old_entry->vme_end - old_entry->vme_start));
11491                         if(result)
11492                                 panic("vm_map_fork_share: pmap_nest failed!");
11493                 }
11494 #endif  /* NO_NESTED_PMAP */
11495         } else if (object == VM_OBJECT_NULL) {
11496                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
11497                                                             old_entry->vme_start));
11498                 VME_OFFSET_SET(old_entry, 0);
11499                 VME_OBJECT_SET(old_entry, object);
11500                 old_entry->use_pmap = TRUE;
11501 //              assert(!old_entry->needs_copy);
11502         } else if (object->copy_strategy !=
11503                    MEMORY_OBJECT_COPY_SYMMETRIC) {
11504
11505                 /*
11506                  *      We are already using an asymmetric
11507                  *      copy, and therefore we already have
11508                  *      the right object.
11509                  */
11510
11511                 assert(! old_entry->needs_copy);
11512         }
11513         else if (old_entry->needs_copy ||       /* case 1 */
11514                  object->shadowed ||            /* case 2 */
11515                  (!object->true_share &&        /* case 3 */
11516                   !old_entry->is_shared &&
11517                   (object->vo_size >
11518                    (vm_map_size_t)(old_entry->vme_end -
11519                                    old_entry->vme_start)))) {
11520
11521                 /*
11522                  *      We need to create a shadow.
11523                  *      There are three cases here.
11524                  *      In the first case, we need to
11525                  *      complete a deferred symmetrical
11526                  *      copy that we participated in.
11527                  *      In the second and third cases,
11528                  *      we need to create the shadow so
11529                  *      that changes that we make to the
11530                  *      object do not interfere with
11531                  *      any symmetrical copies which
11532                  *      have occurred (case 2) or which
11533                  *      might occur (case 3).
11534                  *
11535                  *      The first case is when we had
11536                  *      deferred shadow object creation
11537                  *      via the entry->needs_copy mechanism.
11538                  *      This mechanism only works when
11539                  *      only one entry points to the source
11540                  *      object, and we are about to create
11541                  *      a second entry pointing to the
11542                  *      same object. The problem is that
11543                  *      there is no way of mapping from
11544                  *      an object to the entries pointing
11545                  *      to it. (Deferred shadow creation
11546                  *      works with one entry because it occurs
11547                  *      at fault time, and we walk from the
11548                  *      entry to the object when handling
11549                  *      the fault.)
11550                  *
11551                  *      The second case is when the object
11552                  *      to be shared has already been copied
11553                  *      with a symmetric copy, but we point
11554                  *      directly to the object without
11555                  *      needs_copy set in our entry. (This
11556                  *      can happen because different ranges
11557                  *      of an object can be pointed to by
11558                  *      different entries. In particular,
11559                  *      a single entry pointing to an object
11560                  *      can be split by a call to vm_inherit,
11561                  *      which, combined with task_create, can
11562                  *      result in the different entries
11563                  *      having different needs_copy values.)
11564                  *      The shadowed flag in the object allows
11565                  *      us to detect this case. The problem
11566                  *      with this case is that if this object
11567                  *      has or will have shadows, then we
11568                  *      must not perform an asymmetric copy
11569                  *      of this object, since such a copy
11570                  *      allows the object to be changed, which
11571                  *      will break the previous symmetrical
11572                  *      copies (which rely upon the object
11573                  *      not changing). In a sense, the shadowed
11574                  *      flag says "don't change this object".
11575                  *      We fix this by creating a shadow
11576                  *      object for this object, and sharing
11577                  *      that. This works because we are free
11578                  *      to change the shadow object (and thus
11579                  *      to use an asymmetric copy strategy);
11580                  *      this is also semantically correct,
11581                  *      since this object is temporary, and
11582                  *      therefore a copy of the object is
11583                  *      as good as the object itself. (This
11584                  *      is not true for permanent objects,
11585                  *      since the pager needs to see changes,
11586                  *      which won't happen if the changes
11587                  *      are made to a copy.)
11588                  *
11589                  *      The third case is when the object
11590                  *      to be shared has parts sticking
11591                  *      outside of the entry we're working
11592                  *      with, and thus may in the future
11593                  *      be subject to a symmetrical copy.
11594                  *      (This is a preemptive version of
11595                  *      case 2.)
11596                  */
11597                 VME_OBJECT_SHADOW(old_entry,
11598                                   (vm_map_size_t) (old_entry->vme_end -
11599                                                    old_entry->vme_start));
11600
11601                 /*
11602                  *      If we're making a shadow for other than
11603                  *      copy on write reasons, then we have
11604                  *      to remove write permission.
11605                  */
11606
11607                 if (!old_entry->needs_copy &&
11608                     (old_entry->protection & VM_PROT_WRITE)) {
11609                         vm_prot_t prot;
11610
11611                         assert(!pmap_has_prot_policy(old_entry->protection));
11612
11613                         prot = old_entry->protection & ~VM_PROT_WRITE;
11614
11615                         assert(!pmap_has_prot_policy(prot));
11616
11617                         if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
11618                                 prot |= VM_PROT_EXECUTE;
11619
11620
11621                         if (old_map->mapped_in_other_pmaps) {
11622                                 vm_object_pmap_protect(
11623                                         VME_OBJECT(old_entry),
11624                                         VME_OFFSET(old_entry),
11625                                         (old_entry->vme_end -
11626                                          old_entry->vme_start),
11627                                         PMAP_NULL,
11628                                         old_entry->vme_start,
11629                                         prot);
11630                         } else {
11631                                 pmap_protect(old_map->pmap,
11632                                              old_entry->vme_start,
11633                                              old_entry->vme_end,
11634                                              prot);
11635                         }
11636                 }
11637
11638                 old_entry->needs_copy = FALSE;
11639                 object = VME_OBJECT(old_entry);
11640         }
11641
11642
11643         /*
11644          *      If object was using a symmetric copy strategy,
11645          *      change its copy strategy to the default
11646          *      asymmetric copy strategy, which is copy_delay
11647          *      in the non-norma case and copy_call in the
11648          *      norma case. Bump the reference count for the
11649          *      new entry.
11650          */
11651
11652         if(old_entry->is_sub_map) {
11653                 vm_map_lock(VME_SUBMAP(old_entry));
11654                 vm_map_reference(VME_SUBMAP(old_entry));
11655                 vm_map_unlock(VME_SUBMAP(old_entry));
11656         } else {
11657                 vm_object_lock(object);
11658                 vm_object_reference_locked(object);
11659                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
11660                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
11661                 }
11662                 vm_object_unlock(object);
11663         }
11664
11665         /*
11666          *      Clone the entry, using object ref from above.
11667          *      Mark both entries as shared.
11668          */
11669
11670         new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
11671                                                           * map or descendants */
11672         vm_map_entry_copy(new_entry, old_entry);
11673         old_entry->is_shared = TRUE;
11674         new_entry->is_shared = TRUE;
11675
11676         /*
11677          * We're dealing with a shared mapping, so the resulting mapping
11678          * should inherit some of the original mapping's accounting settings.
11679          * "iokit_acct" should have been cleared in vm_map_entry_copy().
11680          * "use_pmap" should stay the same as before (if it hasn't been reset
11681          * to TRUE when we cleared "iokit_acct").
11682          */
11683         assert(!new_entry->iokit_acct);
11684
11685         /*
11686          *      If old entry's inheritance is VM_INHERIT_NONE,
11687          *      the new entry is for corpse fork, remove the
11688          *      write permission from the new entry.
11689          */
11690         if (old_entry->inheritance == VM_INHERIT_NONE) {
11691
11692                 new_entry->protection &= ~VM_PROT_WRITE;
11693                 new_entry->max_protection &= ~VM_PROT_WRITE;
11694         }
11695
11696         /*
11697          *      Insert the entry into the new map -- we
11698          *      know we're inserting at the end of the new
11699          *      map.
11700          */
11701
11702         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
11703
11704         /*
11705          *      Update the physical map
11706          */
11707
11708         if (old_entry->is_sub_map) {
11709                 /* Bill Angell pmap support goes here */
11710         } else {
11711                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
11712                           old_entry->vme_end - old_entry->vme_start,
11713                           old_entry->vme_start);
11714         }
11715 }
11716
11717 static boolean_t
11718 vm_map_fork_copy(
11719         vm_map_t        old_map,                /* unlocked and re-locked here */
11720         vm_map_entry_t  *old_entry_p,           /* IN/OUT */
11721         vm_map_t        new_map,
11722         int             vm_map_copyin_flags)
11723 {
              /*
               *      vm_map_fork_copy:
               *
               *      Copy the address range covered by *old_entry_p out
               *      of old_map with vm_map_copyin_internal() and insert
               *      the result into new_map, after the entry that was
               *      last in new_map when this function was entered.
               *
               *      old_map is unlocked before the copyin and locked
               *      again before returning, on both the success and the
               *      failure path, so the caller is expected to hold the
               *      lock on entry.  Because the map may change while it
               *      is unlocked, *old_entry_p is always rewritten with
               *      the entry at which the caller should continue.
               *
               *      VM_MAP_COPYIN_USE_MAXPROT is always added to
               *      vm_map_copyin_flags.
               *
               *      Returns TRUE if the range was copied and inserted,
               *      FALSE if the copyin did not return KERN_SUCCESS (in
               *      which case nothing is inserted into new_map).
               */
11724         vm_map_entry_t old_entry = *old_entry_p;
              /* range and insertion point are sampled while old_map is still locked */
11725         vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
11726         vm_map_offset_t start = old_entry->vme_start;
11727         vm_map_copy_t copy;
11728         vm_map_entry_t last = vm_map_last_entry(new_map);
11729
11730         vm_map_unlock(old_map);
11731         /*
11732          *      Use maxprot version of copyin because we
11733          *      care about whether this memory can ever
11734          *      be accessed, not just whether it's accessible
11735          *      right now.
11736          */
11737         vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
11738         if (vm_map_copyin_internal(old_map, start, entry_size,
11739                                    vm_map_copyin_flags, &copy)
11740             != KERN_SUCCESS) {
11741                 /*
11742                  *      The map might have changed while it
11743                  *      was unlocked, check it again.  Skip
11744                  *      any blank space or permanently
11745                  *      unreadable region.
11746                  */
11747                 vm_map_lock(old_map);
                      /*
                       *      "last" is reused here as a cursor into old_map.
                       *      If no entry contains "start", or the entry that
                       *      does has no read access in max_protection, step
                       *      to the following entry.  (Presumably a failed
                       *      lookup leaves the preceding entry in "last" --
                       *      confirm against vm_map_lookup_entry.)
                       *      Otherwise the entry found at "start" is handed
                       *      back to the caller unchanged.
                       */
11748                 if (!vm_map_lookup_entry(old_map, start, &last) ||
11749                     (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
11750                         last = last->vme_next;
11751                 }
11752                 *old_entry_p = last;
11753
11754                 /*
11755                  * XXX  For some error returns, want to
11756                  * XXX  skip to the next element.  Note
11757                  *      that INVALID_ADDRESS and
11758                  *      PROTECTION_FAILURE are handled above.
11759                  */
11760
11761                 return FALSE;
11762         }
11763
11764         /*
11765          *      Insert the copy into the new map
11766          */
11767
              /* "last" still holds new_map's last entry as sampled on entry */
11768         vm_map_copy_insert(new_map, last, copy);
11769
11770         /*
11771          *      Pick up the traversal at the end of
11772          *      the copied region.
11773          */
11774
11775         vm_map_lock(old_map);
              /* "start" becomes the first address past the copied range */
11776         start += entry_size;
11777         if (! vm_map_lookup_entry(old_map, start, &last)) {
                      /* nothing mapped at "start": resume at the entry after the one returned */
11778                 last = last->vme_next;
11779         } else {
11780                 if (last->vme_start == start) {
11781                         /*
11782                          * No need to clip here and we don't
11783                          * want to cause any unnecessary
11784                          * unnesting...
11785                          */
11786                 } else {
                              /* the entry straddles "start": clip it so it begins at "start" */
11787                         vm_map_clip_start(old_map, last, start);
11788                 }
11789         }
11790         *old_entry_p = last;
11791
11792         return TRUE;
11793 }
11794
11795 /*
11796  *      vm_map_fork:
11797  *
11798  *      Create and return a new map based on the old
11799  *      map, according to the inheritance values on the
11800  *      regions in that map and the options.
11801  *
11802  *      The source map must not be locked.
       *
       *      "ledger" is handed to pmap_create() for the new pmap.
       *      "options" may only contain
       *      VM_MAP_FORK_SHARE_IF_INHERIT_NONE and
       *      VM_MAP_FORK_PRESERVE_PURGEABLE; any other bit makes the
       *      call return VM_MAP_NULL before anything is created.
       *
       *      The source map is locked here for the whole traversal
       *      (vm_map_fork_copy() may drop and retake that lock).
       *      vm_map_reference_swap() is called on it before locking
       *      and vm_map_deallocate() after unlocking.
       *
       *      Per entry, by inheritance value:
       *        VM_INHERIT_NONE   skipped, unless
       *                          VM_MAP_FORK_SHARE_IF_INHERIT_NONE is
       *                          set and the entry is not backed by a
       *                          device pager, in which case it is
       *                          handled like VM_INHERIT_SHARE.
       *        VM_INHERIT_SHARE  vm_map_fork_share().
       *        VM_INHERIT_COPY   submap entries are skipped; other
       *                          entries are copied with
       *                          vm_object_copy_quickly() when possible
       *                          and with vm_map_fork_copy() otherwise.
       *      Any other inheritance value matches no case and the
       *      entry is skipped.
       *
       *      The new map is returned unlocked, with its "size" set to
       *      the sum of the sizes of the entries that were carried
       *      over.
11803  */
11804 vm_map_t
11805 vm_map_fork(
11806         ledger_t        ledger,
11807         vm_map_t        old_map,
11808         int             options)
11809 {
11810         pmap_t          new_pmap;
11811         vm_map_t        new_map;
11812         vm_map_entry_t  old_entry;
11813         vm_map_size_t   new_size = 0, entry_size;
11814         vm_map_entry_t  new_entry;
11815         boolean_t       src_needs_copy;
11816         boolean_t       new_entry_needs_copy;
11817         boolean_t       pmap_is64bit;
11818         int             vm_map_copyin_flags;
11819
11820         if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
11821                         VM_MAP_FORK_PRESERVE_PURGEABLE)) {
11822                 /* unsupported option */
11823                 return VM_MAP_NULL;
11824         }
11825
              /* derive the 64-bit flag for the new pmap from the old map's pmap, per architecture */
11826         pmap_is64bit =
11827 #if defined(__i386__) || defined(__x86_64__)
11828                                old_map->pmap->pm_task_map != TASK_MAP_32BIT;
11829 #elif defined(__arm64__)
11830                                old_map->pmap->max == MACH_VM_MAX_ADDRESS;
11831 #elif defined(__arm__)
11832                                FALSE;
11833 #else
11834 #error Unknown architecture.
11835 #endif
11836
              /*
               * NOTE(review): new_pmap is passed on to vm_map_create()
               * without a NULL check -- confirm pmap_create() cannot
               * fail here.
               */
11837         new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
11838
11839         vm_map_reference_swap(old_map);
11840         vm_map_lock(old_map);
11841
              /* the new map spans the same offset range as the old one */
11842         new_map = vm_map_create(new_pmap,
11843                                 old_map->min_offset,
11844                                 old_map->max_offset,
11845                                 old_map->hdr.entries_pageable);
11846         vm_map_lock(new_map);
11847         vm_commit_pagezero_status(new_map);
11848         /* inherit the parent map's page size */
11849         vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
              /*
               *      Walk every entry of the old map.  The "for" has no
               *      increment clause: the cursor is advanced at the
               *      bottom of the loop body, except on the slow copy
               *      path where vm_map_fork_copy() updates it and the
               *      body ends with "continue".
               */
11850         for (
11851                 old_entry = vm_map_first_entry(old_map);
11852                 old_entry != vm_map_to_entry(old_map);
11853                 ) {
11854
11855                 entry_size = old_entry->vme_end - old_entry->vme_start;
11856
11857                 switch (old_entry->inheritance) {
11858                 case VM_INHERIT_NONE:
11859                         /*
11860                          * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
11861                          * is not passed or it is backed by a device pager.
11862                          */
11863                         if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
11864                                 (!old_entry->is_sub_map &&
11865                                 VME_OBJECT(old_entry) != NULL &&
11866                                 VME_OBJECT(old_entry)->pager != NULL &&
11867                                 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
11868                                 break;
11869                         }
11870                         /* FALLTHROUGH */
11871
11872                 case VM_INHERIT_SHARE:
11873                         vm_map_fork_share(old_map, old_entry, new_map);
11874                         new_size += entry_size;
11875                         break;
11876
11877                 case VM_INHERIT_COPY:
11878
11879                         /*
11880                          *      Inline the copy_quickly case;
11881                          *      upon failure, fall back on call
11882                          *      to vm_map_fork_copy.
11883                          */
11884
                              /* submap entries are not carried over (and not counted in new_size) */
11885                         if(old_entry->is_sub_map)
11886                                 break;
                              /* wired entries and true_share objects go straight to the slow path */
11887                         if ((old_entry->wired_count != 0) ||
11888                             ((VME_OBJECT(old_entry) != NULL) &&
11889                              (VME_OBJECT(old_entry)->true_share))) {
11890                                 goto slow_vm_map_fork_copy;
11891                         }
11892
11893                         new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
11894                         vm_map_entry_copy(new_entry, old_entry);
                              /*
                               * NOTE(review): submap entries already took the
                               * "break" above, so unless vm_map_entry_copy()
                               * can set is_sub_map on its own the first branch
                               * below looks unreachable -- confirm.
                               */
11895                         if (new_entry->is_sub_map) {
11896                                 /* clear address space specifics */
11897                                 new_entry->use_pmap = FALSE;
11898                         } else {
11899                                 /*
11900                                  * We're dealing with a copy-on-write operation,
11901                                  * so the resulting mapping should not inherit
11902                                  * the original mapping's accounting settings.
11903                                  * "iokit_acct" should have been cleared in
11904                                  * vm_map_entry_copy().
11905                                  * "use_pmap" should be reset to its default
11906                                  * (TRUE) so that the new mapping gets
11907                                  * accounted for in the task's memory footprint.
11908                                  */
11909                                 assert(!new_entry->iokit_acct);
11910                                 new_entry->use_pmap = TRUE;
11911                         }
11912
                              /*
                               * On failure the entry created above is disposed
                               * of again and the slow path takes over.
                               */
11913                         if (! vm_object_copy_quickly(
11914                                     &VME_OBJECT(new_entry),
11915                                     VME_OFFSET(old_entry),
11916                                     (old_entry->vme_end -
11917                                      old_entry->vme_start),
11918                                     &src_needs_copy,
11919                                     &new_entry_needs_copy)) {
11920                                 vm_map_entry_dispose(new_map, new_entry);
11921                                 goto slow_vm_map_fork_copy;
11922                         }
11923
11924                         /*
11925                          *      Handle copy-on-write obligations
11926                          */
11927
                              /*
                               * Only when the source must now be treated as
                               * copy-on-write and was not already marked so:
                               * strip write access from its existing mappings
                               * and set needs_copy on the old entry.
                               */
11928                         if (src_needs_copy && !old_entry->needs_copy) {
11929                                 vm_prot_t prot;
11930
11931                                 assert(!pmap_has_prot_policy(old_entry->protection));
11932
11933                                 prot = old_entry->protection & ~VM_PROT_WRITE;
11934
11935                                 if (override_nx(old_map, VME_ALIAS(old_entry))
11936                                     && prot)
11937                                         prot |= VM_PROT_EXECUTE;
11938
11939                                 assert(!pmap_has_prot_policy(prot));
11940
                                      /*
                                       * PMAP_NULL is passed when the entry is
                                       * shared or the map is mapped in other
                                       * pmaps; otherwise only old_map's pmap is
                                       * named.  (Presumably PMAP_NULL means "all
                                       * mappings of the object" -- confirm
                                       * against vm_object_pmap_protect.)
                                       */
11941                                 vm_object_pmap_protect(
11942                                         VME_OBJECT(old_entry),
11943                                         VME_OFFSET(old_entry),
11944                                         (old_entry->vme_end -
11945                                          old_entry->vme_start),
11946                                         ((old_entry->is_shared
11947                                           || old_map->mapped_in_other_pmaps)
11948                                          ? PMAP_NULL :
11949                                          old_map->pmap),
11950                                         old_entry->vme_start,
11951                                         prot);
11952
11953                                 assert(old_entry->wired_count == 0);
11954                                 old_entry->needs_copy = TRUE;
11955                         }
11956                         new_entry->needs_copy = new_entry_needs_copy;
11957
11958                         /*
11959                          *      Insert the entry at the end
11960                          *      of the map.
11961                          */
11962
11963                         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
11964                                           new_entry);
11965                         new_size += entry_size;
11966                         break;
11967
                      /*
                       * Slow path, reached only through the gotos above.
                       * vm_map_fork_copy() moves old_entry to the entry at
                       * which to resume, so "continue" bypasses the
                       * vme_next step at the bottom of the loop.  The size
                       * is only counted when the copy succeeded.
                       */
11968                 slow_vm_map_fork_copy:
11969                         vm_map_copyin_flags = 0;
11970                         if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
11971                                 vm_map_copyin_flags |=
11972                                         VM_MAP_COPYIN_PRESERVE_PURGEABLE;
11973                         }
11974                         if (vm_map_fork_copy(old_map,
11975                                              &old_entry,
11976                                              new_map,
11977                                              vm_map_copyin_flags)) {
11978                                 new_size += entry_size;
11979                         }
11980                         continue;
11981                 }
11982                 old_entry = old_entry->vme_next;
11983         }
11984
11985 #if defined(__arm64__)
11986         pmap_insert_sharedpage(new_map->pmap);
11987 #endif
11988
              /* publish the accumulated size, then release both locks and call vm_map_deallocate() on the old map */
11989         new_map->size = new_size;
11990         vm_map_unlock(new_map);
11991         vm_map_unlock(old_map);
11992         vm_map_deallocate(old_map);
11993
11994         return(new_map);
11995 }
11996
11997 /*
11998  * vm_map_exec:
11999  *
12000  *      Set up the "new_map" with the proper execution environment according
12001  *      to the type of executable (platform, 64bit, chroot environment).
12002  *      Map the comm page and shared region, etc...
       *
       *      "new_map", "task" and "is64bit" are passed to
       *      vm_commpage_enter(); all five arguments are passed to
       *      vm_shared_region_enter().  The results of both calls are
       *      explicitly discarded, so this function always returns
       *      KERN_SUCCESS.
       *
       *      A SHARED_REGION_TRACE_DEBUG message is emitted on entry
       *      ("->") and on exit ("<-"); pointers are logged through
       *      VM_KERNEL_ADDRPERM().  "is64bit" is not part of the
       *      message.
12003  */
12004 kern_return_t
12005 vm_map_exec(
12006         vm_map_t        new_map,
12007         task_t          task,
12008         boolean_t       is64bit,
12009         void            *fsroot,
12010         cpu_type_t      cpu)
12011 {
              /* entry trace */
12012         SHARED_REGION_TRACE_DEBUG(
12013                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
12014                  (void *)VM_KERNEL_ADDRPERM(current_task()),
12015                  (void *)VM_KERNEL_ADDRPERM(new_map),
12016                  (void *)VM_KERNEL_ADDRPERM(task),
12017                  (void *)VM_KERNEL_ADDRPERM(fsroot),
12018                  cpu));
              /* comm page first, then the shared region; neither result is checked */
12019         (void) vm_commpage_enter(new_map, task, is64bit);
12020         (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
              /* exit trace, same arguments as the entry trace */
12021         SHARED_REGION_TRACE_DEBUG(
12022                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
12023                  (void *)VM_KERNEL_ADDRPERM(current_task()),
12024                  (void *)VM_KERNEL_ADDRPERM(new_map),
12025                  (void *)VM_KERNEL_ADDRPERM(task),
12026                  (void *)VM_KERNEL_ADDRPERM(fsroot),
12027                  cpu));
12028         return KERN_SUCCESS;
12029 }
12030
12031 /*
12032  *      vm_map_lookup_locked:
12033  *
12034  *      Finds the VM object, offset, and
12035  *      protection for a given virtual address in the
12036  *      specified map, assuming a page fault of the
12037  *      type specified.
12038  *
12039  *      Returns the (object, offset, protection) for
12040  *      this address, whether it is wired down, and whether
12041  *      this map has the only reference to the data in question.
12042  *      In order to later verify this lookup, a "version"
12043  *      is returned.
12044  *
12045  *      The map MUST be locked by the caller and WILL be
12046  *      locked on exit.  In order to guarantee the
12047  *      existence of the returned object, it is returned
12048  *      locked.
12049  *
12050  *      If a lookup is requested with "write protection"
12051  *      specified, the map may be changed to perform virtual
12052  *      copying operations, although the data referenced will
12053  *      remain the same.
12054  */
12055 kern_return_t
12056 vm_map_lookup_locked(
12057         vm_map_t                *var_map,       /* IN/OUT */
12058         vm_map_offset_t         vaddr,
12059         vm_prot_t               fault_type,
12060         int                     object_lock_type,
12061         vm_map_version_t        *out_version,   /* OUT */
12062         vm_object_t             *object,        /* OUT */
12063         vm_object_offset_t      *offset,        /* OUT */
12064         vm_prot_t               *out_prot,      /* OUT */
12065         boolean_t               *wired,         /* OUT */
12066         vm_object_fault_info_t  fault_info,     /* OUT */
12067         vm_map_t                *real_map)
12068 {
12069         vm_map_entry_t                  entry;
12070         vm_map_t                        map = *var_map;
12071         vm_map_t                        old_map = *var_map;
12072         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
12073         vm_map_offset_t                 cow_parent_vaddr = 0;
12074         vm_map_offset_t                 old_start = 0;
12075         vm_map_offset_t                 old_end = 0;
12076         vm_prot_t                       prot;
12077         boolean_t                       mask_protections;
12078         boolean_t                       force_copy;
12079         vm_prot_t                       original_fault_type;
12080
12081         /*
12082          * VM_PROT_MASK means that the caller wants us to use "fault_type"
12083          * as a mask against the mapping's actual protections, not as an
12084          * absolute value.
12085          */
12086         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12087         force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12088         fault_type &= VM_PROT_ALL;
12089         original_fault_type = fault_type;
12090
12091         *real_map = map;
12092
12093 RetryLookup:
12094         fault_type = original_fault_type;
12095
12096         /*
12097          *      If the map has an interesting hint, try it before calling
12098          *      full blown lookup routine.
12099          */
12100         entry = map->hint;
12101
12102         if ((entry == vm_map_to_entry(map)) ||
12103             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12104                 vm_map_entry_t  tmp_entry;
12105
12106                 /*
12107                  *      Entry was either not a valid hint, or the vaddr
12108                  *      was not contained in the entry, so do a full lookup.
12109                  */
12110                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12111                         if((cow_sub_map_parent) && (cow_sub_map_parent != map))
12112                                 vm_map_unlock(cow_sub_map_parent);
12113                         if((*real_map != map)
12114                            && (*real_map != cow_sub_map_parent))
12115                                 vm_map_unlock(*real_map);
12116                         return KERN_INVALID_ADDRESS;
12117                 }
12118
12119                 entry = tmp_entry;
12120         }
12121         if(map == old_map) {
12122                 old_start = entry->vme_start;
12123                 old_end = entry->vme_end;
12124         }
12125
12126         /*
12127          *      Handle submaps.  Drop lock on upper map, submap is
12128          *      returned locked.
12129          */
12130
12131 submap_recurse:
12132         if (entry->is_sub_map) {
12133                 vm_map_offset_t         local_vaddr;
12134                 vm_map_offset_t         end_delta;
12135                 vm_map_offset_t         start_delta;
12136                 vm_map_entry_t          submap_entry;
12137                 vm_prot_t               subentry_protection;
12138                 vm_prot_t               subentry_max_protection;
12139                 boolean_t               mapped_needs_copy=FALSE;
12140
12141                 local_vaddr = vaddr;
12142
12143                 if ((entry->use_pmap &&
12144                      ! ((fault_type & VM_PROT_WRITE) ||
12145                         force_copy))) {
12146                         /* if real_map equals map we unlock below */
12147                         if ((*real_map != map) &&
12148                             (*real_map != cow_sub_map_parent))
12149                                 vm_map_unlock(*real_map);
12150                         *real_map = VME_SUBMAP(entry);
12151                 }
12152
12153                 if(entry->needs_copy &&
12154                    ((fault_type & VM_PROT_WRITE) ||
12155                     force_copy)) {
12156                         if (!mapped_needs_copy) {
12157                                 if (vm_map_lock_read_to_write(map)) {
12158                                         vm_map_lock_read(map);
12159                                         *real_map = map;
12160                                         goto RetryLookup;
12161                                 }
12162                                 vm_map_lock_read(VME_SUBMAP(entry));
12163                                 *var_map = VME_SUBMAP(entry);
12164                                 cow_sub_map_parent = map;
12165                                 /* reset base to map before cow object */
12166                                 /* this is the map which will accept   */
12167                                 /* the new cow object */
12168                                 old_start = entry->vme_start;
12169                                 old_end = entry->vme_end;
12170                                 cow_parent_vaddr = vaddr;
12171                                 mapped_needs_copy = TRUE;
12172                         } else {
12173                                 vm_map_lock_read(VME_SUBMAP(entry));
12174                                 *var_map = VME_SUBMAP(entry);
12175                                 if((cow_sub_map_parent != map) &&
12176                                    (*real_map != map))
12177                                         vm_map_unlock(map);
12178                         }
12179                 } else {
12180                         vm_map_lock_read(VME_SUBMAP(entry));
12181                         *var_map = VME_SUBMAP(entry);
12182                         /* leave map locked if it is a target */
12183                         /* cow sub_map above otherwise, just  */
12184                         /* follow the maps down to the object */
12185                         /* here we unlock knowing we are not  */
12186                         /* revisiting the map.  */
12187                         if((*real_map != map) && (map != cow_sub_map_parent))
12188                                 vm_map_unlock_read(map);
12189                 }
12190
12191                 map = *var_map;
12192
12193                 /* calculate the offset in the submap for vaddr */
12194                 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12195
12196         RetrySubMap:
12197                 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12198                         if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
12199                                 vm_map_unlock(cow_sub_map_parent);
12200                         }
12201                         if((*real_map != map)
12202                            && (*real_map != cow_sub_map_parent)) {
12203                                 vm_map_unlock(*real_map);
12204                         }
12205                         *real_map = map;
12206                         return KERN_INVALID_ADDRESS;
12207                 }
12208
12209                 /* find the attenuated shadow of the underlying object */
12210                 /* on our target map */
12211
12212                 /* in english the submap object may extend beyond the     */
12213                 /* region mapped by the entry or, may only fill a portion */
12214                 /* of it.  For our purposes, we only care if the object   */
12215                 /* doesn't fill.  In this case the area which will        */
12216                 /* ultimately be clipped in the top map will only need    */
12217                 /* to be as big as the portion of the underlying entry    */
12218                 /* which is mapped */
12219                 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12220                         submap_entry->vme_start - VME_OFFSET(entry) : 0;
12221
12222                 end_delta =
12223                         (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12224                         submap_entry->vme_end ?
12225                         0 : (VME_OFFSET(entry) +
12226                              (old_end - old_start))
12227                         - submap_entry->vme_end;
12228
12229                 old_start += start_delta;
12230                 old_end -= end_delta;
12231
12232                 if(submap_entry->is_sub_map) {
12233                         entry = submap_entry;
12234                         vaddr = local_vaddr;
12235                         goto submap_recurse;
12236                 }
12237
12238                 if (((fault_type & VM_PROT_WRITE) ||
12239                      force_copy)
12240                     && cow_sub_map_parent) {
12241
12242                         vm_object_t     sub_object, copy_object;
12243                         vm_object_offset_t copy_offset;
12244                         vm_map_offset_t local_start;
12245                         vm_map_offset_t local_end;
12246                         boolean_t               copied_slowly = FALSE;
12247
12248                         if (vm_map_lock_read_to_write(map)) {
12249                                 vm_map_lock_read(map);
12250                                 old_start -= start_delta;
12251                                 old_end += end_delta;
12252                                 goto RetrySubMap;
12253                         }
12254
12255
12256                         sub_object = VME_OBJECT(submap_entry);
12257                         if (sub_object == VM_OBJECT_NULL) {
12258                                 sub_object =
12259                                         vm_object_allocate(
12260                                                 (vm_map_size_t)
12261                                                 (submap_entry->vme_end -
12262                                                  submap_entry->vme_start));
12263                                 VME_OBJECT_SET(submap_entry, sub_object);
12264                                 VME_OFFSET_SET(submap_entry, 0);
12265                                 assert(!submap_entry->is_sub_map);
12266                                 assert(submap_entry->use_pmap);
12267                         }
12268                         local_start =  local_vaddr -
12269                                 (cow_parent_vaddr - old_start);
12270                         local_end = local_vaddr +
12271                                 (old_end - cow_parent_vaddr);
12272                         vm_map_clip_start(map, submap_entry, local_start);
12273                         vm_map_clip_end(map, submap_entry, local_end);
12274                         if (submap_entry->is_sub_map) {
12275                                 /* unnesting was done when clipping */
12276                                 assert(!submap_entry->use_pmap);
12277                         }
12278
12279                         /* This is the COW case, let's connect */
12280                         /* an entry in our space to the underlying */
12281                         /* object in the submap, bypassing the  */
12282                         /* submap. */
12283
12284
12285                         if(submap_entry->wired_count != 0 ||
12286                            (sub_object->copy_strategy ==
12287                             MEMORY_OBJECT_COPY_NONE)) {
12288                                 vm_object_lock(sub_object);
12289                                 vm_object_copy_slowly(sub_object,
12290                                                       VME_OFFSET(submap_entry),
12291                                                       (submap_entry->vme_end -
12292                                                        submap_entry->vme_start),
12293                                                       FALSE,
12294                                                       &copy_object);
12295                                 copied_slowly = TRUE;
12296                         } else {
12297
12298                                 /* set up shadow object */
12299                                 copy_object = sub_object;
12300                                 vm_object_lock(sub_object);
12301                                 vm_object_reference_locked(sub_object);
12302                                 sub_object->shadowed = TRUE;
12303                                 vm_object_unlock(sub_object);
12304
12305                                 assert(submap_entry->wired_count == 0);
12306                                 submap_entry->needs_copy = TRUE;
12307
12308                                 prot = submap_entry->protection;
12309                                 assert(!pmap_has_prot_policy(prot));
12310                                 prot = prot & ~VM_PROT_WRITE;
12311                                 assert(!pmap_has_prot_policy(prot));
12312
12313                                 if (override_nx(old_map,
12314                                                 VME_ALIAS(submap_entry))
12315                                     && prot)
12316                                         prot |= VM_PROT_EXECUTE;
12317
12318                                 vm_object_pmap_protect(
12319                                         sub_object,
12320                                         VME_OFFSET(submap_entry),
12321                                         submap_entry->vme_end -
12322                                         submap_entry->vme_start,
12323                                         (submap_entry->is_shared
12324                                          || map->mapped_in_other_pmaps) ?
12325                                         PMAP_NULL : map->pmap,
12326                                         submap_entry->vme_start,
12327                                         prot);
12328                         }
12329
12330                         /*
12331                          * Adjust the fault offset to the submap entry.
12332                          */
12333                         copy_offset = (local_vaddr -
12334                                        submap_entry->vme_start +
12335                                        VME_OFFSET(submap_entry));
12336
12337                         /* This works differently than the */
12338                         /* normal submap case. We go back  */
12339                         /* to the parent of the cow map and*/
12340                         /* clip out the target portion of  */
12341                         /* the sub_map, substituting the   */
12342                         /* new copy object,                */
12343
12344                         subentry_protection = submap_entry->protection;
12345                         subentry_max_protection = submap_entry->max_protection;
12346                         vm_map_unlock(map);
12347                         submap_entry = NULL; /* not valid after map unlock */
12348
12349                         local_start = old_start;
12350                         local_end = old_end;
12351                         map = cow_sub_map_parent;
12352                         *var_map = cow_sub_map_parent;
12353                         vaddr = cow_parent_vaddr;
12354                         cow_sub_map_parent = NULL;
12355
12356                         if(!vm_map_lookup_entry(map,
12357                                                 vaddr, &entry)) {
12358                                 vm_object_deallocate(
12359                                         copy_object);
12360                                 vm_map_lock_write_to_read(map);
12361                                 return KERN_INVALID_ADDRESS;
12362                         }
12363
12364                         /* clip out the portion of space */
12365                         /* mapped by the sub map which   */
12366                         /* corresponds to the underlying */
12367                         /* object */
12368
12369                         /*
12370                          * Clip (and unnest) the smallest nested chunk
12371                          * possible around the faulting address...
12372                          */
12373                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
12374                         local_end = local_start + pmap_nesting_size_min;
12375                         /*
12376                          * ... but don't go beyond the "old_start" to "old_end"
12377                          * range, to avoid spanning over another VM region
12378                          * with a possibly different VM object and/or offset.
12379                          */
12380                         if (local_start < old_start) {
12381                                 local_start = old_start;
12382                         }
12383                         if (local_end > old_end) {
12384                                 local_end = old_end;
12385                         }
12386                         /*
12387                          * Adjust copy_offset to the start of the range.
12388                          */
12389                         copy_offset -= (vaddr - local_start);
12390
12391                         vm_map_clip_start(map, entry, local_start);
12392                         vm_map_clip_end(map, entry, local_end);
12393                         if (entry->is_sub_map) {
12394                                 /* unnesting was done when clipping */
12395                                 assert(!entry->use_pmap);
12396                         }
12397
12398                         /* substitute copy object for */
12399                         /* shared map entry           */
12400                         vm_map_deallocate(VME_SUBMAP(entry));
12401                         assert(!entry->iokit_acct);
12402                         entry->is_sub_map = FALSE;
12403                         entry->use_pmap = TRUE;
12404                         VME_OBJECT_SET(entry, copy_object);
12405
12406                         /* propagate the submap entry's protections */
12407                         entry->protection |= subentry_protection;
12408                         entry->max_protection |= subentry_max_protection;
12409
12410 #if CONFIG_EMBEDDED
12411                         if (entry->protection & VM_PROT_WRITE) {
12412                                 if ((entry->protection & VM_PROT_EXECUTE) && !(entry->used_for_jit)) {
12413                                         printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
12414                                         entry->protection &= ~VM_PROT_EXECUTE;
12415                                 }
12416                         }
12417 #endif
12418
12419                         if(copied_slowly) {
12420                                 VME_OFFSET_SET(entry, local_start - old_start);
12421                                 entry->needs_copy = FALSE;
12422                                 entry->is_shared = FALSE;
12423                         } else {
12424                                 VME_OFFSET_SET(entry, copy_offset);
12425                                 assert(entry->wired_count == 0);
12426                                 entry->needs_copy = TRUE;
12427                                 if(entry->inheritance == VM_INHERIT_SHARE)
12428                                         entry->inheritance = VM_INHERIT_COPY;
12429                                 if (map != old_map)
12430                                         entry->is_shared = TRUE;
12431                         }
12432                         if(entry->inheritance == VM_INHERIT_SHARE)
12433                                 entry->inheritance = VM_INHERIT_COPY;
12434
12435                         vm_map_lock_write_to_read(map);
12436                 } else {
12437                         if((cow_sub_map_parent)
12438                            && (cow_sub_map_parent != *real_map)
12439                            && (cow_sub_map_parent != map)) {
12440                                 vm_map_unlock(cow_sub_map_parent);
12441                         }
12442                         entry = submap_entry;
12443                         vaddr = local_vaddr;
12444                 }
12445         }
12446
12447         /*
12448          *      Check whether this task is allowed to have
12449          *      this page.
12450          */
12451
12452         prot = entry->protection;
12453
12454         if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
12455                 /*
12456                  * HACK -- if not a stack, then allow execution
12457                  */
12458                 prot |= VM_PROT_EXECUTE;
12459         }
12460
12461         if (mask_protections) {
12462                 fault_type &= prot;
12463                 if (fault_type == VM_PROT_NONE) {
12464                         goto protection_failure;
12465                 }
12466         }
12467         if (((fault_type & prot) != fault_type)
12468 #if __arm64__
12469             /* prefetch abort in execute-only page */
12470             && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
12471 #endif
12472             ) {
12473         protection_failure:
12474                 if (*real_map != map) {
12475                         vm_map_unlock(*real_map);
12476                 }
12477                 *real_map = map;
12478
12479                 if ((fault_type & VM_PROT_EXECUTE) && prot)
12480                         log_stack_execution_failure((addr64_t)vaddr, prot);
12481
12482                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
12483                 return KERN_PROTECTION_FAILURE;
12484         }
12485
12486         /*
12487          *      If this page is not pageable, we have to get
12488          *      it for all possible accesses.
12489          */
12490
12491         *wired = (entry->wired_count != 0);
12492         if (*wired)
12493                 fault_type = prot;
12494
12495         /*
12496          *      If the entry was copy-on-write, we either ...
12497          */
12498
12499         if (entry->needs_copy) {
12500                 /*
12501                  *      If we want to write the page, we may as well
12502                  *      handle that now since we've got the map locked.
12503                  *
12504                  *      If we don't need to write the page, we just
12505                  *      demote the permissions allowed.
12506                  */
12507
12508                 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
12509                         /*
12510                          *      Make a new object, and place it in the
12511                          *      object chain.  Note that no new references
12512                          *      have appeared -- one just moved from the
12513                          *      map to the new object.
12514                          */
12515
12516                         if (vm_map_lock_read_to_write(map)) {
12517                                 vm_map_lock_read(map);
12518                                 goto RetryLookup;
12519                         }
12520
12521                         if (VME_OBJECT(entry)->shadowed == FALSE) {
12522                                 vm_object_lock(VME_OBJECT(entry));
12523                                 VME_OBJECT(entry)->shadowed = TRUE;
12524                                 vm_object_unlock(VME_OBJECT(entry));
12525                         }
12526                         VME_OBJECT_SHADOW(entry,
12527                                           (vm_map_size_t) (entry->vme_end -
12528                                                            entry->vme_start));
12529                         entry->needs_copy = FALSE;
12530
12531                         vm_map_lock_write_to_read(map);
12532                 }
12533                 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
12534                         /*
12535                          *      We're attempting to read a copy-on-write
12536                          *      page -- don't allow writes.
12537                          */
12538
12539                         prot &= (~VM_PROT_WRITE);
12540                 }
12541         }
12542
12543         /*
12544          *      Create an object if necessary.
12545          */
12546         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
12547
12548                 if (vm_map_lock_read_to_write(map)) {
12549                         vm_map_lock_read(map);
12550                         goto RetryLookup;
12551                 }
12552
12553                 VME_OBJECT_SET(entry,
12554                                vm_object_allocate(
12555                                        (vm_map_size_t)(entry->vme_end -
12556                                                        entry->vme_start)));
12557                 VME_OFFSET_SET(entry, 0);
12558                 assert(entry->use_pmap);
12559                 vm_map_lock_write_to_read(map);
12560         }
12561
12562         /*
12563          *      Return the object/offset from this entry.  If the entry
12564          *      was copy-on-write or empty, it has been fixed up.  Also
12565          *      return the protection.
12566          */
12567
12568         *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
12569         *object = VME_OBJECT(entry);
12570         *out_prot = prot;
12571
12572         if (fault_info) {
12573                 fault_info->interruptible = THREAD_UNINT; /* for now... */
12574                 /* ... the caller will change "interruptible" if needed */
12575                 fault_info->cluster_size = 0;
12576                 fault_info->user_tag = VME_ALIAS(entry);
12577                 fault_info->pmap_options = 0;
12578                 if (entry->iokit_acct ||
12579                     (!entry->is_sub_map && !entry->use_pmap)) {
12580                         fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12581                 }
12582                 fault_info->behavior = entry->behavior;
12583                 fault_info->lo_offset = VME_OFFSET(entry);
12584                 fault_info->hi_offset =
12585                         (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
12586                 fault_info->no_cache  = entry->no_cache;
12587                 fault_info->stealth = FALSE;
12588                 fault_info->io_sync = FALSE;
12589                 if (entry->used_for_jit ||
12590                     entry->vme_resilient_codesign) {
12591                         fault_info->cs_bypass = TRUE;
12592                 } else {
12593                         fault_info->cs_bypass = FALSE;
12594                 }
12595                 fault_info->mark_zf_absent = FALSE;
12596                 fault_info->batch_pmap_op = FALSE;
12597         }
12598
12599         /*
12600          *      Lock the object to prevent it from disappearing
12601          */
12602         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
12603                 vm_object_lock(*object);
12604         else
12605                 vm_object_lock_shared(*object);
12606
12607         /*
12608          *      Save the version number
12609          */
12610
12611         out_version->main_timestamp = map->timestamp;
12612
12613         return KERN_SUCCESS;
12614 }
12615
12616
12617 /*
12618  *      vm_map_verify:
12619  *
12620  *      Verifies that the map in question has not changed
12621  *      since the given version. The map has to be locked
12622  *      ("shared" mode is fine) before calling this function
12623  *      and it will be returned locked too.
12624  */
12625 boolean_t
12626 vm_map_verify(
12627         vm_map_t                map,
12628         vm_map_version_t        *version)       /* REF */
12629 {
12630         /* The caller must already hold the map lock. */
12631         vm_map_lock_assert_held(map);
12632
12633         /* The map counts as unchanged only when its current timestamp */
12634         /* equals the one recorded in "version". */
12635         return (map->timestamp == version->main_timestamp);
12636 }
12637
12638 /*
12639  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
12640  *      Goes away after regular vm_region_recurse function migrates to
12641  *      64 bits
12642  *      vm_region_recurse: A form of vm_region which follows the
12643  *      submaps in a target map
12644  *
12645  */
12646
12647 kern_return_t
12648 vm_map_region_recurse_64(
12649         vm_map_t                 map,
12650         vm_map_offset_t *address,               /* IN/OUT */
12651         vm_map_size_t           *size,                  /* OUT */
12652         natural_t               *nesting_depth, /* IN/OUT */
12653         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
12654         mach_msg_type_number_t  *count) /* IN/OUT */
12655 {
12656         mach_msg_type_number_t  original_count;
12657         vm_region_extended_info_data_t  extended;
12658         vm_map_entry_t                  tmp_entry;
12659         vm_map_offset_t                 user_address;
12660         unsigned int                    user_max_depth;
12661
12662         /*
12663          * "curr_entry" is the VM map entry preceding or including the
12664          * address we're looking for.
12665          * "curr_map" is the map or sub-map containing "curr_entry".
12666          * "curr_address" is the equivalent of the top map's "user_address"
12667          * in the current map.
12668          * "curr_offset" is the cumulated offset of "curr_map" in the
12669          * target task's address space.
12670          * "curr_depth" is the depth of "curr_map" in the chain of
12671          * sub-maps.
12672          *
12673          * "curr_max_below" and "curr_max_above" limit the range (around
12674          * "curr_address") we should take into account in the current (sub)map.
12675          * They limit the range to what's visible through the map entries
12676          * we've traversed from the top map to the current map.
12677
12678          */
12679         vm_map_entry_t                  curr_entry;
12680         vm_map_address_t                curr_address;
12681         vm_map_offset_t                 curr_offset;
12682         vm_map_t                        curr_map;
12683         unsigned int                    curr_depth;
12684         vm_map_offset_t                 curr_max_below, curr_max_above;
12685         vm_map_offset_t                 curr_skip;
12686
12687         /*
12688          * "next_" is the same as "curr_" but for the VM region immediately
12689          * after the address we're looking for.  We need to keep track of this
12690          * too because we want to return info about that region if the
12691          * address we're looking for is not mapped.
12692          */
12693         vm_map_entry_t                  next_entry;
12694         vm_map_offset_t                 next_offset;
12695         vm_map_offset_t                 next_address;
12696         vm_map_t                        next_map;
12697         unsigned int                    next_depth;
12698         vm_map_offset_t                 next_max_below, next_max_above;
12699         vm_map_offset_t                 next_skip;
12700
12701         boolean_t                       look_for_pages;
12702         vm_region_submap_short_info_64_t short_info;
12703         boolean_t                       do_region_footprint;
12704
12705         if (map == VM_MAP_NULL) {
12706                 /* no address space to work on */
12707                 return KERN_INVALID_ARGUMENT;
12708         }
12709
12710
12711         if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
12712                 /*
12713                  * "info" structure is not big enough and
12714                  * would overflow
12715                  */
12716                 return KERN_INVALID_ARGUMENT;
12717         }
12718
12719         do_region_footprint = task_self_region_footprint();
12720         original_count = *count;
12721
12722         if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
12723                 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
12724                 look_for_pages = FALSE;
12725                 short_info = (vm_region_submap_short_info_64_t) submap_info;
12726                 submap_info = NULL;
12727         } else {
12728                 look_for_pages = TRUE;
12729                 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
12730                 short_info = NULL;
12731
12732                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
12733                         *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
12734                 }
12735         }
12736
12737         user_address = *address;
12738         user_max_depth = *nesting_depth;
12739
12740         if (not_in_kdp) {
12741                 vm_map_lock_read(map);
12742         }
12743
12744 recurse_again:
12745         curr_entry = NULL;
12746         curr_map = map;
12747         curr_address = user_address;
12748         curr_offset = 0;
12749         curr_skip = 0;
12750         curr_depth = 0;
12751         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
12752         curr_max_below = curr_address;
12753
12754         next_entry = NULL;
12755         next_map = NULL;
12756         next_address = 0;
12757         next_offset = 0;
12758         next_skip = 0;
12759         next_depth = 0;
12760         next_max_above = (vm_map_offset_t) -1;
12761         next_max_below = (vm_map_offset_t) -1;
12762
12763         for (;;) {
12764                 if (vm_map_lookup_entry(curr_map,
12765                                         curr_address,
12766                                         &tmp_entry)) {
12767                         /* tmp_entry contains the address we're looking for */
12768                         curr_entry = tmp_entry;
12769                 } else {
12770                         vm_map_offset_t skip;
12771                         /*
12772                          * The address is not mapped.  "tmp_entry" is the
12773                          * map entry preceding the address.  We want the next
12774                          * one, if it exists.
12775                          */
12776                         curr_entry = tmp_entry->vme_next;
12777
12778                         if (curr_entry == vm_map_to_entry(curr_map) ||
12779                             (curr_entry->vme_start >=
12780                              curr_address + curr_max_above)) {
12781                                 /* no next entry at this level: stop looking */
12782                                 if (not_in_kdp) {
12783                                         vm_map_unlock_read(curr_map);
12784                                 }
12785                                 curr_entry = NULL;
12786                                 curr_map = NULL;
12787                                 curr_skip = 0;
12788                                 curr_offset = 0;
12789                                 curr_depth = 0;
12790                                 curr_max_above = 0;
12791                                 curr_max_below = 0;
12792                                 break;
12793                         }
12794
12795                         /* adjust current address and offset */
12796                         skip = curr_entry->vme_start - curr_address;
12797                         curr_address = curr_entry->vme_start;
12798                         curr_skip += skip;
12799                         curr_offset += skip;
12800                         curr_max_above -= skip;
12801                         curr_max_below = 0;
12802                 }
12803
12804                 /*
12805                  * Is the next entry at this level closer to the address (or
12806                  * deeper in the submap chain) than the one we had
12807                  * so far ?
12808                  */
12809                 tmp_entry = curr_entry->vme_next;
12810                 if (tmp_entry == vm_map_to_entry(curr_map)) {
12811                         /* no next entry at this level */
12812                 } else if (tmp_entry->vme_start >=
12813                            curr_address + curr_max_above) {
12814                         /*
12815                          * tmp_entry is beyond the scope of what we mapped of
12816                          * this submap in the upper level: ignore it.
12817                          */
12818                 } else if ((next_entry == NULL) ||
12819                            (tmp_entry->vme_start + curr_offset <=
12820                             next_entry->vme_start + next_offset)) {
12821                         /*
12822                          * We didn't have a "next_entry" or this one is
12823                          * closer to the address we're looking for:
12824                          * use this "tmp_entry" as the new "next_entry".
12825                          */
12826                         if (next_entry != NULL) {
12827                                 /* unlock the last "next_map" */
12828                                 if (next_map != curr_map && not_in_kdp) {
12829                                         vm_map_unlock_read(next_map);
12830                                 }
12831                         }
12832                         next_entry = tmp_entry;
12833                         next_map = curr_map;
12834                         next_depth = curr_depth;
12835                         next_address = next_entry->vme_start;
12836                         next_skip = curr_skip;
12837                         next_skip += (next_address - curr_address);
12838                         next_offset = curr_offset;
12839                         next_offset += (next_address - curr_address);
12840                         next_max_above = MIN(next_max_above, curr_max_above);
12841                         next_max_above = MIN(next_max_above,
12842                                              next_entry->vme_end - next_address);
12843                         next_max_below = MIN(next_max_below, curr_max_below);
12844                         next_max_below = MIN(next_max_below,
12845                                              next_address - next_entry->vme_start);
12846                 }
12847
12848                 /*
12849                  * "curr_max_{above,below}" allow us to keep track of the
12850                  * portion of the submap that is actually mapped at this level:
12851                  * the rest of that submap is irrelevant to us, since it's not
12852                  * mapped here.
12853                  * The relevant portion of the map starts at
12854                  * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
12855                  */
12856                 curr_max_above = MIN(curr_max_above,
12857                                      curr_entry->vme_end - curr_address);
12858                 curr_max_below = MIN(curr_max_below,
12859                                      curr_address - curr_entry->vme_start);
12860
12861                 if (!curr_entry->is_sub_map ||
12862                     curr_depth >= user_max_depth) {
12863                         /*
12864                          * We hit a leaf map or we reached the maximum depth
12865                          * we could, so stop looking.  Keep the current map
12866                          * locked.
12867                          */
12868                         break;
12869                 }
12870
12871                 /*
12872                  * Get down to the next submap level.
12873                  */
12874
12875                 /*
12876                  * Lock the next level and unlock the current level,
12877                  * unless we need to keep it locked to access the "next_entry"
12878                  * later.
12879                  */
12880                 if (not_in_kdp) {
12881                         vm_map_lock_read(VME_SUBMAP(curr_entry));
12882                 }
12883                 if (curr_map == next_map) {
12884                         /* keep "next_map" locked in case we need it */
12885                 } else {
12886                         /* release this map */
12887                         if (not_in_kdp)
12888                                 vm_map_unlock_read(curr_map);
12889                 }
12890
12891                 /*
12892                  * Adjust the offset.  "curr_entry" maps the submap
12893                  * at relative address "curr_entry->vme_start" in the
12894                  * curr_map but skips the first "VME_OFFSET(curr_entry)"
12895                  * bytes of the submap.
12896                  * "curr_offset" always represents the offset of a virtual
12897                  * address in the curr_map relative to the absolute address
12898                  * space (i.e. the top-level VM map).
12899                  */
12900                 curr_offset +=
12901                         (VME_OFFSET(curr_entry) - curr_entry->vme_start);
12902                 curr_address = user_address + curr_offset;
12903                 /* switch to the submap */
12904                 curr_map = VME_SUBMAP(curr_entry);
12905                 curr_depth++;
12906                 curr_entry = NULL;
12907         }
12908
12909 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
12910 // so probably should be a real 32b ID vs. ptr.
12911 // Current users just check for equality
12912
12913         if (curr_entry == NULL) {
12914                 /* no VM region contains the address... */
12915
12916                 if (do_region_footprint && /* we want footprint numbers */
12917                     next_entry == NULL && /* & there are no more regions */
12918                     /* & we haven't already provided our fake region: */
12919                     user_address <= vm_map_last_entry(map)->vme_end) {
12920                         ledger_amount_t nonvol, nonvol_compressed;
12921                         /*
12922                          * Add a fake memory region to account for
12923                          * purgeable memory that counts towards this
12924                          * task's memory footprint, i.e. the resident
12925                          * compressed pages of non-volatile objects
12926                          * owned by that task.
12927                          */
12928                         ledger_get_balance(
12929                                 map->pmap->ledger,
12930                                 task_ledgers.purgeable_nonvolatile,
12931                                 &nonvol);
12932                         ledger_get_balance(
12933                                 map->pmap->ledger,
12934                                 task_ledgers.purgeable_nonvolatile_compressed,
12935                                 &nonvol_compressed);
12936                         if (nonvol + nonvol_compressed == 0) {
12937                                 /* no purgeable memory usage to report */
12938                                 return KERN_INVALID_ADDRESS;
12939                         }
12940                         /* fake region to show nonvolatile footprint */
12941                         if (look_for_pages) {
12942                                 submap_info->protection = VM_PROT_DEFAULT;
12943                                 submap_info->max_protection = VM_PROT_DEFAULT;
12944                                 submap_info->inheritance = VM_INHERIT_DEFAULT;
12945                                 submap_info->offset = 0;
12946                                 submap_info->user_tag = -1;
12947                                 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
12948                                 submap_info->pages_shared_now_private = 0;
12949                                 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
12950                                 submap_info->pages_dirtied = submap_info->pages_resident;
12951                                 submap_info->ref_count = 1;
12952                                 submap_info->shadow_depth = 0;
12953                                 submap_info->external_pager = 0;
12954                                 submap_info->share_mode = SM_PRIVATE;
12955                                 submap_info->is_submap = 0;
12956                                 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
12957                                 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
12958                                 submap_info->user_wired_count = 0;
12959                                 submap_info->pages_reusable = 0;
12960                         } else {
12961                                 short_info->user_tag = -1;
12962                                 short_info->offset = 0;
12963                                 short_info->protection = VM_PROT_DEFAULT;
12964                                 short_info->inheritance = VM_INHERIT_DEFAULT;
12965                                 short_info->max_protection = VM_PROT_DEFAULT;
12966                                 short_info->behavior = VM_BEHAVIOR_DEFAULT;
12967                                 short_info->user_wired_count = 0;
12968                                 short_info->is_submap = 0;
12969                                 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
12970                                 short_info->external_pager = 0;
12971                                 short_info->shadow_depth = 0;
12972                                 short_info->share_mode = SM_PRIVATE;
12973                                 short_info->ref_count = 1;
12974                         }
12975                         *nesting_depth = 0;
12976                         *size = (vm_map_size_t) (nonvol + nonvol_compressed);
12977 //                      *address = user_address;
12978                         *address = vm_map_last_entry(map)->vme_end;
12979                         return KERN_SUCCESS;
12980                 }
12981
12982                 if (next_entry == NULL) {
12983                         /* ... and no VM region follows it either */
12984                         return KERN_INVALID_ADDRESS;
12985                 }
12986                 /* ... gather info about the next VM region */
12987                 curr_entry = next_entry;
12988                 curr_map = next_map;    /* still locked ... */
12989                 curr_address = next_address;
12990                 curr_skip = next_skip;
12991                 curr_offset = next_offset;
12992                 curr_depth = next_depth;
12993                 curr_max_above = next_max_above;
12994                 curr_max_below = next_max_below;
12995         } else {
12996                 /* we won't need "next_entry" after all */
12997                 if (next_entry != NULL) {
12998                         /* release "next_map" */
12999                         if (next_map != curr_map && not_in_kdp) {
13000                                 vm_map_unlock_read(next_map);
13001                         }
13002                 }
13003         }
13004         next_entry = NULL;
13005         next_map = NULL;
13006         next_offset = 0;
13007         next_skip = 0;
13008         next_depth = 0;
13009         next_max_below = -1;
13010         next_max_above = -1;
13011
13012         if (curr_entry->is_sub_map &&
13013             curr_depth < user_max_depth) {
13014                 /*
13015                  * We're not as deep as we could be:  we must have
13016                  * gone back up after not finding anything mapped
13017                  * below the original top-level map entry's.
13018                  * Let's move "curr_address" forward and recurse again.
13019                  */
13020                 user_address = curr_address;
13021                 goto recurse_again;
13022         }
13023
13024         *nesting_depth = curr_depth;
13025         *size = curr_max_above + curr_max_below;
13026         *address = user_address + curr_skip - curr_max_below;
13027
13028 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13029 // so probably should be a real 32b ID vs. ptr.
13030 // Current users just check for equality
13031 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13032
13033         if (look_for_pages) {
13034                 submap_info->user_tag = VME_ALIAS(curr_entry);
13035                 submap_info->offset = VME_OFFSET(curr_entry);
13036                 submap_info->protection = curr_entry->protection;
13037                 submap_info->inheritance = curr_entry->inheritance;
13038                 submap_info->max_protection = curr_entry->max_protection;
13039                 submap_info->behavior = curr_entry->behavior;
13040                 submap_info->user_wired_count = curr_entry->user_wired_count;
13041                 submap_info->is_submap = curr_entry->is_sub_map;
13042                 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13043         } else {
13044                 short_info->user_tag = VME_ALIAS(curr_entry);
13045                 short_info->offset = VME_OFFSET(curr_entry);
13046                 short_info->protection = curr_entry->protection;
13047                 short_info->inheritance = curr_entry->inheritance;
13048                 short_info->max_protection = curr_entry->max_protection;
13049                 short_info->behavior = curr_entry->behavior;
13050                 short_info->user_wired_count = curr_entry->user_wired_count;
13051                 short_info->is_submap = curr_entry->is_sub_map;
13052                 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13053         }
13054
13055         extended.pages_resident = 0;
13056         extended.pages_swapped_out = 0;
13057         extended.pages_shared_now_private = 0;
13058         extended.pages_dirtied = 0;
13059         extended.pages_reusable = 0;
13060         extended.external_pager = 0;
13061         extended.shadow_depth = 0;
13062         extended.share_mode = SM_EMPTY;
13063         extended.ref_count = 0;
13064
13065         if (not_in_kdp) {
13066                 if (!curr_entry->is_sub_map) {
13067                         vm_map_offset_t range_start, range_end;
13068                         range_start = MAX((curr_address - curr_max_below),
13069                                           curr_entry->vme_start);
13070                         range_end = MIN((curr_address + curr_max_above),
13071                                         curr_entry->vme_end);
13072                         vm_map_region_walk(curr_map,
13073                                            range_start,
13074                                            curr_entry,
13075                                            (VME_OFFSET(curr_entry) +
13076                                             (range_start -
13077                                              curr_entry->vme_start)),
13078                                            range_end - range_start,
13079                                            &extended,
13080                                            look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13081                         if (extended.external_pager &&
13082                             extended.ref_count == 2 &&
13083                             extended.share_mode == SM_SHARED) {
13084                                 extended.share_mode = SM_PRIVATE;
13085                         }
13086                 } else {
13087                         if (curr_entry->use_pmap) {
13088                                 extended.share_mode = SM_TRUESHARED;
13089                         } else {
13090                                 extended.share_mode = SM_PRIVATE;
13091                         }
13092                         extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
13093                 }
13094         }
13095
13096         if (look_for_pages) {
13097                 submap_info->pages_resident = extended.pages_resident;
13098                 submap_info->pages_swapped_out = extended.pages_swapped_out;
13099                 submap_info->pages_shared_now_private =
13100                         extended.pages_shared_now_private;
13101                 submap_info->pages_dirtied = extended.pages_dirtied;
13102                 submap_info->external_pager = extended.external_pager;
13103                 submap_info->shadow_depth = extended.shadow_depth;
13104                 submap_info->share_mode = extended.share_mode;
13105                 submap_info->ref_count = extended.ref_count;
13106
13107                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13108                         submap_info->pages_reusable = extended.pages_reusable;
13109                 }
13110         } else {
13111                 short_info->external_pager = extended.external_pager;
13112                 short_info->shadow_depth = extended.shadow_depth;
13113                 short_info->share_mode = extended.share_mode;
13114                 short_info->ref_count = extended.ref_count;
13115         }
13116
13117         if (not_in_kdp) {
13118                 vm_map_unlock_read(curr_map);
13119         }
13120
13121         return KERN_SUCCESS;
13122 }
13123
13124 /*
13125  *      vm_region:
13126  *
13127  *      User call to obtain information about a region in
13128  *      a task's address map. Currently, only one flavor is
13129  *      supported.
13130  *
13131  *      XXX The reserved and behavior fields cannot be filled
13132  *          in until the vm merge from the IK is completed, and
13133  *          vm_reserve is implemented.
13134  */
13135
13136 kern_return_t
13137 vm_map_region(
13138         vm_map_t                 map,
13139         vm_map_offset_t *address,               /* IN/OUT */
13140         vm_map_size_t           *size,                  /* OUT */
13141         vm_region_flavor_t       flavor,                /* IN */
13142         vm_region_info_t         info,                  /* OUT */
13143         mach_msg_type_number_t  *count, /* IN/OUT */
13144         mach_port_t             *object_name)           /* OUT */
13145 {
13146         vm_map_entry_t          tmp_entry;
13147         vm_map_entry_t          entry;
13148         vm_map_offset_t         start;
13149
13150         if (map == VM_MAP_NULL)
13151                 return(KERN_INVALID_ARGUMENT);
13152
13153         switch (flavor) {
13154
13155         case VM_REGION_BASIC_INFO:
13156                 /* legacy for old 32-bit objects info */
13157         {
13158                 vm_region_basic_info_t  basic;
13159
13160                 if (*count < VM_REGION_BASIC_INFO_COUNT)
13161                         return(KERN_INVALID_ARGUMENT);
13162
13163                 basic = (vm_region_basic_info_t) info;
13164                 *count = VM_REGION_BASIC_INFO_COUNT;
13165
13166                 vm_map_lock_read(map);
13167
13168                 start = *address;
13169                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13170                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13171                                 vm_map_unlock_read(map);
13172                                 return(KERN_INVALID_ADDRESS);
13173                         }
13174                 } else {
13175                         entry = tmp_entry;
13176                 }
13177
13178                 start = entry->vme_start;
13179
13180                 basic->offset = (uint32_t)VME_OFFSET(entry);
13181                 basic->protection = entry->protection;
13182                 basic->inheritance = entry->inheritance;
13183                 basic->max_protection = entry->max_protection;
13184                 basic->behavior = entry->behavior;
13185                 basic->user_wired_count = entry->user_wired_count;
13186                 basic->reserved = entry->is_sub_map;
13187                 *address = start;
13188                 *size = (entry->vme_end - start);
13189
13190                 if (object_name) *object_name = IP_NULL;
13191                 if (entry->is_sub_map) {
13192                         basic->shared = FALSE;
13193                 } else {
13194                         basic->shared = entry->is_shared;
13195                 }
13196
13197                 vm_map_unlock_read(map);
13198                 return(KERN_SUCCESS);
13199         }
13200
13201         case VM_REGION_BASIC_INFO_64:
13202         {
13203                 vm_region_basic_info_64_t       basic;
13204
13205                 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
13206                         return(KERN_INVALID_ARGUMENT);
13207
13208                 basic = (vm_region_basic_info_64_t) info;
13209                 *count = VM_REGION_BASIC_INFO_COUNT_64;
13210
13211                 vm_map_lock_read(map);
13212
13213                 start = *address;
13214                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13215                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13216                                 vm_map_unlock_read(map);
13217                                 return(KERN_INVALID_ADDRESS);
13218                         }
13219                 } else {
13220                         entry = tmp_entry;
13221                 }
13222
13223                 start = entry->vme_start;
13224
13225                 basic->offset = VME_OFFSET(entry);
13226                 basic->protection = entry->protection;
13227                 basic->inheritance = entry->inheritance;
13228                 basic->max_protection = entry->max_protection;
13229                 basic->behavior = entry->behavior;
13230                 basic->user_wired_count = entry->user_wired_count;
13231                 basic->reserved = entry->is_sub_map;
13232                 *address = start;
13233                 *size = (entry->vme_end - start);
13234
13235                 if (object_name) *object_name = IP_NULL;
13236                 if (entry->is_sub_map) {
13237                         basic->shared = FALSE;
13238                 } else {
13239                         basic->shared = entry->is_shared;
13240                 }
13241
13242                 vm_map_unlock_read(map);
13243                 return(KERN_SUCCESS);
13244         }
13245         case VM_REGION_EXTENDED_INFO:
13246                 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
13247                         return(KERN_INVALID_ARGUMENT);
13248                 /*fallthru*/
13249         case VM_REGION_EXTENDED_INFO__legacy:
13250                 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
13251                         return KERN_INVALID_ARGUMENT;
13252
13253         {
13254                 vm_region_extended_info_t       extended;
13255                 mach_msg_type_number_t original_count;
13256
13257                 extended = (vm_region_extended_info_t) info;
13258
13259                 vm_map_lock_read(map);
13260
13261                 start = *address;
13262                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13263                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13264                                 vm_map_unlock_read(map);
13265                                 return(KERN_INVALID_ADDRESS);
13266                         }
13267                 } else {
13268                         entry = tmp_entry;
13269                 }
13270                 start = entry->vme_start;
13271
13272                 extended->protection = entry->protection;
13273                 extended->user_tag = VME_ALIAS(entry);
13274                 extended->pages_resident = 0;
13275                 extended->pages_swapped_out = 0;
13276                 extended->pages_shared_now_private = 0;
13277                 extended->pages_dirtied = 0;
13278                 extended->external_pager = 0;
13279                 extended->shadow_depth = 0;
13280
13281                 original_count = *count;
13282                 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13283                         *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13284                 } else {
13285                         extended->pages_reusable = 0;
13286                         *count = VM_REGION_EXTENDED_INFO_COUNT;
13287                 }
13288
13289                 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13290
13291                 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
13292                         extended->share_mode = SM_PRIVATE;
13293
13294                 if (object_name)
13295                         *object_name = IP_NULL;
13296                 *address = start;
13297                 *size = (entry->vme_end - start);
13298
13299                 vm_map_unlock_read(map);
13300                 return(KERN_SUCCESS);
13301         }
13302         case VM_REGION_TOP_INFO:
13303         {
13304                 vm_region_top_info_t    top;
13305
13306                 if (*count < VM_REGION_TOP_INFO_COUNT)
13307                         return(KERN_INVALID_ARGUMENT);
13308
13309                 top = (vm_region_top_info_t) info;
13310                 *count = VM_REGION_TOP_INFO_COUNT;
13311
13312                 vm_map_lock_read(map);
13313
13314                 start = *address;
13315                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13316                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13317                                 vm_map_unlock_read(map);
13318                                 return(KERN_INVALID_ADDRESS);
13319                         }
13320                 } else {
13321                         entry = tmp_entry;
13322
13323                 }
13324                 start = entry->vme_start;
13325
13326                 top->private_pages_resident = 0;
13327                 top->shared_pages_resident = 0;
13328
13329                 vm_map_region_top_walk(entry, top);
13330
13331                 if (object_name)
13332                         *object_name = IP_NULL;
13333                 *address = start;
13334                 *size = (entry->vme_end - start);
13335
13336                 vm_map_unlock_read(map);
13337                 return(KERN_SUCCESS);
13338         }
13339         default:
13340                 return(KERN_INVALID_ARGUMENT);
13341         }
13342 }
13343
/*
 * OBJ_RESIDENT_COUNT(obj, entry_size):
 *
 * Number of resident pages to report for "obj", capped at "entry_size".
 * If the object's "all_reusable" flag is set, only its wired pages are
 * counted; otherwise the count is the resident pages minus the reusable
 * ones.
 *
 * "obj" is expanded several times: pass an expression without side effects.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
        MIN((entry_size),                                               \
            (!(obj)->all_reusable ?                                     \
             (obj)->resident_page_count - (obj)->reusable_page_count :  \
             (obj)->wired_page_count))
13349
13350 void
13351 vm_map_region_top_walk(
13352         vm_map_entry_t             entry,
13353         vm_region_top_info_t       top)
13354 {
13355
13356         if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
13357                 top->share_mode = SM_EMPTY;
13358                 top->ref_count = 0;
13359                 top->obj_id = 0;
13360                 return;
13361         }
13362
13363         {
13364                 struct  vm_object *obj, *tmp_obj;
13365                 int             ref_count;
13366                 uint32_t        entry_size;
13367
13368                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
13369
13370                 obj = VME_OBJECT(entry);
13371
13372                 vm_object_lock(obj);
13373
13374                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13375                         ref_count--;
13376
13377                 assert(obj->reusable_page_count <= obj->resident_page_count);
13378                 if (obj->shadow) {
13379                         if (ref_count == 1)
13380                                 top->private_pages_resident =
13381                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13382                         else
13383                                 top->shared_pages_resident =
13384                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13385                         top->ref_count  = ref_count;
13386                         top->share_mode = SM_COW;
13387
13388                         while ((tmp_obj = obj->shadow)) {
13389                                 vm_object_lock(tmp_obj);
13390                                 vm_object_unlock(obj);
13391                                 obj = tmp_obj;
13392
13393                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13394                                         ref_count--;
13395
13396                                 assert(obj->reusable_page_count <= obj->resident_page_count);
13397                                 top->shared_pages_resident +=
13398                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13399                                 top->ref_count += ref_count - 1;
13400                         }
13401                 } else {
13402                         if (entry->superpage_size) {
13403                                 top->share_mode = SM_LARGE_PAGE;
13404                                 top->shared_pages_resident = 0;
13405                                 top->private_pages_resident = entry_size;
13406                         } else if (entry->needs_copy) {
13407                                 top->share_mode = SM_COW;
13408                                 top->shared_pages_resident =
13409                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13410                         } else {
13411                                 if (ref_count == 1 ||
13412                                     (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
13413                                         top->share_mode = SM_PRIVATE;
13414                                                 top->private_pages_resident =
13415                                                         OBJ_RESIDENT_COUNT(obj,
13416                                                                            entry_size);
13417                                 } else {
13418                                         top->share_mode = SM_SHARED;
13419                                         top->shared_pages_resident =
13420                                                 OBJ_RESIDENT_COUNT(obj,
13421                                                                   entry_size);
13422                                 }
13423                         }
13424                         top->ref_count = ref_count;
13425                 }
13426                 /* XXX K64: obj_id will be truncated */
13427                 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
13428
13429                 vm_object_unlock(obj);
13430         }
13431 }
13432
13433 void
13434 vm_map_region_walk(
13435         vm_map_t                        map,
13436         vm_map_offset_t                 va,
13437         vm_map_entry_t                  entry,
13438         vm_object_offset_t              offset,
13439         vm_object_size_t                range,
13440         vm_region_extended_info_t       extended,
13441         boolean_t                       look_for_pages,
13442         mach_msg_type_number_t count)
13443 {
13444         struct vm_object *obj, *tmp_obj;
13445         vm_map_offset_t       last_offset;
13446         int               i;
13447         int               ref_count;
13448         struct vm_object        *shadow_object;
13449         int                     shadow_depth;
13450         boolean_t         do_region_footprint;
13451
13452         do_region_footprint = task_self_region_footprint();
13453
13454         if ((VME_OBJECT(entry) == 0) ||
13455             (entry->is_sub_map) ||
13456             (VME_OBJECT(entry)->phys_contiguous &&
13457              !entry->superpage_size)) {
13458                 extended->share_mode = SM_EMPTY;
13459                 extended->ref_count = 0;
13460                 return;
13461         }
13462
13463         if (entry->superpage_size) {
13464                 extended->shadow_depth = 0;
13465                 extended->share_mode = SM_LARGE_PAGE;
13466                 extended->ref_count = 1;
13467                 extended->external_pager = 0;
13468                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
13469                 extended->shadow_depth = 0;
13470                 return;
13471         }
13472
13473         obj = VME_OBJECT(entry);
13474
13475         vm_object_lock(obj);
13476
13477         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13478                 ref_count--;
13479
13480         if (look_for_pages) {
13481                 for (last_offset = offset + range;
13482                      offset < last_offset;
13483                      offset += PAGE_SIZE_64, va += PAGE_SIZE) {
13484
13485                         if (do_region_footprint) {
13486                                 int disp;
13487
13488                                 disp = 0;
13489                                 pmap_query_page_info(map->pmap, va, &disp);
13490                                 if (disp & PMAP_QUERY_PAGE_PRESENT) {
13491                                         extended->pages_resident++;
13492                                         if (disp & PMAP_QUERY_PAGE_REUSABLE) {
13493                                                 extended->pages_reusable++;
13494                                         } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
13495                                                    (disp & PMAP_QUERY_PAGE_ALTACCT)) {
13496                                                 /* alternate accounting */
13497                                         } else {
13498                                                 extended->pages_dirtied++;
13499                                         }
13500                                 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
13501                                         if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
13502                                                 /* alternate accounting */
13503                                         } else {
13504                                                 extended->pages_swapped_out++;
13505                                         }
13506                                 }
13507                                 /* deal with alternate accounting */
13508                                 if (obj->purgable != VM_PURGABLE_DENY) {
13509                                         /*
13510                                          * Pages from purgeable objects
13511                                          * will be reported as dirty
13512                                          * appropriately in an extra
13513                                          * fake memory region at the end of
13514                                          * the address space.
13515                                          */
13516                                 } else if (entry->iokit_acct) {
13517                                         /*
13518                                          * IOKit mappings are considered
13519                                          * as fully dirty for footprint's
13520                                          * sake.
13521                                          */
13522                                         extended->pages_dirtied++;
13523                                 }
13524                                 continue;
13525                         }
13526
13527                         vm_map_region_look_for_page(map, va, obj,
13528                                                     offset, ref_count,
13529                                                     0, extended, count);
13530                 }
13531
13532                 if (do_region_footprint) {
13533                         goto collect_object_info;
13534                 }
13535
13536         } else {
13537         collect_object_info:
13538                 shadow_object = obj->shadow;
13539                 shadow_depth = 0;
13540
13541                 if ( !(obj->pager_trusted) && !(obj->internal))
13542                         extended->external_pager = 1;
13543
13544                 if (shadow_object != VM_OBJECT_NULL) {
13545                         vm_object_lock(shadow_object);
13546                         for (;
13547                              shadow_object != VM_OBJECT_NULL;
13548                              shadow_depth++) {
13549                                 vm_object_t     next_shadow;
13550
13551                                 if ( !(shadow_object->pager_trusted) &&
13552                                      !(shadow_object->internal))
13553                                         extended->external_pager = 1;
13554
13555                                 next_shadow = shadow_object->shadow;
13556                                 if (next_shadow) {
13557                                         vm_object_lock(next_shadow);
13558                                 }
13559                                 vm_object_unlock(shadow_object);
13560                                 shadow_object = next_shadow;
13561                         }
13562                 }
13563                 extended->shadow_depth = shadow_depth;
13564         }
13565
13566         if (extended->shadow_depth || entry->needs_copy)
13567                 extended->share_mode = SM_COW;
13568         else {
13569                 if (ref_count == 1)
13570                         extended->share_mode = SM_PRIVATE;
13571                 else {
13572                         if (obj->true_share)
13573                                 extended->share_mode = SM_TRUESHARED;
13574                         else
13575                                 extended->share_mode = SM_SHARED;
13576                 }
13577         }
13578         extended->ref_count = ref_count - extended->shadow_depth;
13579
13580         for (i = 0; i < extended->shadow_depth; i++) {
13581                 if ((tmp_obj = obj->shadow) == 0)
13582                         break;
13583                 vm_object_lock(tmp_obj);
13584                 vm_object_unlock(obj);
13585
13586                 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
13587                         ref_count--;
13588
13589                 extended->ref_count += ref_count;
13590                 obj = tmp_obj;
13591         }
13592         vm_object_unlock(obj);
13593
13594         if (extended->share_mode == SM_SHARED) {
13595                 vm_map_entry_t       cur;
13596                 vm_map_entry_t       last;
13597                 int      my_refs;
13598
13599                 obj = VME_OBJECT(entry);
13600                 last = vm_map_to_entry(map);
13601                 my_refs = 0;
13602
13603                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13604                         ref_count--;
13605                 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
13606                         my_refs += vm_map_region_count_obj_refs(cur, obj);
13607
13608                 if (my_refs == ref_count)
13609                         extended->share_mode = SM_PRIVATE_ALIASED;
13610                 else if (my_refs > 1)
13611                         extended->share_mode = SM_SHARED_ALIASED;
13612         }
13613 }
13614
13615
13616 /* object is locked on entry and locked on return */
13617
13618
13619 static void
13620 vm_map_region_look_for_page(
13621         __unused vm_map_t               map,
13622         __unused vm_map_offset_t        va,
13623         vm_object_t                     object,
13624         vm_object_offset_t              offset,
13625         int                             max_refcnt,
13626         int                             depth,
13627         vm_region_extended_info_t       extended,
13628         mach_msg_type_number_t count)
13629 {
13630         vm_page_t       p;
13631         vm_object_t     shadow;
13632         int             ref_count;
13633         vm_object_t     caller_object;
13634
13635         shadow = object->shadow;
13636         caller_object = object;
13637
13638
13639         while (TRUE) {
13640
13641                 if ( !(object->pager_trusted) && !(object->internal))
13642                         extended->external_pager = 1;
13643
13644                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
13645                         if (shadow && (max_refcnt == 1))
13646                                 extended->pages_shared_now_private++;
13647
13648                         if (!p->fictitious &&
13649                             (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
13650                                 extended->pages_dirtied++;
13651                         else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
13652                                 if (p->reusable || object->all_reusable) {
13653                                         extended->pages_reusable++;
13654                                 }
13655                         }
13656
13657                         extended->pages_resident++;
13658
13659                         if(object != caller_object)
13660                                 vm_object_unlock(object);
13661
13662                         return;
13663                 }
13664                 if (object->internal &&
13665                     object->alive &&
13666                     !object->terminating &&
13667                     object->pager_ready) {
13668
13669                         if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
13670                             == VM_EXTERNAL_STATE_EXISTS) {
13671                                 /* the pager has that page */
13672                                 extended->pages_swapped_out++;
13673                                 if (object != caller_object)
13674                                         vm_object_unlock(object);
13675                                 return;
13676                         }
13677                 }
13678
13679                 if (shadow) {
13680                         vm_object_lock(shadow);
13681
13682                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
13683                                 ref_count--;
13684
13685                         if (++depth > extended->shadow_depth)
13686                                 extended->shadow_depth = depth;
13687
13688                         if (ref_count > max_refcnt)
13689                                 max_refcnt = ref_count;
13690
13691                         if(object != caller_object)
13692                                 vm_object_unlock(object);
13693
13694                         offset = offset + object->vo_shadow_offset;
13695                         object = shadow;
13696                         shadow = object->shadow;
13697                         continue;
13698                 }
13699                 if(object != caller_object)
13700                         vm_object_unlock(object);
13701                 break;
13702         }
13703 }
13704
13705 static int
13706 vm_map_region_count_obj_refs(
13707         vm_map_entry_t    entry,
13708         vm_object_t       object)
13709 {
13710         int ref_count;
13711         vm_object_t chk_obj;
13712         vm_object_t tmp_obj;
13713
13714         if (VME_OBJECT(entry) == 0)
13715                 return(0);
13716
13717         if (entry->is_sub_map)
13718                 return(0);
13719         else {
13720                 ref_count = 0;
13721
13722                 chk_obj = VME_OBJECT(entry);
13723                 vm_object_lock(chk_obj);
13724
13725                 while (chk_obj) {
13726                         if (chk_obj == object)
13727                                 ref_count++;
13728                         tmp_obj = chk_obj->shadow;
13729                         if (tmp_obj)
13730                                 vm_object_lock(tmp_obj);
13731                         vm_object_unlock(chk_obj);
13732
13733                         chk_obj = tmp_obj;
13734                 }
13735         }
13736         return(ref_count);
13737 }
13738
13739
13740 /*
13741  *      Routine:        vm_map_simplify
13742  *
13743  *      Description:
13744  *              Attempt to simplify the map representation in
13745  *              the vicinity of the given starting address.
13746  *      Note:
13747  *              This routine is intended primarily to keep the
13748  *              kernel maps more compact -- they generally don't
13749  *              benefit from the "expand a map entry" technology
13750  *              at allocation time because the adjacent entry
13751  *              is often wired down.
13752  */
13753 void
13754 vm_map_simplify_entry(
13755         vm_map_t        map,
13756         vm_map_entry_t  this_entry)
13757 {
13758         vm_map_entry_t  prev_entry;
13759
13760         counter(c_vm_map_simplify_entry_called++);
13761
13762         prev_entry = this_entry->vme_prev;
13763
13764         if ((this_entry != vm_map_to_entry(map)) &&
13765             (prev_entry != vm_map_to_entry(map)) &&
13766
13767             (prev_entry->vme_end == this_entry->vme_start) &&
13768
13769             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
13770             (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
13771             ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
13772                                     prev_entry->vme_start))
13773              == VME_OFFSET(this_entry)) &&
13774
13775             (prev_entry->behavior == this_entry->behavior) &&
13776             (prev_entry->needs_copy == this_entry->needs_copy) &&
13777             (prev_entry->protection == this_entry->protection) &&
13778             (prev_entry->max_protection == this_entry->max_protection) &&
13779             (prev_entry->inheritance == this_entry->inheritance) &&
13780             (prev_entry->use_pmap == this_entry->use_pmap) &&
13781             (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
13782             (prev_entry->no_cache == this_entry->no_cache) &&
13783             (prev_entry->permanent == this_entry->permanent) &&
13784             (prev_entry->map_aligned == this_entry->map_aligned) &&
13785             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
13786             (prev_entry->used_for_jit == this_entry->used_for_jit) &&
13787             /* from_reserved_zone: OK if that field doesn't match */
13788             (prev_entry->iokit_acct == this_entry->iokit_acct) &&
13789             (prev_entry->vme_resilient_codesign ==
13790              this_entry->vme_resilient_codesign) &&
13791             (prev_entry->vme_resilient_media ==
13792              this_entry->vme_resilient_media) &&
13793
13794             (prev_entry->wired_count == this_entry->wired_count) &&
13795             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
13796
13797             ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
13798             (prev_entry->in_transition == FALSE) &&
13799             (this_entry->in_transition == FALSE) &&
13800             (prev_entry->needs_wakeup == FALSE) &&
13801             (this_entry->needs_wakeup == FALSE) &&
13802             (prev_entry->is_shared == FALSE) &&
13803             (this_entry->is_shared == FALSE) &&
13804             (prev_entry->superpage_size == FALSE) &&
13805             (this_entry->superpage_size == FALSE)
13806                 ) {
13807                 vm_map_store_entry_unlink(map, prev_entry);
13808                 assert(prev_entry->vme_start < this_entry->vme_end);
13809                 if (prev_entry->map_aligned)
13810                         assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
13811                                                    VM_MAP_PAGE_MASK(map)));
13812                 this_entry->vme_start = prev_entry->vme_start;
13813                 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
13814
13815                 if (map->holelistenabled) {
13816                         vm_map_store_update_first_free(map, this_entry, TRUE);
13817                 }
13818
13819                 if (prev_entry->is_sub_map) {
13820                         vm_map_deallocate(VME_SUBMAP(prev_entry));
13821                 } else {
13822                         vm_object_deallocate(VME_OBJECT(prev_entry));
13823                 }
13824                 vm_map_entry_dispose(map, prev_entry);
13825                 SAVE_HINT_MAP_WRITE(map, this_entry);
13826                 counter(c_vm_map_simplified++);
13827         }
13828 }
13829
13830 void
13831 vm_map_simplify(
13832         vm_map_t        map,
13833         vm_map_offset_t start)
13834 {
13835         vm_map_entry_t  this_entry;
13836
13837         vm_map_lock(map);
13838         if (vm_map_lookup_entry(map, start, &this_entry)) {
13839                 vm_map_simplify_entry(map, this_entry);
13840                 vm_map_simplify_entry(map, this_entry->vme_next);
13841         }
13842         counter(c_vm_map_simplify_called++);
13843         vm_map_unlock(map);
13844 }
13845
13846 static void
13847 vm_map_simplify_range(
13848         vm_map_t        map,
13849         vm_map_offset_t start,
13850         vm_map_offset_t end)
13851 {
13852         vm_map_entry_t  entry;
13853
13854         /*
13855          * The map should be locked (for "write") by the caller.
13856          */
13857
13858         if (start >= end) {
13859                 /* invalid address range */
13860                 return;
13861         }
13862
13863         start = vm_map_trunc_page(start,
13864                                   VM_MAP_PAGE_MASK(map));
13865         end = vm_map_round_page(end,
13866                                 VM_MAP_PAGE_MASK(map));
13867
13868         if (!vm_map_lookup_entry(map, start, &entry)) {
13869                 /* "start" is not mapped and "entry" ends before "start" */
13870                 if (entry == vm_map_to_entry(map)) {
13871                         /* start with first entry in the map */
13872                         entry = vm_map_first_entry(map);
13873                 } else {
13874                         /* start with next entry */
13875                         entry = entry->vme_next;
13876                 }
13877         }
13878
13879         while (entry != vm_map_to_entry(map) &&
13880                entry->vme_start <= end) {
13881                 /* try and coalesce "entry" with its previous entry */
13882                 vm_map_simplify_entry(map, entry);
13883                 entry = entry->vme_next;
13884         }
13885 }
13886
13887
13888 /*
13889  *      Routine:        vm_map_machine_attribute
13890  *      Purpose:
13891  *              Provide machine-specific attributes to mappings,
13892  *              such as cachability etc. for machines that provide
13893  *              them.  NUMA architectures and machines with big/strange
13894  *              caches will use this.
13895  *      Note:
13896  *              Responsibilities for locking and checking are handled here,
13897  *              everything else in the pmap module. If any non-volatile
13898  *              information must be kept, the pmap module should handle
13899  *              it itself. [This assumes that attributes do not
13900  *              need to be inherited, which seems ok to me]
13901  */
13902 kern_return_t
13903 vm_map_machine_attribute(
13904         vm_map_t                        map,
13905         vm_map_offset_t         start,
13906         vm_map_offset_t         end,
13907         vm_machine_attribute_t  attribute,
13908         vm_machine_attribute_val_t* value)              /* IN/OUT */
13909 {
13910         kern_return_t   ret;
13911         vm_map_size_t sync_size;
13912         vm_map_entry_t entry;
13913
13914         if (start < vm_map_min(map) || end > vm_map_max(map))
13915                 return KERN_INVALID_ADDRESS;
13916
13917         /* Figure how much memory we need to flush (in page increments) */
13918         sync_size = end - start;
13919
13920         vm_map_lock(map);
13921
13922         if (attribute != MATTR_CACHE) {
13923                 /* If we don't have to find physical addresses, we */
13924                 /* don't have to do an explicit traversal here.    */
13925                 ret = pmap_attribute(map->pmap, start, end-start,
13926                                      attribute, value);
13927                 vm_map_unlock(map);
13928                 return ret;
13929         }
13930
13931         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
13932
13933         while(sync_size) {
13934                 if (vm_map_lookup_entry(map, start, &entry)) {
13935                         vm_map_size_t   sub_size;
13936                         if((entry->vme_end - start) > sync_size) {
13937                                 sub_size = sync_size;
13938                                 sync_size = 0;
13939                         } else {
13940                                 sub_size = entry->vme_end - start;
13941                                 sync_size -= sub_size;
13942                         }
13943                         if(entry->is_sub_map) {
13944                                 vm_map_offset_t sub_start;
13945                                 vm_map_offset_t sub_end;
13946
13947                                 sub_start = (start - entry->vme_start)
13948                                         + VME_OFFSET(entry);
13949                                 sub_end = sub_start + sub_size;
13950                                 vm_map_machine_attribute(
13951                                         VME_SUBMAP(entry),
13952                                         sub_start,
13953                                         sub_end,
13954                                         attribute, value);
13955                         } else {
13956                                 if (VME_OBJECT(entry)) {
13957                                         vm_page_t               m;
13958                                         vm_object_t             object;
13959                                         vm_object_t             base_object;
13960                                         vm_object_t             last_object;
13961                                         vm_object_offset_t      offset;
13962                                         vm_object_offset_t      base_offset;
13963                                         vm_map_size_t           range;
13964                                         range = sub_size;
13965                                         offset = (start - entry->vme_start)
13966                                                 + VME_OFFSET(entry);
13967                                         base_offset = offset;
13968                                         object = VME_OBJECT(entry);
13969                                         base_object = object;
13970                                         last_object = NULL;
13971
13972                                         vm_object_lock(object);
13973
13974                                         while (range) {
13975                                                 m = vm_page_lookup(
13976                                                         object, offset);
13977
13978                                                 if (m && !m->fictitious) {
13979                                                         ret =
13980                                                                 pmap_attribute_cache_sync(
13981                                                                         VM_PAGE_GET_PHYS_PAGE(m),
13982                                                                         PAGE_SIZE,
13983                                                                         attribute, value);
13984
13985                                                 } else if (object->shadow) {
13986                                                         offset = offset + object->vo_shadow_offset;
13987                                                         last_object = object;
13988                                                         object = object->shadow;
13989                                                         vm_object_lock(last_object->shadow);
13990                                                         vm_object_unlock(last_object);
13991                                                         continue;
13992                                                 }
13993                                                 range -= PAGE_SIZE;
13994
13995                                                 if (base_object != object) {
13996                                                         vm_object_unlock(object);
13997                                                         vm_object_lock(base_object);
13998                                                         object = base_object;
13999                                                 }
14000                                                 /* Bump to the next page */
14001                                                 base_offset += PAGE_SIZE;
14002                                                 offset = base_offset;
14003                                         }
14004                                         vm_object_unlock(object);
14005                                 }
14006                         }
14007                         start += sub_size;
14008                 } else {
14009                         vm_map_unlock(map);
14010                         return KERN_FAILURE;
14011                 }
14012
14013         }
14014
14015         vm_map_unlock(map);
14016
14017         return ret;
14018 }
14019
14020 /*
14021  *      vm_map_behavior_set:
14022  *
14023  *      Sets the paging reference behavior of the specified address
14024  *      range in the target map.  Paging reference behavior affects
14025  *      how pagein operations resulting from faults on the map will be
14026  *      clustered.
14027  */
14028 kern_return_t
14029 vm_map_behavior_set(
14030         vm_map_t        map,
14031         vm_map_offset_t start,
14032         vm_map_offset_t end,
14033         vm_behavior_t   new_behavior)
14034 {
14035         vm_map_entry_t  entry;
14036         vm_map_entry_t  temp_entry;
14037
14038         XPR(XPR_VM_MAP,
14039             "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
14040             map, start, end, new_behavior, 0);
14041
14042         if (start > end ||
14043             start < vm_map_min(map) ||
14044             end > vm_map_max(map)) {
14045                 return KERN_NO_SPACE;
14046         }
14047
14048         switch (new_behavior) {
14049
14050         /*
14051          * This first block of behaviors all set a persistent state on the specified
14052          * memory range.  All we have to do here is to record the desired behavior
14053          * in the vm_map_entry_t's.
14054          */
14055
14056         case VM_BEHAVIOR_DEFAULT:
14057         case VM_BEHAVIOR_RANDOM:
14058         case VM_BEHAVIOR_SEQUENTIAL:
14059         case VM_BEHAVIOR_RSEQNTL:
14060         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14061                 vm_map_lock(map);
14062
14063                 /*
14064                  *      The entire address range must be valid for the map.
14065                  *      Note that vm_map_range_check() does a
14066                  *      vm_map_lookup_entry() internally and returns the
14067                  *      entry containing the start of the address range if
14068                  *      the entire range is valid.
14069                  */
14070                 if (vm_map_range_check(map, start, end, &temp_entry)) {
14071                         entry = temp_entry;
14072                         vm_map_clip_start(map, entry, start);
14073                 }
14074                 else {
14075                         vm_map_unlock(map);
14076                         return(KERN_INVALID_ADDRESS);
14077                 }
14078
14079                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14080                         vm_map_clip_end(map, entry, end);
14081                         if (entry->is_sub_map) {
14082                                 assert(!entry->use_pmap);
14083                         }
14084
14085                         if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
14086                                 entry->zero_wired_pages = TRUE;
14087                         } else {
14088                                 entry->behavior = new_behavior;
14089                         }
14090                         entry = entry->vme_next;
14091                 }
14092
14093                 vm_map_unlock(map);
14094                 break;
14095
14096         /*
14097          * The rest of these are different from the above in that they cause
14098          * an immediate action to take place as opposed to setting a behavior that
14099          * affects future actions.
14100          */
14101
14102         case VM_BEHAVIOR_WILLNEED:
14103                 return vm_map_willneed(map, start, end);
14104
14105         case VM_BEHAVIOR_DONTNEED:
14106                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14107
14108         case VM_BEHAVIOR_FREE:
14109                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14110
14111         case VM_BEHAVIOR_REUSABLE:
14112                 return vm_map_reusable_pages(map, start, end);
14113
14114         case VM_BEHAVIOR_REUSE:
14115                 return vm_map_reuse_pages(map, start, end);
14116
14117         case VM_BEHAVIOR_CAN_REUSE:
14118                 return vm_map_can_reuse(map, start, end);
14119
14120 #if MACH_ASSERT
14121         case VM_BEHAVIOR_PAGEOUT:
14122                 return vm_map_pageout(map, start, end);
14123 #endif /* MACH_ASSERT */
14124
14125         default:
14126                 return(KERN_INVALID_ARGUMENT);
14127         }
14128
14129         return(KERN_SUCCESS);
14130 }
14131
14132
14133 /*
14134  * Internals for madvise(MADV_WILLNEED) system call.
14135  *
14136  * The present implementation is to do a read-ahead if the mapping corresponds
14137  * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
14138  * and basically ignore the "advice" (which we are always free to do).
14139  */
14140
14141
14142 static kern_return_t
14143 vm_map_willneed(
14144         vm_map_t        map,
14145         vm_map_offset_t start,
14146         vm_map_offset_t end
14147 )
14148 {
14149         vm_map_entry_t                  entry;
14150         vm_object_t                     object;
14151         memory_object_t                 pager;
14152         struct vm_object_fault_info     fault_info;
14153         kern_return_t                   kr;
14154         vm_object_size_t                len;
14155         vm_object_offset_t              offset;
14156
14157         /*
14158          * Fill in static values in fault_info.  Several fields get ignored by the code
14159          * we call, but we'll fill them in anyway since uninitialized fields are bad
14160          * when it comes to future backwards compatibility.
14161          */
14162
14163         fault_info.interruptible = THREAD_UNINT;                /* ignored value */
14164         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
14165         fault_info.no_cache      = FALSE;                       /* ignored value */
14166         fault_info.stealth       = TRUE;
14167         fault_info.io_sync = FALSE;
14168         fault_info.cs_bypass = FALSE;
14169         fault_info.mark_zf_absent = FALSE;
14170         fault_info.batch_pmap_op = FALSE;
14171
14172         /*
14173          * The MADV_WILLNEED operation doesn't require any changes to the
14174          * vm_map_entry_t's, so the read lock is sufficient.
14175          */
14176
14177         vm_map_lock_read(map);
14178
14179         /*
14180          * The madvise semantics require that the address range be fully
14181          * allocated with no holes.  Otherwise, we're required to return
14182          * an error.
14183          */
14184
14185         if (! vm_map_range_check(map, start, end, &entry)) {
14186                 vm_map_unlock_read(map);
14187                 return KERN_INVALID_ADDRESS;
14188         }
14189
14190         /*
14191          * Examine each vm_map_entry_t in the range.
14192          */
14193         for (; entry != vm_map_to_entry(map) && start < end; ) {
14194
14195                 /*
14196                  * The first time through, the start address could be anywhere
14197                  * within the vm_map_entry we found.  So adjust the offset to
14198                  * correspond.  After that, the offset will always be zero to
14199                  * correspond to the beginning of the current vm_map_entry.
14200                  */
14201                 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14202
14203                 /*
14204                  * Set the length so we don't go beyond the end of the
14205                  * map_entry or beyond the end of the range we were given.
14206                  * This range could span also multiple map entries all of which
14207                  * map different files, so make sure we only do the right amount
14208                  * of I/O for each object.  Note that it's possible for there
14209                  * to be multiple map entries all referring to the same object
14210                  * but with different page permissions, but it's not worth
14211                  * trying to optimize that case.
14212                  */
14213                 len = MIN(entry->vme_end - start, end - start);
14214
14215                 if ((vm_size_t) len != len) {
14216                         /* 32-bit overflow */
14217                         len = (vm_size_t) (0 - PAGE_SIZE);
14218                 }
14219                 fault_info.cluster_size = (vm_size_t) len;
14220                 fault_info.lo_offset    = offset;
14221                 fault_info.hi_offset    = offset + len;
14222                 fault_info.user_tag     = VME_ALIAS(entry);
14223                 fault_info.pmap_options = 0;
14224                 if (entry->iokit_acct ||
14225                     (!entry->is_sub_map && !entry->use_pmap)) {
14226                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14227                 }
14228
14229                 /*
14230                  * If there's no read permission to this mapping, then just
14231                  * skip it.
14232                  */
14233                 if ((entry->protection & VM_PROT_READ) == 0) {
14234                         entry = entry->vme_next;
14235                         start = entry->vme_start;
14236                         continue;
14237                 }
14238
14239                 /*
14240                  * Find the file object backing this map entry.  If there is
14241                  * none, then we simply ignore the "will need" advice for this
14242                  * entry and go on to the next one.
14243                  */
14244                 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14245                         entry = entry->vme_next;
14246                         start = entry->vme_start;
14247                         continue;
14248                 }
14249
14250                 /*
14251                  * The data_request() could take a long time, so let's
14252                  * release the map lock to avoid blocking other threads.
14253                  */
14254                 vm_map_unlock_read(map);
14255
14256                 vm_object_paging_begin(object);
14257                 pager = object->pager;
14258                 vm_object_unlock(object);
14259
14260                 /*
14261                  * Get the data from the object asynchronously.
14262                  *
14263                  * Note that memory_object_data_request() places limits on the
14264                  * amount of I/O it will do.  Regardless of the len we
14265                  * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
14266                  * silently truncates the len to that size.  This isn't
14267                  * necessarily bad since madvise shouldn't really be used to
14268                  * page in unlimited amounts of data.  Other Unix variants
14269                  * limit the willneed case as well.  If this turns out to be an
14270                  * issue for developers, then we can always adjust the policy
14271                  * here and still be backwards compatible since this is all
14272                  * just "advice".
14273                  */
14274                 kr = memory_object_data_request(
14275                         pager,
14276                         offset + object->paging_offset,
14277                         0,      /* ignored */
14278                         VM_PROT_READ,
14279                         (memory_object_fault_info_t)&fault_info);
14280
14281                 vm_object_lock(object);
14282                 vm_object_paging_end(object);
14283                 vm_object_unlock(object);
14284
14285                 /*
14286                  * If we couldn't do the I/O for some reason, just give up on
14287                  * the madvise.  We still return success to the user since
14288                  * madvise isn't supposed to fail when the advice can't be
14289                  * taken.
14290                  */
14291                 if (kr != KERN_SUCCESS) {
14292                         return KERN_SUCCESS;
14293                 }
14294
14295                 start += len;
14296                 if (start >= end) {
14297                         /* done */
14298                         return KERN_SUCCESS;
14299                 }
14300
14301                 /* look up next entry */
14302                 vm_map_lock_read(map);
14303                 if (! vm_map_lookup_entry(map, start, &entry)) {
14304                         /*
14305                          * There's a new hole in the address range.
14306                          */
14307                         vm_map_unlock_read(map);
14308                         return KERN_INVALID_ADDRESS;
14309                 }
14310         }
14311
14312         vm_map_unlock_read(map);
14313         return KERN_SUCCESS;
14314 }
14315
14316 static boolean_t
14317 vm_map_entry_is_reusable(
14318         vm_map_entry_t entry)
14319 {
14320         /* Only user map entries */
14321
14322         vm_object_t object;
14323
14324         if (entry->is_sub_map) {
14325                 return FALSE;
14326         }
14327
14328         switch (VME_ALIAS(entry)) {
14329         case VM_MEMORY_MALLOC:
14330         case VM_MEMORY_MALLOC_SMALL:
14331         case VM_MEMORY_MALLOC_LARGE:
14332         case VM_MEMORY_REALLOC:
14333         case VM_MEMORY_MALLOC_TINY:
14334         case VM_MEMORY_MALLOC_LARGE_REUSABLE:
14335         case VM_MEMORY_MALLOC_LARGE_REUSED:
14336                 /*
14337                  * This is a malloc() memory region: check if it's still
14338                  * in its original state and can be re-used for more
14339                  * malloc() allocations.
14340                  */
14341                 break;
14342         default:
14343                 /*
14344                  * Not a malloc() memory region: let the caller decide if
14345                  * it's re-usable.
14346                  */
14347                 return TRUE;
14348         }
14349
14350         if (entry->is_shared ||
14351             entry->is_sub_map ||
14352             entry->in_transition ||
14353             entry->protection != VM_PROT_DEFAULT ||
14354             entry->max_protection != VM_PROT_ALL ||
14355             entry->inheritance != VM_INHERIT_DEFAULT ||
14356             entry->no_cache ||
14357             entry->permanent ||
14358             entry->superpage_size != FALSE ||
14359             entry->zero_wired_pages ||
14360             entry->wired_count != 0 ||
14361             entry->user_wired_count != 0) {
14362                 return FALSE;
14363         }
14364
14365         object = VME_OBJECT(entry);
14366         if (object == VM_OBJECT_NULL) {
14367                 return TRUE;
14368         }
14369         if (
14370 #if 0
14371                 /*
14372                  * Let's proceed even if the VM object is potentially
14373                  * shared.
14374                  * We check for this later when processing the actual
14375                  * VM pages, so the contents will be safe if shared.
14376                  *
14377                  * But we can still mark this memory region as "reusable" to
14378                  * acknowledge that the caller did let us know that the memory
14379                  * could be re-used and should not be penalized for holding
14380                  * on to it.  This allows its "resident size" to not include
14381                  * the reusable range.
14382                  */
14383             object->ref_count == 1 &&
14384 #endif
14385             object->wired_page_count == 0 &&
14386             object->copy == VM_OBJECT_NULL &&
14387             object->shadow == VM_OBJECT_NULL &&
14388             object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
14389             object->internal &&
14390             !object->true_share &&
14391             object->wimg_bits == VM_WIMG_USE_DEFAULT &&
14392             !object->code_signed) {
14393                 return TRUE;
14394         }
14395         return FALSE;
14396
14397
14398 }
14399
14400 static kern_return_t
14401 vm_map_reuse_pages(
14402         vm_map_t        map,
14403         vm_map_offset_t start,
14404         vm_map_offset_t end)
14405 {
14406         vm_map_entry_t                  entry;
14407         vm_object_t                     object;
14408         vm_object_offset_t              start_offset, end_offset;
14409
14410         /*
14411          * The MADV_REUSE operation doesn't require any changes to the
14412          * vm_map_entry_t's, so the read lock is sufficient.
14413          */
14414
14415         vm_map_lock_read(map);
14416         assert(map->pmap != kernel_pmap);       /* protect alias access */
14417
14418         /*
14419          * The madvise semantics require that the address range be fully
14420          * allocated with no holes.  Otherwise, we're required to return
14421          * an error.
14422          */
14423
14424         if (!vm_map_range_check(map, start, end, &entry)) {
14425                 vm_map_unlock_read(map);
14426                 vm_page_stats_reusable.reuse_pages_failure++;
14427                 return KERN_INVALID_ADDRESS;
14428         }
14429
14430         /*
14431          * Examine each vm_map_entry_t in the range.
14432          */
14433         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14434              entry = entry->vme_next) {
14435                 /*
14436                  * Sanity check on the VM map entry.
14437                  */
14438                 if (! vm_map_entry_is_reusable(entry)) {
14439                         vm_map_unlock_read(map);
14440                         vm_page_stats_reusable.reuse_pages_failure++;
14441                         return KERN_INVALID_ADDRESS;
14442                 }
14443
14444                 /*
14445                  * The first time through, the start address could be anywhere
14446                  * within the vm_map_entry we found.  So adjust the offset to
14447                  * correspond.
14448                  */
14449                 if (entry->vme_start < start) {
14450                         start_offset = start - entry->vme_start;
14451                 } else {
14452                         start_offset = 0;
14453                 }
14454                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14455                 start_offset += VME_OFFSET(entry);
14456                 end_offset += VME_OFFSET(entry);
14457
14458                 assert(!entry->is_sub_map);
14459                 object = VME_OBJECT(entry);
14460                 if (object != VM_OBJECT_NULL) {
14461                         vm_object_lock(object);
14462                         vm_object_reuse_pages(object, start_offset, end_offset,
14463                                               TRUE);
14464                         vm_object_unlock(object);
14465                 }
14466
14467                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
14468                         /*
14469                          * XXX
14470                          * We do not hold the VM map exclusively here.
14471                          * The "alias" field is not that critical, so it's
14472                          * safe to update it here, as long as it is the only
14473                          * one that can be modified while holding the VM map
14474                          * "shared".
14475                          */
14476                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
14477                 }
14478         }
14479
14480         vm_map_unlock_read(map);
14481         vm_page_stats_reusable.reuse_pages_success++;
14482         return KERN_SUCCESS;
14483 }
14484
14485
14486 static kern_return_t
14487 vm_map_reusable_pages(
14488         vm_map_t        map,
14489         vm_map_offset_t start,
14490         vm_map_offset_t end)
14491 {
14492         vm_map_entry_t                  entry;
14493         vm_object_t                     object;
14494         vm_object_offset_t              start_offset, end_offset;
14495         vm_map_offset_t                 pmap_offset;
14496
14497         /*
14498          * The MADV_REUSABLE operation doesn't require any changes to the
14499          * vm_map_entry_t's, so the read lock is sufficient.
14500          */
14501
14502         vm_map_lock_read(map);
14503         assert(map->pmap != kernel_pmap);       /* protect alias access */
14504
14505         /*
14506          * The madvise semantics require that the address range be fully
14507          * allocated with no holes.  Otherwise, we're required to return
14508          * an error.
14509          */
14510
14511         if (!vm_map_range_check(map, start, end, &entry)) {
14512                 vm_map_unlock_read(map);
14513                 vm_page_stats_reusable.reusable_pages_failure++;
14514                 return KERN_INVALID_ADDRESS;
14515         }
14516
14517         /*
14518          * Examine each vm_map_entry_t in the range.
14519          */
14520         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14521              entry = entry->vme_next) {
14522                 int kill_pages = 0;
14523
14524                 /*
14525                  * Sanity check on the VM map entry.
14526                  */
14527                 if (! vm_map_entry_is_reusable(entry)) {
14528                         vm_map_unlock_read(map);
14529                         vm_page_stats_reusable.reusable_pages_failure++;
14530                         return KERN_INVALID_ADDRESS;
14531                 }
14532
14533                 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
14534                         /* not writable: can't discard contents */
14535                         vm_map_unlock_read(map);
14536                         vm_page_stats_reusable.reusable_nonwritable++;
14537                         vm_page_stats_reusable.reusable_pages_failure++;
14538                         return KERN_PROTECTION_FAILURE;
14539                 }
14540
14541                 /*
14542                  * The first time through, the start address could be anywhere
14543                  * within the vm_map_entry we found.  So adjust the offset to
14544                  * correspond.
14545                  */
14546                 if (entry->vme_start < start) {
14547                         start_offset = start - entry->vme_start;
14548                         pmap_offset = start;
14549                 } else {
14550                         start_offset = 0;
14551                         pmap_offset = entry->vme_start;
14552                 }
14553                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14554                 start_offset += VME_OFFSET(entry);
14555                 end_offset += VME_OFFSET(entry);
14556
14557                 assert(!entry->is_sub_map);
14558                 object = VME_OBJECT(entry);
14559                 if (object == VM_OBJECT_NULL)
14560                         continue;
14561
14562
14563                 vm_object_lock(object);
14564                 if (((object->ref_count == 1) ||
14565                      (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
14566                       object->copy == VM_OBJECT_NULL)) &&
14567                     object->shadow == VM_OBJECT_NULL &&
14568                     /*
14569                      * "iokit_acct" entries are billed for their virtual size
14570                      * (rather than for their resident pages only), so they
14571                      * wouldn't benefit from making pages reusable, and it
14572                      * would be hard to keep track of pages that are both
14573                      * "iokit_acct" and "reusable" in the pmap stats and
14574                      * ledgers.
14575                      */
14576                     !(entry->iokit_acct ||
14577                       (!entry->is_sub_map && !entry->use_pmap))) {
14578                         if (object->ref_count != 1) {
14579                                 vm_page_stats_reusable.reusable_shared++;
14580                         }
14581                         kill_pages = 1;
14582                 } else {
14583                         kill_pages = -1;
14584                 }
14585                 if (kill_pages != -1) {
14586                         vm_object_deactivate_pages(object,
14587                                                    start_offset,
14588                                                    end_offset - start_offset,
14589                                                    kill_pages,
14590                                                    TRUE /*reusable_pages*/,
14591                                                    map->pmap,
14592                                                    pmap_offset);
14593                 } else {
14594                         vm_page_stats_reusable.reusable_pages_shared++;
14595                 }
14596                 vm_object_unlock(object);
14597
14598                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
14599                     VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
14600                         /*
14601                          * XXX
14602                          * We do not hold the VM map exclusively here.
14603                          * The "alias" field is not that critical, so it's
14604                          * safe to update it here, as long as it is the only
14605                          * one that can be modified while holding the VM map
14606                          * "shared".
14607                          */
14608                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
14609                 }
14610         }
14611
14612         vm_map_unlock_read(map);
14613         vm_page_stats_reusable.reusable_pages_success++;
14614         return KERN_SUCCESS;
14615 }
14616
14617
14618 static kern_return_t
14619 vm_map_can_reuse(
14620         vm_map_t        map,
14621         vm_map_offset_t start,
14622         vm_map_offset_t end)
14623 {
14624         vm_map_entry_t                  entry;
14625
14626         /*
14627          * The MADV_REUSABLE operation doesn't require any changes to the
14628          * vm_map_entry_t's, so the read lock is sufficient.
14629          */
14630
14631         vm_map_lock_read(map);
14632         assert(map->pmap != kernel_pmap);       /* protect alias access */
14633
14634         /*
14635          * The madvise semantics require that the address range be fully
14636          * allocated with no holes.  Otherwise, we're required to return
14637          * an error.
14638          */
14639
14640         if (!vm_map_range_check(map, start, end, &entry)) {
14641                 vm_map_unlock_read(map);
14642                 vm_page_stats_reusable.can_reuse_failure++;
14643                 return KERN_INVALID_ADDRESS;
14644         }
14645
14646         /*
14647          * Examine each vm_map_entry_t in the range.
14648          */
14649         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14650              entry = entry->vme_next) {
14651                 /*
14652                  * Sanity check on the VM map entry.
14653                  */
14654                 if (! vm_map_entry_is_reusable(entry)) {
14655                         vm_map_unlock_read(map);
14656                         vm_page_stats_reusable.can_reuse_failure++;
14657                         return KERN_INVALID_ADDRESS;
14658                 }
14659         }
14660
14661         vm_map_unlock_read(map);
14662         vm_page_stats_reusable.can_reuse_success++;
14663         return KERN_SUCCESS;
14664 }
14665
14666
14667 #if MACH_ASSERT
14668 static kern_return_t
14669 vm_map_pageout(
14670         vm_map_t        map,
14671         vm_map_offset_t start,
14672         vm_map_offset_t end)
14673 {
14674         vm_map_entry_t                  entry;
14675
14676         /*
14677          * The MADV_PAGEOUT operation doesn't require any changes to the
14678          * vm_map_entry_t's, so the read lock is sufficient.
14679          */
14680
14681         vm_map_lock_read(map);
14682
14683         /*
14684          * The madvise semantics require that the address range be fully
14685          * allocated with no holes.  Otherwise, we're required to return
14686          * an error.
14687          */
14688
14689         if (!vm_map_range_check(map, start, end, &entry)) {
14690                 vm_map_unlock_read(map);
14691                 return KERN_INVALID_ADDRESS;
14692         }
14693
14694         /*
14695          * Examine each vm_map_entry_t in the range.
14696          */
14697         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14698              entry = entry->vme_next) {
14699                 vm_object_t     object;
14700
14701                 /*
14702                  * Sanity check on the VM map entry.
14703                  */
14704                 if (entry->is_sub_map) {
14705                         vm_map_t submap;
14706                         vm_map_offset_t submap_start;
14707                         vm_map_offset_t submap_end;
14708                         vm_map_entry_t submap_entry;
14709
14710                         submap = VME_SUBMAP(entry);
14711                         submap_start = VME_OFFSET(entry);
14712                         submap_end = submap_start + (entry->vme_end -
14713                                                      entry->vme_start);
14714
14715                         vm_map_lock_read(submap);
14716
14717                         if (! vm_map_range_check(submap,
14718                                                  submap_start,
14719                                                  submap_end,
14720                                                  &submap_entry)) {
14721                                 vm_map_unlock_read(submap);
14722                                 vm_map_unlock_read(map);
14723                                 return KERN_INVALID_ADDRESS;
14724                         }
14725
14726                         object = VME_OBJECT(submap_entry);
14727                         if (submap_entry->is_sub_map ||
14728                             object == VM_OBJECT_NULL ||
14729                             !object->internal) {
14730                                 vm_map_unlock_read(submap);
14731                                 continue;
14732                         }
14733
14734                         vm_object_pageout(object);
14735
14736                         vm_map_unlock_read(submap);
14737                         submap = VM_MAP_NULL;
14738                         submap_entry = VM_MAP_ENTRY_NULL;
14739                         continue;
14740                 }
14741
14742                 object = VME_OBJECT(entry);
14743                 if (entry->is_sub_map ||
14744                     object == VM_OBJECT_NULL ||
14745                     !object->internal) {
14746                         continue;
14747                 }
14748
14749                 vm_object_pageout(object);
14750         }
14751
14752         vm_map_unlock_read(map);
14753         return KERN_SUCCESS;
14754 }
14755 #endif /* MACH_ASSERT */
14756
14757
14758 /*
14759  *      Routine:        vm_map_entry_insert
14760  *
14761  *      Descritpion:    This routine inserts a new vm_entry in a locked map.
14762  */
14763 vm_map_entry_t
14764 vm_map_entry_insert(
14765         vm_map_t                map,
14766         vm_map_entry_t          insp_entry,
14767         vm_map_offset_t         start,
14768         vm_map_offset_t         end,
14769         vm_object_t             object,
14770         vm_object_offset_t      offset,
14771         boolean_t               needs_copy,
14772         boolean_t               is_shared,
14773         boolean_t               in_transition,
14774         vm_prot_t               cur_protection,
14775         vm_prot_t               max_protection,
14776         vm_behavior_t           behavior,
14777         vm_inherit_t            inheritance,
14778         unsigned                wired_count,
14779         boolean_t               no_cache,
14780         boolean_t               permanent,
14781         unsigned int            superpage_size,
14782         boolean_t               clear_map_aligned,
14783         boolean_t               is_submap,
14784         boolean_t               used_for_jit,
14785         int                     alias)
14786 {
14787         vm_map_entry_t  new_entry;
14788
14789         assert(insp_entry != (vm_map_entry_t)0);
14790
14791 #if DEVELOPMENT || DEBUG
14792         vm_object_offset_t      end_offset = 0;
14793         assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
14794 #endif /* DEVELOPMENT || DEBUG */
14795
14796         new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
14797
14798         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
14799                 new_entry->map_aligned = TRUE;
14800         } else {
14801                 new_entry->map_aligned = FALSE;
14802         }
14803         if (clear_map_aligned &&
14804             (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
14805              ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
14806                 new_entry->map_aligned = FALSE;
14807         }
14808
14809         new_entry->vme_start = start;
14810         new_entry->vme_end = end;
14811         assert(page_aligned(new_entry->vme_start));
14812         assert(page_aligned(new_entry->vme_end));
14813         if (new_entry->map_aligned) {
14814                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
14815                                            VM_MAP_PAGE_MASK(map)));
14816                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
14817                                            VM_MAP_PAGE_MASK(map)));
14818         }
14819         assert(new_entry->vme_start < new_entry->vme_end);
14820
14821         VME_OBJECT_SET(new_entry, object);
14822         VME_OFFSET_SET(new_entry, offset);
14823         new_entry->is_shared = is_shared;
14824         new_entry->is_sub_map = is_submap;
14825         new_entry->needs_copy = needs_copy;
14826         new_entry->in_transition = in_transition;
14827         new_entry->needs_wakeup = FALSE;
14828         new_entry->inheritance = inheritance;
14829         new_entry->protection = cur_protection;
14830         new_entry->max_protection = max_protection;
14831         new_entry->behavior = behavior;
14832         new_entry->wired_count = wired_count;
14833         new_entry->user_wired_count = 0;
14834         if (is_submap) {
14835                 /*
14836                  * submap: "use_pmap" means "nested".
14837                  * default: false.
14838                  */
14839                 new_entry->use_pmap = FALSE;
14840         } else {
14841                 /*
14842                  * object: "use_pmap" means "use pmap accounting" for footprint.
14843                  * default: true.
14844                  */
14845                 new_entry->use_pmap = TRUE;
14846         }
14847         VME_ALIAS_SET(new_entry, alias);
14848         new_entry->zero_wired_pages = FALSE;
14849         new_entry->no_cache = no_cache;
14850         new_entry->permanent = permanent;
14851         if (superpage_size)
14852                 new_entry->superpage_size = TRUE;
14853         else
14854                 new_entry->superpage_size = FALSE;
14855         if (used_for_jit){
14856                 if (!(map->jit_entry_exists)){
14857                         new_entry->used_for_jit = TRUE;
14858                         map->jit_entry_exists = TRUE;
14859
14860                         /* Tell the pmap that it supports JIT. */
14861                         pmap_set_jit_entitled(map->pmap);
14862                 }
14863         } else {
14864                 new_entry->used_for_jit = FALSE;
14865         }
14866         new_entry->iokit_acct = FALSE;
14867         new_entry->vme_resilient_codesign = FALSE;
14868         new_entry->vme_resilient_media = FALSE;
14869         new_entry->vme_atomic = FALSE;
14870
14871         /*
14872          *      Insert the new entry into the list.
14873          */
14874
14875         vm_map_store_entry_link(map, insp_entry, new_entry);
14876         map->size += end - start;
14877
14878         /*
14879          *      Update the free space hint and the lookup hint.
14880          */
14881
14882         SAVE_HINT_MAP_WRITE(map, new_entry);
14883         return new_entry;
14884 }
14885
14886 /*
14887  *      Routine:        vm_map_remap_extract
14888  *
14889  *      Description:    This routine returns a vm_entry list from a map.
14890  */
14891 static kern_return_t
14892 vm_map_remap_extract(
14893         vm_map_t                map,
14894         vm_map_offset_t         addr,
14895         vm_map_size_t           size,
14896         boolean_t               copy,
14897         struct vm_map_header    *map_header,
14898         vm_prot_t               *cur_protection,
14899         vm_prot_t               *max_protection,
14900         /* What, no behavior? */
14901         vm_inherit_t            inheritance,
14902         boolean_t               pageable,
14903         boolean_t               same_map,
14904         vm_map_kernel_flags_t   vmk_flags)
14905 {
14906         kern_return_t           result;
14907         vm_map_size_t           mapped_size;
14908         vm_map_size_t           tmp_size;
14909         vm_map_entry_t          src_entry;     /* result of last map lookup */
14910         vm_map_entry_t          new_entry;
14911         vm_object_offset_t      offset;
14912         vm_map_offset_t         map_address;
14913         vm_map_offset_t         src_start;     /* start of entry to map */
14914         vm_map_offset_t         src_end;       /* end of region to be mapped */
14915         vm_object_t             object;
14916         vm_map_version_t        version;
14917         boolean_t               src_needs_copy;
14918         boolean_t               new_entry_needs_copy;
14919         vm_map_entry_t          saved_src_entry;
14920         boolean_t               src_entry_was_wired;
14921
14922         assert(map != VM_MAP_NULL);
14923         assert(size != 0);
14924         assert(size == vm_map_round_page(size, PAGE_MASK));
14925         assert(inheritance == VM_INHERIT_NONE ||
14926                inheritance == VM_INHERIT_COPY ||
14927                inheritance == VM_INHERIT_SHARE);
14928
14929         /*
14930          *      Compute start and end of region.
14931          */
14932         src_start = vm_map_trunc_page(addr, PAGE_MASK);
14933         src_end = vm_map_round_page(src_start + size, PAGE_MASK);
14934
14935
14936         /*
14937          *      Initialize map_header.
14938          */
14939         map_header->links.next = (struct vm_map_entry *)&map_header->links;
14940         map_header->links.prev = (struct vm_map_entry *)&map_header->links;
14941         map_header->nentries = 0;
14942         map_header->entries_pageable = pageable;
14943         map_header->page_shift = PAGE_SHIFT;
14944
14945         vm_map_store_init( map_header );
14946
14947         *cur_protection = VM_PROT_ALL;
14948         *max_protection = VM_PROT_ALL;
14949
14950         map_address = 0;
14951         mapped_size = 0;
14952         result = KERN_SUCCESS;
14953
14954         /*
14955          *      The specified source virtual space might correspond to
14956          *      multiple map entries, need to loop on them.
14957          */
14958         vm_map_lock(map);
14959         while (mapped_size != size) {
14960                 vm_map_size_t   entry_size;
14961
14962                 /*
14963                  *      Find the beginning of the region.
14964                  */
14965                 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
14966                         result = KERN_INVALID_ADDRESS;
14967                         break;
14968                 }
14969
14970                 if (src_start < src_entry->vme_start ||
14971                     (mapped_size && src_start != src_entry->vme_start)) {
14972                         result = KERN_INVALID_ADDRESS;
14973                         break;
14974                 }
14975
14976                 tmp_size = size - mapped_size;
14977                 if (src_end > src_entry->vme_end)
14978                         tmp_size -= (src_end - src_entry->vme_end);
14979
14980                 entry_size = (vm_map_size_t)(src_entry->vme_end -
14981                                              src_entry->vme_start);
14982
14983                 if(src_entry->is_sub_map) {
14984                         vm_map_reference(VME_SUBMAP(src_entry));
14985                         object = VM_OBJECT_NULL;
14986                 } else {
14987                         object = VME_OBJECT(src_entry);
14988                         if (src_entry->iokit_acct) {
14989                                 /*
14990                                  * This entry uses "IOKit accounting".
14991                                  */
14992                         } else if (object != VM_OBJECT_NULL &&
14993                                    object->purgable != VM_PURGABLE_DENY) {
14994                                 /*
14995                                  * Purgeable objects have their own accounting:
14996                                  * no pmap accounting for them.
14997                                  */
14998                                 assertf(!src_entry->use_pmap,
14999                                         "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15000                                         map,
15001                                         src_entry,
15002                                         (uint64_t)src_entry->vme_start,
15003                                         (uint64_t)src_entry->vme_end,
15004                                         src_entry->protection,
15005                                         src_entry->max_protection,
15006                                         VME_ALIAS(src_entry));
15007                         } else {
15008                                 /*
15009                                  * Not IOKit or purgeable:
15010                                  * must be accounted by pmap stats.
15011                                  */
15012                                 assertf(src_entry->use_pmap,
15013                                         "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15014                                         map,
15015                                         src_entry,
15016                                         (uint64_t)src_entry->vme_start,
15017                                         (uint64_t)src_entry->vme_end,
15018                                         src_entry->protection,
15019                                         src_entry->max_protection,
15020                                         VME_ALIAS(src_entry));
15021                         }
15022
15023                         if (object == VM_OBJECT_NULL) {
15024                                 object = vm_object_allocate(entry_size);
15025                                 VME_OFFSET_SET(src_entry, 0);
15026                                 VME_OBJECT_SET(src_entry, object);
15027                                 assert(src_entry->use_pmap);
15028                         } else if (object->copy_strategy !=
15029                                    MEMORY_OBJECT_COPY_SYMMETRIC) {
15030                                 /*
15031                                  *      We are already using an asymmetric
15032                                  *      copy, and therefore we already have
15033                                  *      the right object.
15034                                  */
15035                                 assert(!src_entry->needs_copy);
15036                         } else if (src_entry->needs_copy || object->shadowed ||
15037                                    (object->internal && !object->true_share &&
15038                                     !src_entry->is_shared &&
15039                                     object->vo_size > entry_size)) {
15040
15041                                 VME_OBJECT_SHADOW(src_entry, entry_size);
15042                                 assert(src_entry->use_pmap);
15043
15044                                 if (!src_entry->needs_copy &&
15045                                     (src_entry->protection & VM_PROT_WRITE)) {
15046                                         vm_prot_t prot;
15047
15048                                         assert(!pmap_has_prot_policy(src_entry->protection));
15049
15050                                         prot = src_entry->protection & ~VM_PROT_WRITE;
15051
15052                                         if (override_nx(map,
15053                                                         VME_ALIAS(src_entry))
15054                                             && prot)
15055                                                 prot |= VM_PROT_EXECUTE;
15056
15057                                         assert(!pmap_has_prot_policy(prot));
15058
15059                                         if(map->mapped_in_other_pmaps) {
15060                                                 vm_object_pmap_protect(
15061                                                         VME_OBJECT(src_entry),
15062                                                         VME_OFFSET(src_entry),
15063                                                         entry_size,
15064                                                         PMAP_NULL,
15065                                                         src_entry->vme_start,
15066                                                         prot);
15067                                         } else {
15068                                                 pmap_protect(vm_map_pmap(map),
15069                                                              src_entry->vme_start,
15070                                                              src_entry->vme_end,
15071                                                              prot);
15072                                         }
15073                                 }
15074
15075                                 object = VME_OBJECT(src_entry);
15076                                 src_entry->needs_copy = FALSE;
15077                         }
15078
15079
15080                         vm_object_lock(object);
15081                         vm_object_reference_locked(object); /* object ref. for new entry */
15082                         if (object->copy_strategy ==
15083                             MEMORY_OBJECT_COPY_SYMMETRIC) {
15084                                 object->copy_strategy =
15085                                         MEMORY_OBJECT_COPY_DELAY;
15086                         }
15087                         vm_object_unlock(object);
15088                 }
15089
15090                 offset = (VME_OFFSET(src_entry) +
15091                           (src_start - src_entry->vme_start));
15092
15093                 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15094                 vm_map_entry_copy(new_entry, src_entry);
15095                 if (new_entry->is_sub_map) {
15096                         /* clr address space specifics */
15097                         new_entry->use_pmap = FALSE;
15098                 } else if (copy) {
15099                         /*
15100                          * We're dealing with a copy-on-write operation,
15101                          * so the resulting mapping should not inherit the
15102                          * original mapping's accounting settings.
15103                          * "use_pmap" should be reset to its default (TRUE)
15104                          * so that the new mapping gets accounted for in
15105                          * the task's memory footprint.
15106                          */
15107                         new_entry->use_pmap = TRUE;
15108                 }
15109                 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15110                 assert(!new_entry->iokit_acct);
15111
15112                 new_entry->map_aligned = FALSE;
15113
15114                 new_entry->vme_start = map_address;
15115                 new_entry->vme_end = map_address + tmp_size;
15116                 assert(new_entry->vme_start < new_entry->vme_end);
15117                 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15118                         /*
15119                          * Remapping for vm_map_protect(VM_PROT_COPY)
15120                          * to convert a read-only mapping into a
15121                          * copy-on-write version of itself but
15122                          * with write access:
15123                          * keep the original inheritance and add
15124                          * VM_PROT_WRITE to the max protection.
15125                          */
15126                         new_entry->inheritance = src_entry->inheritance;
15127                         new_entry->max_protection |= VM_PROT_WRITE;
15128                 } else {
15129                         new_entry->inheritance = inheritance;
15130                 }
15131                 VME_OFFSET_SET(new_entry, offset);
15132
15133                 /*
15134                  * The new region has to be copied now if required.
15135                  */
15136         RestartCopy:
15137                 if (!copy) {
15138                         /*
15139                          * Cannot allow an entry describing a JIT
15140                          * region to be shared across address spaces.
15141                          */
15142                         if (src_entry->used_for_jit == TRUE && !same_map) {
15143                                 result = KERN_INVALID_ARGUMENT;
15144                                 break;
15145                         }
15146                         src_entry->is_shared = TRUE;
15147                         new_entry->is_shared = TRUE;
15148                         if (!(new_entry->is_sub_map))
15149                                 new_entry->needs_copy = FALSE;
15150
15151                 } else if (src_entry->is_sub_map) {
15152                         /* make this a COW sub_map if not already */
15153                         assert(new_entry->wired_count == 0);
15154                         new_entry->needs_copy = TRUE;
15155                         object = VM_OBJECT_NULL;
15156                 } else if (src_entry->wired_count == 0 &&
15157                            vm_object_copy_quickly(&VME_OBJECT(new_entry),
15158                                                   VME_OFFSET(new_entry),
15159                                                   (new_entry->vme_end -
15160                                                    new_entry->vme_start),
15161                                                   &src_needs_copy,
15162                                                   &new_entry_needs_copy)) {
15163
15164                         new_entry->needs_copy = new_entry_needs_copy;
15165                         new_entry->is_shared = FALSE;
15166                         assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15167
15168                         /*
15169                          * Handle copy_on_write semantics.
15170                          */
15171                         if (src_needs_copy && !src_entry->needs_copy) {
15172                                 vm_prot_t prot;
15173
15174                                 assert(!pmap_has_prot_policy(src_entry->protection));
15175
15176                                 prot = src_entry->protection & ~VM_PROT_WRITE;
15177
15178                                 if (override_nx(map,
15179                                                 VME_ALIAS(src_entry))
15180                                     && prot)
15181                                         prot |= VM_PROT_EXECUTE;
15182
15183                                 assert(!pmap_has_prot_policy(prot));
15184
15185                                 vm_object_pmap_protect(object,
15186                                                        offset,
15187                                                        entry_size,
15188                                                        ((src_entry->is_shared
15189                                                          || map->mapped_in_other_pmaps) ?
15190                                                         PMAP_NULL : map->pmap),
15191                                                        src_entry->vme_start,
15192                                                        prot);
15193
15194                                 assert(src_entry->wired_count == 0);
15195                                 src_entry->needs_copy = TRUE;
15196                         }
15197                         /*
15198                          * Throw away the old object reference of the new entry.
15199                          */
15200                         vm_object_deallocate(object);
15201
15202                 } else {
15203                         new_entry->is_shared = FALSE;
15204                         assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15205
15206                         src_entry_was_wired = (src_entry->wired_count > 0);
15207                         saved_src_entry = src_entry;
15208                         src_entry = VM_MAP_ENTRY_NULL;
15209
15210                         /*
15211                          * The map can be safely unlocked since we
15212                          * already hold a reference on the object.
15213                          *
15214                          * Record the timestamp of the map for later
15215                          * verification, and unlock the map.
15216                          */
15217                         version.main_timestamp = map->timestamp;
15218                         vm_map_unlock(map);     /* Increments timestamp once! */
15219
15220                         /*
15221                          * Perform the copy.
15222                          */
15223                         if (src_entry_was_wired > 0) {
15224                                 vm_object_lock(object);
15225                                 result = vm_object_copy_slowly(
15226                                         object,
15227                                         offset,
15228                                         (new_entry->vme_end -
15229                                         new_entry->vme_start),
15230                                         THREAD_UNINT,
15231                                         &VME_OBJECT(new_entry));
15232
15233                                 VME_OFFSET_SET(new_entry, 0);
15234                                 new_entry->needs_copy = FALSE;
15235                         } else {
15236                                 vm_object_offset_t new_offset;
15237
15238                                 new_offset = VME_OFFSET(new_entry);
15239                                 result = vm_object_copy_strategically(
15240                                         object,
15241                                         offset,
15242                                         (new_entry->vme_end -
15243                                         new_entry->vme_start),
15244                                         &VME_OBJECT(new_entry),
15245                                         &new_offset,
15246                                         &new_entry_needs_copy);
15247                                 if (new_offset != VME_OFFSET(new_entry)) {
15248                                         VME_OFFSET_SET(new_entry, new_offset);
15249                                 }
15250
15251                                 new_entry->needs_copy = new_entry_needs_copy;
15252                         }
15253
15254                         /*
15255                          * Throw away the old object reference of the new entry.
15256                          */
15257                         vm_object_deallocate(object);
15258
15259                         if (result != KERN_SUCCESS &&
15260                             result != KERN_MEMORY_RESTART_COPY) {
15261                                 _vm_map_entry_dispose(map_header, new_entry);
15262                                 vm_map_lock(map);
15263                                 break;
15264                         }
15265
15266                         /*
15267                          * Verify that the map has not substantially
15268                          * changed while the copy was being made.
15269                          */
15270
15271                         vm_map_lock(map);
15272                         if (version.main_timestamp + 1 != map->timestamp) {
15273                                 /*
15274                                  * Simple version comparison failed.
15275                                  *
15276                                  * Retry the lookup and verify that the
15277                                  * same object/offset are still present.
15278                                  */
15279                                 saved_src_entry = VM_MAP_ENTRY_NULL;
15280                                 vm_object_deallocate(VME_OBJECT(new_entry));
15281                                 _vm_map_entry_dispose(map_header, new_entry);
15282                                 if (result == KERN_MEMORY_RESTART_COPY)
15283                                         result = KERN_SUCCESS;
15284                                 continue;
15285                         }
15286                         /* map hasn't changed: src_entry is still valid */
15287                         src_entry = saved_src_entry;
15288                         saved_src_entry = VM_MAP_ENTRY_NULL;
15289
15290                         if (result == KERN_MEMORY_RESTART_COPY) {
15291                                 vm_object_reference(object);
15292                                 goto RestartCopy;
15293                         }
15294                 }
15295
15296                 _vm_map_store_entry_link(map_header,
15297                                    map_header->links.prev, new_entry);
15298
15299                 /*Protections for submap mapping are irrelevant here*/
15300                 if( !src_entry->is_sub_map ) {
15301                         *cur_protection &= src_entry->protection;
15302                         *max_protection &= src_entry->max_protection;
15303                 }
15304                 map_address += tmp_size;
15305                 mapped_size += tmp_size;
15306                 src_start += tmp_size;
15307
15308         } /* end while */
15309
15310         vm_map_unlock(map);
15311         if (result != KERN_SUCCESS) {
15312                 /*
15313                  * Free all allocated elements.
15314                  */
15315                 for (src_entry = map_header->links.next;
15316                      src_entry != (struct vm_map_entry *)&map_header->links;
15317                      src_entry = new_entry) {
15318                         new_entry = src_entry->vme_next;
15319                         _vm_map_store_entry_unlink(map_header, src_entry);
15320                         if (src_entry->is_sub_map) {
15321                                 vm_map_deallocate(VME_SUBMAP(src_entry));
15322                         } else {
15323                                 vm_object_deallocate(VME_OBJECT(src_entry));
15324                         }
15325                         _vm_map_entry_dispose(map_header, src_entry);
15326                 }
15327         }
15328         return result;
15329 }
15330
15331 /*
15332  *      Routine:        vm_remap
15333  *
15334  *                      Map portion of a task's address space.
15335  *                      Mapped region must not overlap more than
15336  *                      one vm memory object. Protections and
15337  *                      inheritance attributes remain the same
15338  *                      as in the original task and are out parameters.
15339  *                      Source and Target task can be identical
15340  *                      Other attributes are identical as for vm_map()
15341  */
15342 kern_return_t
15343 vm_map_remap(
15344         vm_map_t                target_map,
15345         vm_map_address_t        *address,
15346         vm_map_size_t           size,
15347         vm_map_offset_t         mask,
15348         int                     flags,
15349         vm_map_kernel_flags_t   vmk_flags,
15350         vm_tag_t                tag,
15351         vm_map_t                src_map,
15352         vm_map_offset_t         memory_address,
15353         boolean_t               copy,
15354         vm_prot_t               *cur_protection,
15355         vm_prot_t               *max_protection,
15356         vm_inherit_t            inheritance)
15357 {
15358         kern_return_t           result;
15359         vm_map_entry_t          entry;
15360         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
15361         vm_map_entry_t          new_entry;
15362         struct vm_map_header    map_header;
15363         vm_map_offset_t         offset_in_mapping;
15364
15365         if (target_map == VM_MAP_NULL)
15366                 return KERN_INVALID_ARGUMENT;
15367
15368         switch (inheritance) {
15369         case VM_INHERIT_NONE:
15370         case VM_INHERIT_COPY:
15371         case VM_INHERIT_SHARE:
15372                 if (size != 0 && src_map != VM_MAP_NULL)
15373                         break;
15374                 /*FALL THRU*/
15375         default:
15376                 return KERN_INVALID_ARGUMENT;
15377         }
15378
15379         /*
15380          * If the user is requesting that we return the address of the
15381          * first byte of the data (rather than the base of the page),
15382          * then we use different rounding semantics: specifically,
15383          * we assume that (memory_address, size) describes a region
15384          * all of whose pages we must cover, rather than a base to be truncated
15385          * down and a size to be added to that base.  So we figure out
15386          * the highest page that the requested region includes and make
15387          * sure that the size will cover it.
15388          *
15389          * The key example we're worried about it is of the form:
15390          *
15391          *              memory_address = 0x1ff0, size = 0x20
15392          *
15393          * With the old semantics, we round down the memory_address to 0x1000
15394          * and round up the size to 0x1000, resulting in our covering *only*
15395          * page 0x1000.  With the new semantics, we'd realize that the region covers
15396          * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
15397          * 0x1000 and page 0x2000 in the region we remap.
15398          */
15399         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15400                 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
15401                 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
15402         } else {
15403                 size = vm_map_round_page(size, PAGE_MASK);
15404         }
15405         if (size == 0) {
15406                 return KERN_INVALID_ARGUMENT;
15407         }
15408
15409         result = vm_map_remap_extract(src_map, memory_address,
15410                                       size, copy, &map_header,
15411                                       cur_protection,
15412                                       max_protection,
15413                                       inheritance,
15414                                       target_map->hdr.entries_pageable,
15415                                       src_map == target_map,
15416                                       vmk_flags);
15417
15418         if (result != KERN_SUCCESS) {
15419                 return result;
15420         }
15421
15422         /*
15423          * Allocate/check a range of free virtual address
15424          * space for the target
15425          */
15426         *address = vm_map_trunc_page(*address,
15427                                      VM_MAP_PAGE_MASK(target_map));
15428         vm_map_lock(target_map);
15429         result = vm_map_remap_range_allocate(target_map, address, size,
15430                                              mask, flags, vmk_flags, tag,
15431                                              &insp_entry);
15432
15433         for (entry = map_header.links.next;
15434              entry != (struct vm_map_entry *)&map_header.links;
15435              entry = new_entry) {
15436                 new_entry = entry->vme_next;
15437                 _vm_map_store_entry_unlink(&map_header, entry);
15438                 if (result == KERN_SUCCESS) {
15439                         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15440                                 /* no codesigning -> read-only access */
15441                                 assert(!entry->used_for_jit);
15442                                 entry->max_protection = VM_PROT_READ;
15443                                 entry->protection = VM_PROT_READ;
15444                                 entry->vme_resilient_codesign = TRUE;
15445                         }
15446                         entry->vme_start += *address;
15447                         entry->vme_end += *address;
15448                         assert(!entry->map_aligned);
15449                         vm_map_store_entry_link(target_map, insp_entry, entry);
15450                         insp_entry = entry;
15451                 } else {
15452                         if (!entry->is_sub_map) {
15453                                 vm_object_deallocate(VME_OBJECT(entry));
15454                         } else {
15455                                 vm_map_deallocate(VME_SUBMAP(entry));
15456                         }
15457                         _vm_map_entry_dispose(&map_header, entry);
15458                 }
15459         }
15460
15461         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15462                 *cur_protection = VM_PROT_READ;
15463                 *max_protection = VM_PROT_READ;
15464         }
15465
15466         if( target_map->disable_vmentry_reuse == TRUE) {
15467                 assert(!target_map->is_nested_map);
15468                 if( target_map->highest_entry_end < insp_entry->vme_end ){
15469                         target_map->highest_entry_end = insp_entry->vme_end;
15470                 }
15471         }
15472
15473         if (result == KERN_SUCCESS) {
15474                 target_map->size += size;
15475                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
15476
15477         }
15478         vm_map_unlock(target_map);
15479
15480         if (result == KERN_SUCCESS && target_map->wiring_required)
15481                 result = vm_map_wire_kernel(target_map, *address,
15482                                      *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
15483                                      TRUE);
15484
15485         /*
15486          * If requested, return the address of the data pointed to by the
15487          * request, rather than the base of the resulting page.
15488          */
15489         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15490                 *address += offset_in_mapping;
15491         }
15492
15493         return result;
15494 }
15495
15496 /*
15497  *      Routine:        vm_map_remap_range_allocate
15498  *
15499  *      Description:
15500  *              Allocate a range in the specified virtual address map.
15501  *              returns the address and the map entry just before the allocated
15502  *              range
15503  *
15504  *      Map must be locked.
15505  */
15506
15507 static kern_return_t
15508 vm_map_remap_range_allocate(
15509         vm_map_t                map,
15510         vm_map_address_t        *address,       /* IN/OUT */
15511         vm_map_size_t           size,
15512         vm_map_offset_t         mask,
15513         int                     flags,
15514         __unused vm_map_kernel_flags_t  vmk_flags,
15515         __unused vm_tag_t       tag,
15516         vm_map_entry_t          *map_entry)     /* OUT */
15517 {
15518         vm_map_entry_t  entry;
15519         vm_map_offset_t start;
15520         vm_map_offset_t end;
15521         kern_return_t   kr;
15522         vm_map_entry_t          hole_entry;
15523
15524 StartAgain: ;
15525
15526         start = *address;
15527
15528         if (flags & VM_FLAGS_ANYWHERE)
15529         {
15530                 if (flags & VM_FLAGS_RANDOM_ADDR)
15531                 {
15532                         /*
15533                          * Get a random start address.
15534                          */
15535                         kr = vm_map_random_address_for_size(map, address, size);
15536                         if (kr != KERN_SUCCESS) {
15537                                 return(kr);
15538                         }
15539                         start = *address;
15540                 }
15541
15542                 /*
15543                  *      Calculate the first possible address.
15544                  */
15545
15546                 if (start < map->min_offset)
15547                         start = map->min_offset;
15548                 if (start > map->max_offset)
15549                         return(KERN_NO_SPACE);
15550
15551                 /*
15552                  *      Look for the first possible address;
15553                  *      if there's already something at this
15554                  *      address, we have to start after it.
15555                  */
15556
15557                 if( map->disable_vmentry_reuse == TRUE) {
15558                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
15559                 } else {
15560
15561                         if (map->holelistenabled) {
15562                                 hole_entry = (vm_map_entry_t)map->holes_list;
15563
15564                                 if (hole_entry == NULL) {
15565                                         /*
15566                                          * No more space in the map?
15567                                          */
15568                                         return(KERN_NO_SPACE);
15569                                 } else {
15570
15571                                         boolean_t found_hole = FALSE;
15572
15573                                         do {
15574                                                 if (hole_entry->vme_start >= start) {
15575                                                         start = hole_entry->vme_start;
15576                                                         found_hole = TRUE;
15577                                                         break;
15578                                                 }
15579
15580                                                 if (hole_entry->vme_end > start) {
15581                                                         found_hole = TRUE;
15582                                                         break;
15583                                                 }
15584                                                 hole_entry = hole_entry->vme_next;
15585
15586                                         } while (hole_entry != (vm_map_entry_t) map->holes_list);
15587
15588                                         if (found_hole == FALSE) {
15589                                                 return (KERN_NO_SPACE);
15590                                         }
15591
15592                                         entry = hole_entry;
15593                                 }
15594                         } else {
15595                                 assert(first_free_is_valid(map));
15596                                 if (start == map->min_offset) {
15597                                         if ((entry = map->first_free) != vm_map_to_entry(map))
15598                                                 start = entry->vme_end;
15599                                 } else {
15600                                         vm_map_entry_t  tmp_entry;
15601                                         if (vm_map_lookup_entry(map, start, &tmp_entry))
15602                                                 start = tmp_entry->vme_end;
15603                                         entry = tmp_entry;
15604                                 }
15605                         }
15606                         start = vm_map_round_page(start,
15607                                                   VM_MAP_PAGE_MASK(map));
15608                 }
15609
15610                 /*
15611                  *      In any case, the "entry" always precedes
15612                  *      the proposed new region throughout the
15613                  *      loop:
15614                  */
15615
15616                 while (TRUE) {
15617                         vm_map_entry_t  next;
15618
15619                         /*
15620                          *      Find the end of the proposed new region.
15621                          *      Be sure we didn't go beyond the end, or
15622                          *      wrap around the address.
15623                          */
15624
15625                         end = ((start + mask) & ~mask);
15626                         end = vm_map_round_page(end,
15627                                                 VM_MAP_PAGE_MASK(map));
15628                         if (end < start)
15629                                 return(KERN_NO_SPACE);
15630                         start = end;
15631                         end += size;
15632
15633                         if ((end > map->max_offset) || (end < start)) {
15634                                 if (map->wait_for_space) {
15635                                         if (size <= (map->max_offset -
15636                                                      map->min_offset)) {
15637                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
15638                                                 vm_map_unlock(map);
15639                                                 thread_block(THREAD_CONTINUE_NULL);
15640                                                 vm_map_lock(map);
15641                                                 goto StartAgain;
15642                                         }
15643                                 }
15644
15645                                 return(KERN_NO_SPACE);
15646                         }
15647
15648                         next = entry->vme_next;
15649
15650                         if (map->holelistenabled) {
15651                                 if (entry->vme_end >= end)
15652                                         break;
15653                         } else {
15654                                 /*
15655                                  *      If there are no more entries, we must win.
15656                                  *
15657                                  *      OR
15658                                  *
15659                                  *      If there is another entry, it must be
15660                                  *      after the end of the potential new region.
15661                                  */
15662
15663                                 if (next == vm_map_to_entry(map))
15664                                         break;
15665
15666                                 if (next->vme_start >= end)
15667                                         break;
15668                         }
15669
15670                         /*
15671                          *      Didn't fit -- move to the next entry.
15672                          */
15673
15674                         entry = next;
15675
15676                         if (map->holelistenabled) {
15677                                 if (entry == (vm_map_entry_t) map->holes_list) {
15678                                         /*
15679                                          * Wrapped around
15680                                          */
15681                                         return(KERN_NO_SPACE);
15682                                 }
15683                                 start = entry->vme_start;
15684                         } else {
15685                                 start = entry->vme_end;
15686                         }
15687                 }
15688
15689                 if (map->holelistenabled) {
15690
15691                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
15692                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
15693                         }
15694                 }
15695
15696                 *address = start;
15697
15698         } else {
15699                 vm_map_entry_t          temp_entry;
15700
15701                 /*
15702                  *      Verify that:
15703                  *              the address doesn't itself violate
15704                  *              the mask requirement.
15705                  */
15706
15707                 if ((start & mask) != 0)
15708                         return(KERN_NO_SPACE);
15709
15710
15711                 /*
15712                  *      ...     the address is within bounds
15713                  */
15714
15715                 end = start + size;
15716
15717                 if ((start < map->min_offset) ||
15718                     (end > map->max_offset) ||
15719                     (start >= end)) {
15720                         return(KERN_INVALID_ADDRESS);
15721                 }
15722
15723                 /*
15724                  * If we're asked to overwrite whatever was mapped in that
15725                  * range, first deallocate that range.
15726                  */
15727                 if (flags & VM_FLAGS_OVERWRITE) {
15728                         vm_map_t zap_map;
15729
15730                         /*
15731                          * We use a "zap_map" to avoid having to unlock
15732                          * the "map" in vm_map_delete(), which would compromise
15733                          * the atomicity of the "deallocate" and then "remap"
15734                          * combination.
15735                          */
15736                         zap_map = vm_map_create(PMAP_NULL,
15737                                                 start,
15738                                                 end,
15739                                                 map->hdr.entries_pageable);
15740                         if (zap_map == VM_MAP_NULL) {
15741                                 return KERN_RESOURCE_SHORTAGE;
15742                         }
15743                         vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
15744                         vm_map_disable_hole_optimization(zap_map);
15745
15746                         kr = vm_map_delete(map, start, end,
15747                                            (VM_MAP_REMOVE_SAVE_ENTRIES |
15748                                             VM_MAP_REMOVE_NO_MAP_ALIGN),
15749                                            zap_map);
15750                         if (kr == KERN_SUCCESS) {
15751                                 vm_map_destroy(zap_map,
15752                                                VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15753                                 zap_map = VM_MAP_NULL;
15754                         }
15755                 }
15756
15757                 /*
15758                  *      ...     the starting address isn't allocated
15759                  */
15760
15761                 if (vm_map_lookup_entry(map, start, &temp_entry))
15762                         return(KERN_NO_SPACE);
15763
15764                 entry = temp_entry;
15765
15766                 /*
15767                  *      ...     the next region doesn't overlap the
15768                  *              end point.
15769                  */
15770
15771                 if ((entry->vme_next != vm_map_to_entry(map)) &&
15772                     (entry->vme_next->vme_start < end))
15773                         return(KERN_NO_SPACE);
15774         }
15775         *map_entry = entry;
15776         return(KERN_SUCCESS);
15777 }
15778
15779 /*
15780  *      vm_map_switch:
15781  *
15782  *      Set the address map for the current thread to the specified map
       *      and return the map that was found in thread->map on entry, so
       *      that the caller can later switch back to it (as
       *      vm_map_write_user() and vm_map_read_user() do).
       *
       *      The previous map is read from thread->map before preemption
       *      is disabled.  The cpu_number() lookup and PMAP_SWITCH_USER()
       *      both run with preemption disabled.  This routine does not
       *      itself call vm_map_reference() or vm_map_deallocate() on
       *      either map.
15783  */
15784
15785 vm_map_t
15786 vm_map_switch(
15787         vm_map_t        map)
15788 {
15789         int             mycpu;
15790         thread_t        thread = current_thread();
15791         vm_map_t        oldmap = thread->map;
15792
              /* "mycpu" is only used while preemption stays disabled */
15793         mp_disable_preemption();
15794         mycpu = cpu_number();
15795
15796         /*
15797          *      Deactivate the current map and activate the requested map
15798          */
15799         PMAP_SWITCH_USER(thread, map, mycpu);
15800
15801         mp_enable_preemption();
15802         return(oldmap);
15803 }
15804
15805
15806 /*
15807  *      Routine:        vm_map_write_user
15808  *
15809  *      Description:
15810  *              Copy out data from a kernel space into space in the
15811  *              destination map. The space must already exist in the
15812  *              destination map.
15813  *              NOTE:  This routine should only be called by threads
15814  *              which can block on a page fault. i.e. kernel mode user
15815  *              threads.
15816  *
       *      Parameters:
       *              map             destination map
       *              src_p           source buffer, handed to copyout() as is
       *              dst_addr        destination address, used while "map"
       *                              is the active map
       *              size            length handed to copyout()
       *
       *      Returns:
       *              KERN_SUCCESS, or KERN_INVALID_ADDRESS if copyout()
       *              returns non-zero.
       *
       *      If "map" is not current_map(), a reference is taken on "map",
       *      the thread is switched to it with vm_map_switch() for the
       *      duration of the copyout(), and then the previous map is
       *      restored and the reference dropped.
15817  */
15818 kern_return_t
15819 vm_map_write_user(
15820         vm_map_t                map,
15821         void                    *src_p,
15822         vm_map_address_t        dst_addr,
15823         vm_size_t               size)
15824 {
15825         kern_return_t   kr = KERN_SUCCESS;
15826
15827         if(current_map() == map) {
                      /* already running on the target map: copy directly */
15828                 if (copyout(src_p, dst_addr, size)) {
15829                         kr = KERN_INVALID_ADDRESS;
15830                 }
15831         } else {
15832                 vm_map_t        oldmap;
15833
15834                 /* take on the identity of the target map while doing */
15835                 /* the transfer */
15836
                      /* hold a reference on "map" across the switch */
15837                 vm_map_reference(map);
15838                 oldmap = vm_map_switch(map);
15839                 if (copyout(src_p, dst_addr, size)) {
15840                         kr = KERN_INVALID_ADDRESS;
15841                 }
                      /* restore the original map whether or not the copy failed */
15842                 vm_map_switch(oldmap);
15843                 vm_map_deallocate(map);
15844         }
15845         return kr;
15846 }
15847
15848 /*
15849  *      Routine:        vm_map_read_user
15850  *
15851  *      Description:
15852  *              Copy in data from a user space source map into the
15853  *              kernel map. The space must already exist in the
15854  *              kernel map.
15855  *              NOTE:  This routine should only be called by threads
15856  *              which can block on a page fault. i.e. kernel mode user
15857  *              threads.
15858  *
       *      Parameters:
       *              map             source map
       *              src_addr        source address, used while "map" is
       *                              the active map
       *              dst_p           destination buffer, handed to copyin()
       *                              as is
       *              size            length handed to copyin()
       *
       *      Returns:
       *              KERN_SUCCESS, or KERN_INVALID_ADDRESS if copyin()
       *              returns non-zero.
       *
       *      If "map" is not current_map(), a reference is taken on "map",
       *      the thread is switched to it with vm_map_switch() for the
       *      duration of the copyin(), and then the previous map is
       *      restored and the reference dropped.
15859  */
15860 kern_return_t
15861 vm_map_read_user(
15862         vm_map_t                map,
15863         vm_map_address_t        src_addr,
15864         void                    *dst_p,
15865         vm_size_t               size)
15866 {
15867         kern_return_t   kr = KERN_SUCCESS;
15868
15869         if(current_map() == map) {
                      /* already running on the source map: copy directly */
15870                 if (copyin(src_addr, dst_p, size)) {
15871                         kr = KERN_INVALID_ADDRESS;
15872                 }
15873         } else {
15874                 vm_map_t        oldmap;
15875
15876                 /* take on the identity of the target map while doing */
15877                 /* the transfer */
15878
                      /* hold a reference on "map" across the switch */
15879                 vm_map_reference(map);
15880                 oldmap = vm_map_switch(map);
15881                 if (copyin(src_addr, dst_p, size)) {
15882                         kr = KERN_INVALID_ADDRESS;
15883                 }
                      /* restore the original map whether or not the copy failed */
15884                 vm_map_switch(oldmap);
15885                 vm_map_deallocate(map);
15886         }
15887         return kr;
15888 }
15889
15890
15891 /*
15892  *      vm_map_check_protection:
15893  *
15894  *      Assert that the target map allows the specified
15895  *      privilege on the entire address region given.
15896  *      The entire region must be allocated.
       *
       *      Returns TRUE only if every map entry overlapping
       *      [start, end) has all the bits of "protection" set in its
       *      "protection" field and the entries cover the range without
       *      a gap.  Returns FALSE if:
       *        - start is below vm_map_min(map), end is above
       *          vm_map_max(map), or start > end;
       *        - no entry contains "start";
       *        - the walk reaches the map header before reaching "end";
       *        - there is a hole between two consecutive entries;
       *        - an entry lacks one of the requested protection bits.
       *
       *      When start == end the loop body never runs, so the result
       *      is TRUE as long as the bounds check passes and "start"
       *      itself lies inside an entry.
       *
       *      The map is locked with vm_map_lock() on entry and unlocked
       *      before every return.
15897  */
15898 boolean_t
15899 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
15900                         vm_map_offset_t end, vm_prot_t protection)
15901 {
15902         vm_map_entry_t entry;
15903         vm_map_entry_t tmp_entry;
15904
15905         vm_map_lock(map);
15906
15907         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
15908         {
15909                 vm_map_unlock(map);
15910                 return (FALSE);
15911         }
15912
              /* the first address of the range must itself be mapped */
15913         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15914                 vm_map_unlock(map);
15915                 return(FALSE);
15916         }
15917
15918         entry = tmp_entry;
15919
15920         while (start < end) {
                      /* ran off the end of the entry list before covering "end" */
15921                 if (entry == vm_map_to_entry(map)) {
15922                         vm_map_unlock(map);
15923                         return(FALSE);
15924                 }
15925
15926                 /*
15927                  *      No holes allowed!
15928                  */
15929
15930                 if (start < entry->vme_start) {
15931                         vm_map_unlock(map);
15932                         return(FALSE);
15933                 }
15934
15935                 /*
15936                  * Check protection associated with entry.
15937                  */
15938
15939                 if ((entry->protection & protection) != protection) {
15940                         vm_map_unlock(map);
15941                         return(FALSE);
15942                 }
15943
15944                 /* go to next entry */
                      /*
                       * "start" becomes the end of the entry just checked, so
                       * the hole test above compares it with the start of the
                       * following entry on the next iteration.
                       */
15945
15946                 start = entry->vme_end;
15947                 entry = entry->vme_next;
15948         }
15949         vm_map_unlock(map);
15950         return(TRUE);
15951 }
15952
      /*
       *      Routine:        vm_map_purgable_control
       *
       *      Description:
       *              Apply the purgeable-memory operation "control" to the
       *              VM object mapped at "address" in "map" by calling
       *              vm_object_purgable_control() on it.  For
       *              VM_PURGABLE_PURGE_ALL, vm_purgeable_object_purge_all()
       *              is called instead and "address" and "state" are not
       *              used.
       *
       *      Parameters:
       *              map             map to look "address" up in
       *              address         address inside the target map entry
       *              control         VM_PURGABLE_SET_STATE,
       *                              VM_PURGABLE_GET_STATE,
       *                              VM_PURGABLE_PURGE_ALL or
       *                              VM_PURGABLE_SET_STATE_FROM_KERNEL
       *              state           read (for the SET_STATE controls) and
       *                              forwarded to
       *                              vm_object_purgable_control()
       *
       *      Returns:
       *              KERN_INVALID_ARGUMENT   "map" is VM_MAP_NULL, "control"
       *                                      is not one of the values above,
       *                                      "*state" has bits outside
       *                                      VM_PURGABLE_ALL_MASKS, or the
       *                                      entry has no object or an
       *                                      object whose "purgable" field
       *                                      is VM_PURGABLE_DENY
       *              KERN_INVALID_ADDRESS    no entry at "address", or the
       *                                      entry is a submap
       *              KERN_PROTECTION_FAILURE the entry is not writable
       *              KERN_SUCCESS            for VM_PURGABLE_PURGE_ALL
       *              otherwise, whatever vm_object_purgable_control()
       *              returns.
       *
       *      Locking:
       *              Takes the map read lock for the lookup, locks the
       *              object while the map is still locked, drops the map
       *              lock, and holds the object lock across
       *              vm_object_purgable_control().
       */
15953 kern_return_t
15954 vm_map_purgable_control(
15955         vm_map_t                map,
15956         vm_map_offset_t         address,
15957         vm_purgable_t           control,
15958         int                     *state)
15959 {
15960         vm_map_entry_t          entry;
15961         vm_object_t             object;
15962         kern_return_t           kr;
15963         boolean_t               was_nonvolatile;
15964
15965         /*
15966          * Vet all the input parameters and current type and state of the
15967          * underlying object.  Return with an error if anything is amiss.
15968          */
15969         if (map == VM_MAP_NULL)
15970                 return(KERN_INVALID_ARGUMENT);
15971
15972         if (control != VM_PURGABLE_SET_STATE &&
15973             control != VM_PURGABLE_GET_STATE &&
15974             control != VM_PURGABLE_PURGE_ALL &&
15975             control != VM_PURGABLE_SET_STATE_FROM_KERNEL)
15976                 return(KERN_INVALID_ARGUMENT);
15977
              /* handled here, before "address" or "state" are looked at */
15978         if (control == VM_PURGABLE_PURGE_ALL) {
15979                 vm_purgeable_object_purge_all();
15980                 return KERN_SUCCESS;
15981         }
15982
              /*
               * For the SET_STATE controls, reject any "*state" bit outside
               * VM_PURGABLE_ALL_MASKS.  "state" is dereferenced here without
               * a NULL check.
               * NOTE(review): the second test compares a value already
               * masked with VM_PURGABLE_STATE_MASK against that same mask,
               * so it looks like it can never be true -- confirm the intent.
               */
15983         if ((control == VM_PURGABLE_SET_STATE ||
15984              control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
15985             (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
15986              ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
15987                 return(KERN_INVALID_ARGUMENT);
15988
15989         vm_map_lock_read(map);
15990
15991         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
15992
15993                 /*
15994                  * Must pass a valid non-submap address.
15995                  */
15996                 vm_map_unlock_read(map);
15997                 return(KERN_INVALID_ADDRESS);
15998         }
15999
16000         if ((entry->protection & VM_PROT_WRITE) == 0) {
16001                 /*
16002                  * Can't apply purgable controls to something you can't write.
16003                  */
16004                 vm_map_unlock_read(map);
16005                 return(KERN_PROTECTION_FAILURE);
16006         }
16007
16008         object = VME_OBJECT(entry);
16009         if (object == VM_OBJECT_NULL ||
16010             object->purgable == VM_PURGABLE_DENY) {
16011                 /*
16012                  * Object must already be present and be purgeable.
16013                  */
16014                 vm_map_unlock_read(map);
16015                 return KERN_INVALID_ARGUMENT;
16016         }
16017
              /*
               * Lock the object while the map is still read-locked; the map
               * lock is dropped below, before the control operation runs.
               */
16018         vm_object_lock(object);
16019
16020 #if 00
16021         if (VME_OFFSET(entry) != 0 ||
16022             entry->vme_end - entry->vme_start != object->vo_size) {
16023                 /*
16024                  * Can only apply purgable controls to the whole (existing)
16025                  * object at once.
16026                  */
16027                 vm_map_unlock_read(map);
16028                 vm_object_unlock(object);
16029                 return KERN_INVALID_ARGUMENT;
16030         }
16031 #endif
16032
16033         assert(!entry->is_sub_map);
16034         assert(!entry->use_pmap); /* purgeable has its own accounting */
16035
16036         vm_map_unlock_read(map);
16037
              /*
               * Sample the state before the operation so that a transition
               * away from non-volatile can be detected afterwards.
               */
16038         was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16039
16040         kr = vm_object_purgable_control(object, control, state);
16041
              /*
               * The body of this test is compiled only under DEBUG: it
               * records kernel_task as the volatilizer when the object left
               * the non-volatile state through a map whose pmap is
               * kernel_pmap.  In other builds the test has no effect.
               */
16042         if (was_nonvolatile &&
16043             object->purgable != VM_PURGABLE_NONVOLATILE &&
16044             map->pmap == kernel_pmap) {
16045 #if DEBUG
16046                 object->vo_purgeable_volatilizer = kernel_task;
16047 #endif /* DEBUG */
16048         }
16049
16050         vm_object_unlock(object);
16051
16052         return kr;
16053 }
16054
      /*
       *      Routine:        vm_map_page_query_internal
       *
       *      Description:
       *              Query the VM_PAGE_INFO_BASIC information for the page
       *              at "offset" in "target_map" through vm_map_page_info()
       *              and hand back only its "disposition" and "ref_count"
       *              fields.
       *
       *      Returns:
       *              The return value of vm_map_page_info().  On
       *              KERN_SUCCESS "*disposition" and "*ref_count" are
       *              copied from the returned info; on any other result
       *              both are set to 0.
       */
16055 kern_return_t
16056 vm_map_page_query_internal(
16057         vm_map_t        target_map,
16058         vm_map_offset_t offset,
16059         int             *disposition,
16060         int             *ref_count)
16061 {
16062         kern_return_t                   kr;
16063         vm_page_info_basic_data_t       info;
16064         mach_msg_type_number_t          count;
16065
              /* "count" tells the callee how large the "info" buffer is */
16066         count = VM_PAGE_INFO_BASIC_COUNT;
16067         kr = vm_map_page_info(target_map,
16068                               offset,
16069                               VM_PAGE_INFO_BASIC,
16070                               (vm_page_info_t) &info,
16071                               &count);
16072         if (kr == KERN_SUCCESS) {
16073                 *disposition = info.disposition;
16074                 *ref_count = info.ref_count;
16075         } else {
                      /* never leave the outputs uninitialized on failure */
16076                 *disposition = 0;
16077                 *ref_count = 0;
16078         }
16079
16080         return kr;
16081 }
16082
      /*
       *      Routine:        vm_map_page_info
       *
       *      Description:
       *              Single-page front end to
       *              vm_map_page_range_info_internal(): queries the range
       *              that starts at "offset" and ends at "offset + 1",
       *              which the callee rounds out to page boundaries.
       *              "map", "flavor", "info" and "count" are forwarded
       *              unchanged.
       *
       *      Returns:
       *              The return value of
       *              vm_map_page_range_info_internal().
       */
16083 kern_return_t
16084 vm_map_page_info(
16085         vm_map_t                map,
16086         vm_map_offset_t         offset,
16087         vm_page_info_flavor_t   flavor,
16088         vm_page_info_t          info,
16089         mach_msg_type_number_t  *count)
16090 {
16091         return (vm_map_page_range_info_internal(map,
16092                                        offset, /* start of range */
16093                                        (offset + 1), /* this will get rounded in the call to the page boundary */
16094                                        flavor,
16095                                        info,
16096                                        count));
16097 }
16098
16099 kern_return_t
16100 vm_map_page_range_info_internal(
16101         vm_map_t                map,
16102         vm_map_offset_t         start_offset,
16103         vm_map_offset_t         end_offset,
16104         vm_page_info_flavor_t   flavor,
16105         vm_page_info_t          info,
16106         mach_msg_type_number_t  *count)
16107 {
16108         vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
16109         vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16110         vm_page_t               m = VM_PAGE_NULL;
16111         kern_return_t           retval = KERN_SUCCESS;
16112         int                     disposition = 0;
16113         int                     ref_count = 0;
16114         int                     depth = 0, info_idx = 0;
16115         vm_page_info_basic_t    basic_info = 0;
16116         vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16117         vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16118         boolean_t               do_region_footprint;
16119
16120         switch (flavor) {
16121         case VM_PAGE_INFO_BASIC:
16122                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
16123                         /*
16124                          * The "vm_page_info_basic_data" structure was not
16125                          * properly padded, so allow the size to be off by
16126                          * one to maintain backwards binary compatibility...
16127                          */
16128                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
16129                                 return KERN_INVALID_ARGUMENT;
16130                 }
16131                 break;
16132         default:
16133                 return KERN_INVALID_ARGUMENT;
16134         }
16135
16136         do_region_footprint = task_self_region_footprint();
16137         disposition = 0;
16138         ref_count = 0;
16139         depth = 0;
16140         info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
16141         retval = KERN_SUCCESS;
16142
16143         offset_in_page = start_offset & PAGE_MASK;
16144         start = vm_map_trunc_page(start_offset, PAGE_MASK);
16145         end = vm_map_round_page(end_offset, PAGE_MASK);
16146
16147         assert ((end - start) <= MAX_PAGE_RANGE_QUERY);
16148
16149         vm_map_lock_read(map);
16150
16151         for (curr_s_offset = start; curr_s_offset < end;) {
16152                 /*
16153                  * New lookup needs reset of these variables.
16154                  */
16155                 curr_object = object = VM_OBJECT_NULL;
16156                 offset_in_object = 0;
16157                 ref_count = 0;
16158                 depth = 0;
16159
16160                 if (do_region_footprint &&
16161                     curr_s_offset >= vm_map_last_entry(map)->vme_end) {
16162                         ledger_amount_t nonvol_compressed;
16163
16164                         /*
16165                          * Request for "footprint" info about a page beyond
16166                          * the end of address space: this must be for
16167                          * the fake region vm_map_region_recurse_64()
16168                          * reported to account for non-volatile purgeable
16169                          * memory owned by this task.
16170                          */
16171                         disposition = 0;
16172                         nonvol_compressed = 0;
16173                         ledger_get_balance(
16174                                 map->pmap->ledger,
16175                                 task_ledgers.purgeable_nonvolatile_compressed,
16176                                 &nonvol_compressed);
16177                         if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
16178                             (unsigned) nonvol_compressed) {
16179                                 /*
16180                                  * We haven't reported all the "non-volatile
16181                                  * compressed" pages yet, so report this fake
16182                                  * page as "compressed".
16183                                  */
16184                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16185                         } else {
16186                                 /*
16187                                  * We've reported all the non-volatile
16188                                  * compressed page but not all the non-volatile
16189                                  * pages , so report this fake page as
16190                                  * "resident dirty".
16191                                  */
16192                                 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16193                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16194                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
16195                         }
16196                         switch (flavor) {
16197                         case VM_PAGE_INFO_BASIC:
16198                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16199                                 basic_info->disposition = disposition;
16200                                 basic_info->ref_count = 1;
16201                                 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16202                                 basic_info->offset = 0;
16203                                 basic_info->depth = 0;
16204
16205                                 info_idx++;
16206                                 break;
16207                         }
16208                         curr_s_offset += PAGE_SIZE;
16209                         continue;
16210                 }
16211
16212                 /*
16213                  * First, find the map entry covering "curr_s_offset", going down
16214                  * submaps if necessary.
16215                  */
16216                 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16217                         /* no entry -> no object -> no page */
16218
16219                         if (curr_s_offset < vm_map_min(map)) {
16220                                 /*
16221                                  * Illegal address that falls below map min.
16222                                  */
16223                                 curr_e_offset = MIN(end, vm_map_min(map));
16224
16225                         } else if (curr_s_offset >= vm_map_max(map)) {
16226                                 /*
16227                                  * Illegal address that falls on/after map max.
16228                                  */
16229                                 curr_e_offset = end;
16230
16231                         } else if (map_entry == vm_map_to_entry(map)) {
16232                                 /*
16233                                  * Hit a hole.
16234                                  */
16235                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
16236                                         /*
16237                                          * Empty map.
16238                                          */
16239                                         curr_e_offset = MIN(map->max_offset, end);
16240                                 } else {
16241                                         /*
16242                                          * Hole at start of the map.
16243                                          */
16244                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16245                                 }
16246                         } else {
16247                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
16248                                         /*
16249                                          * Hole at the end of the map.
16250                                          */
16251                                         curr_e_offset = MIN(map->max_offset, end);
16252                                 } else {
16253                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16254                                 }
16255                         }
16256
16257                         assert(curr_e_offset >= curr_s_offset);
16258
16259                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16260
16261                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16262
16263                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16264
16265                         curr_s_offset = curr_e_offset;
16266
16267                         info_idx += num_pages;
16268
16269                         continue;
16270                 }
16271
16272                 /* compute offset from this map entry's start */
16273                 offset_in_object = curr_s_offset - map_entry->vme_start;
16274
16275                 /* compute offset into this map entry's object (or submap) */
16276                 offset_in_object += VME_OFFSET(map_entry);
16277
16278                 if (map_entry->is_sub_map) {
16279                         vm_map_t sub_map = VM_MAP_NULL;
16280                         vm_page_info_t submap_info = 0;
16281                         vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
16282
16283                         range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
16284
16285                         submap_s_offset = offset_in_object;
16286                         submap_e_offset = submap_s_offset + range_len;
16287
16288                         sub_map = VME_SUBMAP(map_entry);
16289
16290                         vm_map_reference(sub_map);
16291                         vm_map_unlock_read(map);
16292
16293                         submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16294
16295                         retval = vm_map_page_range_info_internal(sub_map,
16296                                               submap_s_offset,
16297                                               submap_e_offset,
16298                                               VM_PAGE_INFO_BASIC,
16299                                               (vm_page_info_t) submap_info,
16300                                               count);
16301
16302                         assert(retval == KERN_SUCCESS);
16303
16304                         vm_map_lock_read(map);
16305                         vm_map_deallocate(sub_map);
16306
16307                         /* Move the "info" index by the number of pages we inspected.*/
16308                         info_idx += range_len >> PAGE_SHIFT;
16309
16310                         /* Move our current offset by the size of the range we inspected.*/
16311                         curr_s_offset += range_len;
16312
16313                         continue;
16314                 }
16315
16316                 object = VME_OBJECT(map_entry);
16317                 if (object == VM_OBJECT_NULL) {
16318
16319                         /*
16320                          * We don't have an object here and, hence,
16321                          * no pages to inspect. We'll fill up the
16322                          * info structure appropriately.
16323                          */
16324
16325                         curr_e_offset = MIN(map_entry->vme_end, end);
16326
16327                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16328
16329                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16330
16331                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16332
16333                         curr_s_offset = curr_e_offset;
16334
16335                         info_idx += num_pages;
16336
16337                         continue;
16338                 }
16339
16340                 if (do_region_footprint) {
16341                         int pmap_disp;
16342
16343                         disposition = 0;
16344                         pmap_disp = 0;
16345                         pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
16346                         if (map_entry->iokit_acct &&
16347                             object->internal &&
16348                             object->purgable == VM_PURGABLE_DENY) {
16349                                 /*
16350                                  * Non-purgeable IOKit memory: phys_footprint
16351                                  * includes the entire virtual mapping.
16352                                  */
16353                                 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16354                                 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16355                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16356                         } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
16357                                                 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
16358                                 /* alternate accounting */
16359                                 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16360                                 pmap_disp = 0;
16361                         } else {
16362                                 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
16363                                         assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16364                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16365                                         disposition |= VM_PAGE_QUERY_PAGE_REF;
16366                                         if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
16367                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16368                                         } else {
16369                                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
16370                                         }
16371                                 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
16372                                         assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16373                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16374                                 }
16375                         }
16376                         switch (flavor) {
16377                         case VM_PAGE_INFO_BASIC:
16378                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16379                                 basic_info->disposition = disposition;
16380                                 basic_info->ref_count = 1;
16381                                 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16382                                 basic_info->offset = 0;
16383                                 basic_info->depth = 0;
16384
16385                                 info_idx++;
16386                                 break;
16387                         }
16388                         curr_s_offset += PAGE_SIZE;
16389                         continue;
16390                 }
16391
16392                 vm_object_reference(object);
16393                 /*
16394                  * Shared mode -- so we can allow other readers
16395                  * to grab the lock too.
16396                  */
16397                 vm_object_lock_shared(object);
16398
16399                 curr_e_offset = MIN(map_entry->vme_end, end);
16400
16401                 vm_map_unlock_read(map);
16402
16403                 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
16404
16405                 curr_object = object;
16406
16407                 for (; curr_s_offset < curr_e_offset;) {
16408
16409                         if (object == curr_object) {
16410                                 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
16411                         } else {
16412                                 ref_count = curr_object->ref_count;
16413                         }
16414
16415                         curr_offset_in_object = offset_in_object;
16416
16417                         for (;;) {
16418                                 m = vm_page_lookup(curr_object, curr_offset_in_object);
16419
16420                                 if (m != VM_PAGE_NULL) {
16421
16422                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16423                                         break;
16424
16425                                 } else {
16426                                         if (curr_object->internal &&
16427                                             curr_object->alive &&
16428                                             !curr_object->terminating &&
16429                                             curr_object->pager_ready) {
16430
16431                                                 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
16432                                                     == VM_EXTERNAL_STATE_EXISTS) {
16433                                                         /* the pager has that page */
16434                                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16435                                                         break;
16436                                                 }
16437                                         }
16438
16439                                         /*
16440                                          * Go down the VM object shadow chain until we find the page
16441                                          * we're looking for.
16442                                          */
16443
16444                                         if (curr_object->shadow != VM_OBJECT_NULL) {
16445                                                 vm_object_t shadow = VM_OBJECT_NULL;
16446
16447                                                 curr_offset_in_object += curr_object->vo_shadow_offset;
16448                                                 shadow = curr_object->shadow;
16449
16450                                                 vm_object_lock_shared(shadow);
16451                                                 vm_object_unlock(curr_object);
16452
16453                                                 curr_object = shadow;
16454                                                 depth++;
16455                                                 continue;
16456                                         } else {
16457
16458                                                 break;
16459                                         }
16460                                 }
16461                         }
16462
16463                         /* The ref_count is not strictly accurate, it measures the number   */
16464                         /* of entities holding a ref on the object, they may not be mapping */
16465                         /* the object or may not be mapping the section holding the         */
16466                         /* target page but its still a ball park number and though an over- */
16467                         /* count, it picks up the copy-on-write cases                       */
16468
16469                         /* We could also get a picture of page sharing from pmap_attributes */
16470                         /* but this would under count as only faulted-in mappings would     */
16471                         /* show up.                                                         */
16472
16473                         if ((curr_object == object) && curr_object->shadow)
16474                                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
16475
16476                         if (! curr_object->internal)
16477                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
16478
16479                         if (m != VM_PAGE_NULL) {
16480
16481                                 if (m->fictitious) {
16482
16483                                         disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
16484
16485                                 } else {
16486                                         if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
16487                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16488
16489                                         if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
16490                                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
16491
16492                                         if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
16493                                                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
16494
16495                                         if (m->cs_validated)
16496                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
16497                                         if (m->cs_tainted)
16498                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
16499                                         if (m->cs_nx)
16500                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
16501                                 }
16502                         }
16503
16504                         switch (flavor) {
16505                         case VM_PAGE_INFO_BASIC:
16506                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16507                                 basic_info->disposition = disposition;
16508                                 basic_info->ref_count = ref_count;
16509                                 basic_info->object_id = (vm_object_id_t) (uintptr_t)
16510                                         VM_KERNEL_ADDRPERM(curr_object);
16511                                 basic_info->offset =
16512                                         (memory_object_offset_t) curr_offset_in_object + offset_in_page;
16513                                 basic_info->depth = depth;
16514
16515                                 info_idx++;
16516                                 break;
16517                         }
16518
16519                         disposition = 0;
16520                         offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
16521
16522                         /*
16523                          * Move to next offset in the range and in our object.
16524                          */
16525                         curr_s_offset += PAGE_SIZE;
16526                         offset_in_object += PAGE_SIZE;
16527                         curr_offset_in_object = offset_in_object;
16528
16529                         if (curr_object != object) {
16530
16531                                 vm_object_unlock(curr_object);
16532
16533                                 curr_object = object;
16534
16535                                 vm_object_lock_shared(curr_object);
16536                         } else {
16537
16538                                 vm_object_lock_yield_shared(curr_object);
16539                         }
16540                 }
16541
16542                 vm_object_unlock(curr_object);
16543                 vm_object_deallocate(curr_object);
16544
16545                 vm_map_lock_read(map);
16546         }
16547
16548         vm_map_unlock_read(map);
16549         return retval;
16550 }
16551
16552 /*
16553  *      vm_map_msync
16554  *
16555  *      Synchronises the memory range specified with its backing store
16556  *      image by either flushing or cleaning the contents to the appropriate
16557  *      memory manager engaging in a memory object synchronize dialog with
16558  *      the manager.  The client doesn't return until the manager issues
16559  *      m_o_s_completed message.  MIG Magically converts user task parameter
16560  *      to the task's address map.
16561  *
16562  *      interpretation of sync_flags
16563  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
16564  *                                pages to manager.
16565  *
16566  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
16567  *                              - discard pages, write dirty or precious
16568  *                                pages back to memory manager.
16569  *
16570  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
16571  *                              - write dirty or precious pages back to
16572  *                                the memory manager.
16573  *
16574  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
16575  *                                is a hole in the region, and we would
16576  *                                have returned KERN_SUCCESS, return
16577  *                                KERN_INVALID_ADDRESS instead.
16578  *
16579  *      NOTE
16580  *      The memory object attributes have not yet been implemented, this
16581  *      function will have to deal with the invalidate attribute
16582  *
16583  *      RETURNS
16584  *      KERN_INVALID_TASK               Bad task parameter
16585  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
16586  *      KERN_SUCCESS                    The usual.
16587  *      KERN_INVALID_ADDRESS            There was a hole in the region.
16588  */
16589
16590 kern_return_t
16591 vm_map_msync(
16592         vm_map_t                map,
16593         vm_map_address_t        address,
16594         vm_map_size_t           size,
16595         vm_sync_t               sync_flags)
16596 {
16597         vm_map_entry_t          entry;
16598         vm_map_size_t           amount_left;
16599         vm_object_offset_t      offset;
16600         boolean_t               do_sync_req;
16601         boolean_t               had_hole = FALSE;
16602         vm_map_offset_t         pmap_offset;
16603
16604         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
16605             (sync_flags & VM_SYNC_SYNCHRONOUS))
16606                 return(KERN_INVALID_ARGUMENT);
16607
16608         /*
16609          * align address and size on page boundaries
16610          */
16611         size = (vm_map_round_page(address + size,
16612                                   VM_MAP_PAGE_MASK(map)) -
16613                 vm_map_trunc_page(address,
16614                                   VM_MAP_PAGE_MASK(map)));
16615         address = vm_map_trunc_page(address,
16616                                     VM_MAP_PAGE_MASK(map));
16617
16618         if (map == VM_MAP_NULL)
16619                 return(KERN_INVALID_TASK);
16620
16621         if (size == 0)
16622                 return(KERN_SUCCESS);
16623
16624         amount_left = size;
16625
16626         while (amount_left > 0) {
16627                 vm_object_size_t        flush_size;
16628                 vm_object_t             object;
16629
16630                 vm_map_lock(map);
16631                 if (!vm_map_lookup_entry(map,
16632                                          address,
16633                                          &entry)) {
16634
16635                         vm_map_size_t   skip;
16636
16637                         /*
16638                          * hole in the address map.
16639                          */
16640                         had_hole = TRUE;
16641
16642                         if (sync_flags & VM_SYNC_KILLPAGES) {
16643                                 /*
16644                                  * For VM_SYNC_KILLPAGES, there should be
16645                                  * no holes in the range, since we couldn't
16646                                  * prevent someone else from allocating in
16647                                  * that hole and we wouldn't want to "kill"
16648                                  * their pages.
16649                                  */
16650                                 vm_map_unlock(map);
16651                                 break;
16652                         }
16653
16654                         /*
16655                          * Check for empty map.
16656                          */
16657                         if (entry == vm_map_to_entry(map) &&
16658                             entry->vme_next == entry) {
16659                                 vm_map_unlock(map);
16660                                 break;
16661                         }
16662                         /*
16663                          * Check that we don't wrap and that
16664                          * we have at least one real map entry.
16665                          */
16666                         if ((map->hdr.nentries == 0) ||
16667                             (entry->vme_next->vme_start < address)) {
16668                                 vm_map_unlock(map);
16669                                 break;
16670                         }
16671                         /*
16672                          * Move up to the next entry if needed
16673                          */
16674                         skip = (entry->vme_next->vme_start - address);
16675                         if (skip >= amount_left)
16676                                 amount_left = 0;
16677                         else
16678                                 amount_left -= skip;
16679                         address = entry->vme_next->vme_start;
16680                         vm_map_unlock(map);
16681                         continue;
16682                 }
16683
16684                 offset = address - entry->vme_start;
16685                 pmap_offset = address;
16686
16687                 /*
16688                  * do we have more to flush than is contained in this
16689                  * entry ?
16690                  */
16691                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
16692                         flush_size = entry->vme_end -
16693                                 (entry->vme_start + offset);
16694                 } else {
16695                         flush_size = amount_left;
16696                 }
16697                 amount_left -= flush_size;
16698                 address += flush_size;
16699
16700                 if (entry->is_sub_map == TRUE) {
16701                         vm_map_t        local_map;
16702                         vm_map_offset_t local_offset;
16703
16704                         local_map = VME_SUBMAP(entry);
16705                         local_offset = VME_OFFSET(entry);
16706                         vm_map_unlock(map);
16707                         if (vm_map_msync(
16708                                     local_map,
16709                                     local_offset,
16710                                     flush_size,
16711                                     sync_flags) == KERN_INVALID_ADDRESS) {
16712                                 had_hole = TRUE;
16713                         }
16714                         continue;
16715                 }
16716                 object = VME_OBJECT(entry);
16717
16718                 /*
16719                  * We can't sync this object if the object has not been
16720                  * created yet
16721                  */
16722                 if (object == VM_OBJECT_NULL) {
16723                         vm_map_unlock(map);
16724                         continue;
16725                 }
16726                 offset += VME_OFFSET(entry);
16727
16728                 vm_object_lock(object);
16729
16730                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
16731                         int kill_pages = 0;
16732                         boolean_t reusable_pages = FALSE;
16733
16734                         if (sync_flags & VM_SYNC_KILLPAGES) {
16735                                 if (((object->ref_count == 1) ||
16736                                      ((object->copy_strategy !=
16737                                        MEMORY_OBJECT_COPY_SYMMETRIC) &&
16738                                       (object->copy == VM_OBJECT_NULL))) &&
16739                                     (object->shadow == VM_OBJECT_NULL)) {
16740                                         if (object->ref_count != 1) {
16741                                                 vm_page_stats_reusable.free_shared++;
16742                                         }
16743                                         kill_pages = 1;
16744                                 } else {
16745                                         kill_pages = -1;
16746                                 }
16747                         }
16748                         if (kill_pages != -1)
16749                                 vm_object_deactivate_pages(
16750                                         object,
16751                                         offset,
16752                                         (vm_object_size_t) flush_size,
16753                                         kill_pages,
16754                                         reusable_pages,
16755                                         map->pmap,
16756                                         pmap_offset);
16757                         vm_object_unlock(object);
16758                         vm_map_unlock(map);
16759                         continue;
16760                 }
16761                 /*
16762                  * We can't sync this object if there isn't a pager.
16763                  * Don't bother to sync internal objects, since there can't
16764                  * be any "permanent" storage for these objects anyway.
16765                  */
16766                 if ((object->pager == MEMORY_OBJECT_NULL) ||
16767                     (object->internal) || (object->private)) {
16768                         vm_object_unlock(object);
16769                         vm_map_unlock(map);
16770                         continue;
16771                 }
16772                 /*
16773                  * keep reference on the object until syncing is done
16774                  */
16775                 vm_object_reference_locked(object);
16776                 vm_object_unlock(object);
16777
16778                 vm_map_unlock(map);
16779
16780                 do_sync_req = vm_object_sync(object,
16781                                              offset,
16782                                              flush_size,
16783                                              sync_flags & VM_SYNC_INVALIDATE,
16784                                              ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
16785                                               (sync_flags & VM_SYNC_ASYNCHRONOUS)),
16786                                              sync_flags & VM_SYNC_SYNCHRONOUS);
16787
16788                 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
16789                         /*
16790                          * clear out the clustering and read-ahead hints
16791                          */
16792                         vm_object_lock(object);
16793
16794                         object->pages_created = 0;
16795                         object->pages_used = 0;
16796                         object->sequential = 0;
16797                         object->last_alloc = 0;
16798
16799                         vm_object_unlock(object);
16800                 }
16801                 vm_object_deallocate(object);
16802         } /* while */
16803
16804         /* for proper msync() behaviour */
16805         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
16806                 return(KERN_INVALID_ADDRESS);
16807
16808         return(KERN_SUCCESS);
16809 }/* vm_msync */
16810
16811 /*
16812  *      Routine:        convert_port_entry_to_map
16813  *      Purpose:
16814  *              Convert from a port specifying an entry or a task
16815  *              to a map. Doesn't consume the port ref; produces a map ref,
16816  *              which may be null.  Unlike convert_port_to_map, the
16817  *              port may be task or a named entry backed.
16818  *      Conditions:
16819  *              Nothing locked.
16820  */
16821
16822
16823 vm_map_t
16824 convert_port_entry_to_map(
16825         ipc_port_t      port)
16826 {
16827         vm_map_t map;
16828         vm_named_entry_t        named_entry;
16829         uint32_t        try_failed_count = 0;
16830
16831         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16832                 while(TRUE) {
16833                         ip_lock(port);
16834                         if(ip_active(port) && (ip_kotype(port)
16835                                                == IKOT_NAMED_ENTRY)) {
16836                                 named_entry =
16837                                         (vm_named_entry_t)port->ip_kobject;
16838                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
16839                                         ip_unlock(port);
16840
16841                                         try_failed_count++;
16842                                         mutex_pause(try_failed_count);
16843                                         continue;
16844                                 }
16845                                 named_entry->ref_count++;
16846                                 lck_mtx_unlock(&(named_entry)->Lock);
16847                                 ip_unlock(port);
16848                                 if ((named_entry->is_sub_map) &&
16849                                     (named_entry->protection
16850                                      & VM_PROT_WRITE)) {
16851                                         map = named_entry->backing.map;
16852                                 } else {
16853                                         mach_destroy_memory_entry(port);
16854                                         return VM_MAP_NULL;
16855                                 }
16856                                 vm_map_reference_swap(map);
16857                                 mach_destroy_memory_entry(port);
16858                                 break;
16859                         }
16860                         else
16861                                 return VM_MAP_NULL;
16862                 }
16863         }
16864         else
16865                 map = convert_port_to_map(port);
16866
16867         return map;
16868 }
16869
16870 /*
16871  *      Routine:        convert_port_entry_to_object
16872  *      Purpose:
16873  *              Convert from a port specifying a named entry to an
16874  *              object. Doesn't consume the port ref; produces a map ref,
16875  *              which may be null.
16876  *      Conditions:
16877  *              Nothing locked.
16878  */
16879
16880
16881 vm_object_t
16882 convert_port_entry_to_object(
16883         ipc_port_t      port)
16884 {
16885         vm_object_t             object = VM_OBJECT_NULL;
16886         vm_named_entry_t        named_entry;
16887         uint32_t                try_failed_count = 0;
16888
16889         if (IP_VALID(port) &&
16890             (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16891         try_again:
16892                 ip_lock(port);
16893                 if (ip_active(port) &&
16894                     (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16895                         named_entry = (vm_named_entry_t)port->ip_kobject;
16896                         if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
16897                                 ip_unlock(port);
16898                                 try_failed_count++;
16899                                 mutex_pause(try_failed_count);
16900                                 goto try_again;
16901                         }
16902                         named_entry->ref_count++;
16903                         lck_mtx_unlock(&(named_entry)->Lock);
16904                         ip_unlock(port);
16905                         if (!(named_entry->is_sub_map) &&
16906                             !(named_entry->is_copy) &&
16907                             (named_entry->protection & VM_PROT_WRITE)) {
16908                                 object = named_entry->backing.object;
16909                                 vm_object_reference(object);
16910                         }
16911                         mach_destroy_memory_entry(port);
16912                 }
16913         }
16914
16915         return object;
16916 }
16917
16918 /*
16919  * Export routines to other components for the things we access locally through
16920  * macros.
16921  */
16922 #undef current_map
16923 vm_map_t
16924 current_map(void)
16925 {
16926         return (current_map_fast());
16927 }
16928
16929 /*
16930  *      vm_map_reference:
16931  *
16932  *      Most code internal to the osfmk will go through a
16933  *      macro defining this.  This is always here for the
16934  *      use of other kernel components.
16935  */
16936 #undef vm_map_reference
16937 void
16938 vm_map_reference(
16939         vm_map_t        map)
16940 {
16941         if (map == VM_MAP_NULL)
16942                 return;
16943
16944         lck_mtx_lock(&map->s_lock);
16945 #if     TASK_SWAPPER
16946         assert(map->res_count > 0);
16947         assert(map->ref_count >= map->res_count);
16948         map->res_count++;
16949 #endif
16950         map->ref_count++;
16951         lck_mtx_unlock(&map->s_lock);
16952 }
16953
16954 /*
16955  *      vm_map_deallocate:
16956  *
16957  *      Removes a reference from the specified map,
16958  *      destroying it if no references remain.
16959  *      The map should not be locked.
16960  */
16961 void
16962 vm_map_deallocate(
16963         vm_map_t        map)
16964 {
16965         unsigned int            ref;
16966
16967         if (map == VM_MAP_NULL)
16968                 return;
16969
16970         lck_mtx_lock(&map->s_lock);
16971         ref = --map->ref_count;
16972         if (ref > 0) {
16973                 vm_map_res_deallocate(map);
16974                 lck_mtx_unlock(&map->s_lock);
16975                 return;
16976         }
16977         assert(map->ref_count == 0);
16978         lck_mtx_unlock(&map->s_lock);
16979
16980 #if     TASK_SWAPPER
16981         /*
16982          * The map residence count isn't decremented here because
16983          * the vm_map_delete below will traverse the entire map,
16984          * deleting entries, and the residence counts on objects
16985          * and sharing maps will go away then.
16986          */
16987 #endif
16988
16989         vm_map_destroy(map, VM_MAP_NO_FLAGS);
16990 }
16991
16992
16993 void
16994 vm_map_disable_NX(vm_map_t map)
16995 {
16996         if (map == NULL)
16997                 return;
16998         if (map->pmap == NULL)
16999                 return;
17000
17001         pmap_disable_NX(map->pmap);
17002 }
17003
17004 void
17005 vm_map_disallow_data_exec(vm_map_t map)
17006 {
17007     if (map == NULL)
17008         return;
17009
17010     map->map_disallow_data_exec = TRUE;
17011 }
17012
17013 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17014  * more descriptive.
17015  */
17016 void
17017 vm_map_set_32bit(vm_map_t map)
17018 {
17019 #if defined(__arm__) || defined(__arm64__)
17020         map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17021 #else
17022         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17023 #endif
17024 }
17025
17026
17027 void
17028 vm_map_set_64bit(vm_map_t map)
17029 {
17030 #if defined(__arm__) || defined(__arm64__)
17031         map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
17032 #else
17033         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
17034 #endif
17035 }
17036
17037 /*
17038  * Expand the maximum size of an existing map.
17039  */
17040 void
17041 vm_map_set_jumbo(vm_map_t map)
17042 {
17043 #if defined (__arm64__)
17044         vm_map_offset_t old_max_offset = map->max_offset;
17045         map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_JUMBO);
17046         if (map->holes_list->prev->vme_end == pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE)) {
17047                 /*
17048                  * There is already a hole at the end of the map; simply make it bigger.
17049                  */
17050                 map->holes_list->prev->vme_end = map->max_offset;
17051         } else {
17052                 /*
17053                  * There is no hole at the end, so we need to create a new hole
17054                  * for the new empty space we're creating.
17055                  */
17056                 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
17057                 new_hole->start = old_max_offset;
17058                 new_hole->end = map->max_offset;
17059                 new_hole->prev = map->holes_list->prev;
17060                 new_hole->next = (struct vm_map_entry *)map->holes_list;
17061                 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
17062                 map->holes_list->prev = (struct vm_map_entry *)new_hole;
17063         }
17064 #else /* arm64 */
17065         (void) map;
17066 #endif
17067 }
17068
17069 vm_map_offset_t
17070 vm_compute_max_offset(boolean_t is64)
17071 {
17072 #if defined(__arm__) || defined(__arm64__)
17073         return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
17074 #else
17075         return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
17076 #endif
17077 }
17078
17079 void
17080 vm_map_get_max_aslr_slide_section(
17081                 vm_map_t                map __unused,
17082                 int64_t                 *max_sections,
17083                 int64_t                 *section_size)
17084 {
17085 #if defined(__arm64__)
17086         *max_sections = 3;
17087         *section_size = ARM_TT_TWIG_SIZE;
17088 #else
17089         *max_sections = 1;
17090         *section_size = 0;
17091 #endif
17092 }
17093
17094 uint64_t
17095 vm_map_get_max_aslr_slide_pages(vm_map_t map)
17096 {
17097 #if defined(__arm64__)
17098         /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
17099          * limited embedded address space; this is also meant to minimize pmap
17100          * memory usage on 16KB page systems.
17101          */
17102         return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
17103 #else
17104         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17105 #endif
17106 }
17107
17108 uint64_t
17109 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
17110 {
17111 #if defined(__arm64__)
17112         /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
17113          * of independent entropy on 16KB page systems.
17114          */
17115         return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
17116 #else
17117         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17118 #endif
17119 }
17120
17121 #ifndef __arm__
17122 boolean_t
17123 vm_map_is_64bit(
17124                 vm_map_t map)
17125 {
17126         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
17127 }
17128 #endif
17129
17130 boolean_t
17131 vm_map_has_hard_pagezero(
17132                 vm_map_t        map,
17133                 vm_map_offset_t pagezero_size)
17134 {
17135         /*
17136          * XXX FBDP
17137          * We should lock the VM map (for read) here but we can get away
17138          * with it for now because there can't really be any race condition:
17139          * the VM map's min_offset is changed only when the VM map is created
17140          * and when the zero page is established (when the binary gets loaded),
17141          * and this routine gets called only when the task terminates and the
17142          * VM map is being torn down, and when a new map is created via
17143          * load_machfile()/execve().
17144          */
17145         return (map->min_offset >= pagezero_size);
17146 }
17147
17148 /*
 * Raise a VM map's maximum offset.
17150  */
17151 kern_return_t
17152 vm_map_raise_max_offset(
17153         vm_map_t        map,
17154         vm_map_offset_t new_max_offset)
17155 {
17156         kern_return_t   ret;
17157
17158         vm_map_lock(map);
17159         ret = KERN_INVALID_ADDRESS;
17160
17161         if (new_max_offset >= map->max_offset) {
17162                 if (!vm_map_is_64bit(map)) {
17163                         if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
17164                                 map->max_offset = new_max_offset;
17165                                 ret = KERN_SUCCESS;
17166                         }
17167                 } else {
17168                         if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
17169                                 map->max_offset = new_max_offset;
17170                                 ret = KERN_SUCCESS;
17171                         }
17172                 }
17173         }
17174
17175         vm_map_unlock(map);
17176         return ret;
17177 }
17178
17179
17180 /*
17181  * Raise a VM map's minimum offset.
17182  * To strictly enforce "page zero" reservation.
17183  */
17184 kern_return_t
17185 vm_map_raise_min_offset(
17186         vm_map_t        map,
17187         vm_map_offset_t new_min_offset)
17188 {
17189         vm_map_entry_t  first_entry;
17190
17191         new_min_offset = vm_map_round_page(new_min_offset,
17192                                            VM_MAP_PAGE_MASK(map));
17193
17194         vm_map_lock(map);
17195
17196         if (new_min_offset < map->min_offset) {
17197                 /*
17198                  * Can't move min_offset backwards, as that would expose
17199                  * a part of the address space that was previously, and for
17200                  * possibly good reasons, inaccessible.
17201                  */
17202                 vm_map_unlock(map);
17203                 return KERN_INVALID_ADDRESS;
17204         }
17205         if (new_min_offset >= map->max_offset) {
17206                 /* can't go beyond the end of the address space */
17207                 vm_map_unlock(map);
17208                 return KERN_INVALID_ADDRESS;
17209         }
17210
17211         first_entry = vm_map_first_entry(map);
17212         if (first_entry != vm_map_to_entry(map) &&
17213             first_entry->vme_start < new_min_offset) {
17214                 /*
17215                  * Some memory was already allocated below the new
17216                  * minimun offset.  It's too late to change it now...
17217                  */
17218                 vm_map_unlock(map);
17219                 return KERN_NO_SPACE;
17220         }
17221
17222         map->min_offset = new_min_offset;
17223
17224         assert(map->holes_list);
17225         map->holes_list->start = new_min_offset;
17226         assert(new_min_offset < map->holes_list->end);
17227
17228         vm_map_unlock(map);
17229
17230         return KERN_SUCCESS;
17231 }
17232
17233 /*
17234  * Set the limit on the maximum amount of user wired memory allowed for this map.
17235  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
17236  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
17237  * don't have to reach over to the BSD data structures.
17238  */
17239
17240 void
17241 vm_map_set_user_wire_limit(vm_map_t     map,
17242                            vm_size_t    limit)
17243 {
17244         map->user_wire_limit = limit;
17245 }
17246
17247
17248 void vm_map_switch_protect(vm_map_t     map,
17249                            boolean_t    val)
17250 {
17251         vm_map_lock(map);
17252         map->switch_protect=val;
17253         vm_map_unlock(map);
17254 }
17255
17256 /*
17257  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
17258  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
17259  * bump both counters.
17260  */
17261 void
17262 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
17263 {
17264         pmap_t pmap = vm_map_pmap(map);
17265
17266         ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17267         ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17268 }
17269
17270 void
17271 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
17272 {
17273         pmap_t pmap = vm_map_pmap(map);
17274
17275         ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17276         ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17277 }
17278
17279 /* Add (generate) code signature for memory range */
17280 #if CONFIG_DYNAMIC_CODE_SIGNING
17281 kern_return_t vm_map_sign(vm_map_t map,
17282                  vm_map_offset_t start,
17283                  vm_map_offset_t end)
17284 {
17285         vm_map_entry_t entry;
17286         vm_page_t m;
17287         vm_object_t object;
17288
17289         /*
17290          * Vet all the input parameters and current type and state of the
17291          * underlaying object.  Return with an error if anything is amiss.
17292          */
17293         if (map == VM_MAP_NULL)
17294                 return(KERN_INVALID_ARGUMENT);
17295
17296         vm_map_lock_read(map);
17297
17298         if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
17299                 /*
17300                  * Must pass a valid non-submap address.
17301                  */
17302                 vm_map_unlock_read(map);
17303                 return(KERN_INVALID_ADDRESS);
17304         }
17305
17306         if((entry->vme_start > start) || (entry->vme_end < end)) {
17307                 /*
17308                  * Map entry doesn't cover the requested range. Not handling
17309                  * this situation currently.
17310                  */
17311                 vm_map_unlock_read(map);
17312                 return(KERN_INVALID_ARGUMENT);
17313         }
17314
17315         object = VME_OBJECT(entry);
17316         if (object == VM_OBJECT_NULL) {
17317                 /*
17318                  * Object must already be present or we can't sign.
17319                  */
17320                 vm_map_unlock_read(map);
17321                 return KERN_INVALID_ARGUMENT;
17322         }
17323
17324         vm_object_lock(object);
17325         vm_map_unlock_read(map);
17326
17327         while(start < end) {
17328                 uint32_t refmod;
17329
17330                 m = vm_page_lookup(object,
17331                                    start - entry->vme_start + VME_OFFSET(entry));
17332                 if (m==VM_PAGE_NULL) {
17333                         /* shoud we try to fault a page here? we can probably
17334                          * demand it exists and is locked for this request */
17335                         vm_object_unlock(object);
17336                         return KERN_FAILURE;
17337                 }
17338                 /* deal with special page status */
17339                 if (m->busy ||
17340                     (m->unusual && (m->error || m->restart || m->private || m->absent))) {
17341                         vm_object_unlock(object);
17342                         return KERN_FAILURE;
17343                 }
17344
17345                 /* Page is OK... now "validate" it */
17346                 /* This is the place where we'll call out to create a code
17347                  * directory, later */
17348                 m->cs_validated = TRUE;
17349
17350                 /* The page is now "clean" for codesigning purposes. That means
17351                  * we don't consider it as modified (wpmapped) anymore. But
17352                  * we'll disconnect the page so we note any future modification
17353                  * attempts. */
17354                 m->wpmapped = FALSE;
17355                 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
17356
17357                 /* Pull the dirty status from the pmap, since we cleared the
17358                  * wpmapped bit */
17359                 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
17360                         SET_PAGE_DIRTY(m, FALSE);
17361                 }
17362
17363                 /* On to the next page */
17364                 start += PAGE_SIZE;
17365         }
17366         vm_object_unlock(object);
17367
17368         return KERN_SUCCESS;
17369 }
17370 #endif
17371
17372 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
17373 {
17374         vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
17375         vm_map_entry_t next_entry;
17376         kern_return_t   kr = KERN_SUCCESS;
17377         vm_map_t        zap_map;
17378
17379         vm_map_lock(map);
17380
17381         /*
17382          * We use a "zap_map" to avoid having to unlock
17383          * the "map" in vm_map_delete().
17384          */
17385         zap_map = vm_map_create(PMAP_NULL,
17386                                 map->min_offset,
17387                                 map->max_offset,
17388                                 map->hdr.entries_pageable);
17389
17390         if (zap_map == VM_MAP_NULL) {
17391                 return KERN_RESOURCE_SHORTAGE;
17392         }
17393
17394         vm_map_set_page_shift(zap_map,
17395                               VM_MAP_PAGE_SHIFT(map));
17396         vm_map_disable_hole_optimization(zap_map);
17397
17398         for (entry = vm_map_first_entry(map);
17399              entry != vm_map_to_entry(map);
17400              entry = next_entry) {
17401                 next_entry = entry->vme_next;
17402
17403                 if (VME_OBJECT(entry) &&
17404                     !entry->is_sub_map &&
17405                     (VME_OBJECT(entry)->internal == TRUE) &&
17406                     (VME_OBJECT(entry)->ref_count == 1)) {
17407
17408                         *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
17409                         *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
17410
17411                         (void)vm_map_delete(map,
17412                                             entry->vme_start,
17413                                             entry->vme_end,
17414                                             VM_MAP_REMOVE_SAVE_ENTRIES,
17415                                             zap_map);
17416                 }
17417         }
17418
17419         vm_map_unlock(map);
17420
17421         /*
17422          * Get rid of the "zap_maps" and all the map entries that
17423          * they may still contain.
17424          */
17425         if (zap_map != VM_MAP_NULL) {
17426                 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
17427                 zap_map = VM_MAP_NULL;
17428         }
17429
17430         return kr;
17431 }
17432
17433
17434 #if DEVELOPMENT || DEBUG
17435
17436 int
17437 vm_map_disconnect_page_mappings(
17438         vm_map_t map,
17439         boolean_t do_unnest)
17440 {
17441         vm_map_entry_t entry;
17442         int     page_count = 0;
17443
17444         if (do_unnest == TRUE) {
17445 #ifndef NO_NESTED_PMAP
17446                 vm_map_lock(map);
17447
17448                 for (entry = vm_map_first_entry(map);
17449                      entry != vm_map_to_entry(map);
17450                      entry = entry->vme_next) {
17451
17452                         if (entry->is_sub_map && entry->use_pmap) {
17453                                 /*
17454                                  * Make sure the range between the start of this entry and
17455                                  * the end of this entry is no longer nested, so that
17456                                  * we will only remove mappings from the pmap in use by this
17457                                  * this task
17458                                  */
17459                                 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
17460                         }
17461                 }
17462                 vm_map_unlock(map);
17463 #endif
17464         }
17465         vm_map_lock_read(map);
17466
17467         page_count = map->pmap->stats.resident_count;
17468
17469         for (entry = vm_map_first_entry(map);
17470              entry != vm_map_to_entry(map);
17471              entry = entry->vme_next) {
17472
17473                 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
17474                                            (VME_OBJECT(entry)->phys_contiguous))) {
17475                         continue;
17476                 }
17477                 if (entry->is_sub_map)
17478                         assert(!entry->use_pmap);
17479
17480                 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
17481         }
17482         vm_map_unlock_read(map);
17483
17484         return page_count;
17485 }
17486
17487 #endif
17488
17489
17490 #if CONFIG_FREEZE
17491
17492
/* Reset to 0 at the end of vm_map_freeze() when freezer swap is active;
 * per that function, it tracks the # of swapped c_segs for the session. */
int c_freezer_swapout_count;
/* Reset to 0 by vm_map_freeze() before it starts scanning the map. */
int c_freezer_compression_count = 0;
/* Set to the current uptime by vm_map_freeze() before it starts scanning. */
AbsoluteTime c_freezer_last_yield_ts = 0;
17496
17497 kern_return_t vm_map_freeze(
17498                 vm_map_t map,
17499                 unsigned int *purgeable_count,
17500                 unsigned int *wired_count,
17501                 unsigned int *clean_count,
17502                 unsigned int *dirty_count,
17503                 __unused unsigned int dirty_budget,
17504                 boolean_t *has_shared)
17505 {
17506         vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
17507         kern_return_t   kr = KERN_SUCCESS;
17508
17509         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
17510         *has_shared = FALSE;
17511
17512         /*
17513          * We need the exclusive lock here so that we can
17514          * block any page faults or lookups while we are
17515          * in the middle of freezing this vm map.
17516          */
17517         vm_map_lock(map);
17518
17519         assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
17520
17521         if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
17522                 kr = KERN_NO_SPACE;
17523                 goto done;
17524         }
17525
17526         c_freezer_compression_count = 0;
17527         clock_get_uptime(&c_freezer_last_yield_ts);
17528
17529         for (entry2 = vm_map_first_entry(map);
17530              entry2 != vm_map_to_entry(map);
17531              entry2 = entry2->vme_next) {
17532
17533                 vm_object_t     src_object = VME_OBJECT(entry2);
17534
17535                 if (src_object &&
17536                     !entry2->is_sub_map &&
17537                     !src_object->phys_contiguous) {
17538                         /* If eligible, scan the entry, moving eligible pages over to our parent object */
17539
17540                         if (src_object->internal == TRUE) {
17541
17542                                 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
17543                                         /*
17544                                          * Pages belonging to this object could be swapped to disk.
17545                                          * Make sure it's not a shared object because we could end
17546                                          * up just bringing it back in again.
17547                                          */
17548                                         if (src_object->ref_count > 1) {
17549                                                 continue;
17550                                         }
17551                                 }
17552                                 vm_object_compressed_freezer_pageout(src_object);
17553
17554                                 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
17555                                         kr = KERN_NO_SPACE;
17556                                         break;
17557                                 }
17558                         }
17559                 }
17560         }
17561 done:
17562         vm_map_unlock(map);
17563
17564         vm_object_compressed_freezer_done();
17565
17566         if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
17567                 /*
17568                  * reset the counter tracking the # of swapped c_segs
17569                  * because we are now done with this freeze session and task.
17570                  */
17571                 c_freezer_swapout_count = 0;
17572         }
17573         return kr;
17574 }
17575
17576 #endif
17577
17578 /*
17579  * vm_map_entry_should_cow_for_true_share:
17580  *
17581  * Determines if the map entry should be clipped and setup for copy-on-write
17582  * to avoid applying "true_share" to a large VM object when only a subset is
17583  * targeted.
17584  *
17585  * For now, we target only the map entries created for the Objective C
17586  * Garbage Collector, which initially have the following properties:
17587  *      - alias == VM_MEMORY_MALLOC
17588  *      - wired_count == 0
17589  *      - !needs_copy
17590  * and a VM object with:
17591  *      - internal
17592  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
17593  *      - !true_share
17594  *      - vo_size == ANON_CHUNK_SIZE
17595  *
17596  * Only non-kernel map entries.
17597  */
17598 boolean_t
17599 vm_map_entry_should_cow_for_true_share(
17600         vm_map_entry_t  entry)
17601 {
17602         vm_object_t     object;
17603
17604         if (entry->is_sub_map) {
17605                 /* entry does not point at a VM object */
17606                 return FALSE;
17607         }
17608
17609         if (entry->needs_copy) {
17610                 /* already set for copy_on_write: done! */
17611                 return FALSE;
17612         }
17613
17614         if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
17615             VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
17616                 /* not a malloc heap or Obj-C Garbage Collector heap */
17617                 return FALSE;
17618         }
17619
17620         if (entry->wired_count) {
17621                 /* wired: can't change the map entry... */
17622                 vm_counters.should_cow_but_wired++;
17623                 return FALSE;
17624         }
17625
17626         object = VME_OBJECT(entry);
17627
17628         if (object == VM_OBJECT_NULL) {
17629                 /* no object yet... */
17630                 return FALSE;
17631         }
17632
17633         if (!object->internal) {
17634                 /* not an internal object */
17635                 return FALSE;
17636         }
17637
17638         if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17639                 /* not the default copy strategy */
17640                 return FALSE;
17641         }
17642
17643         if (object->true_share) {
17644                 /* already true_share: too late to avoid it */
17645                 return FALSE;
17646         }
17647
17648         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
17649             object->vo_size != ANON_CHUNK_SIZE) {
17650                 /* ... not an object created for the ObjC Garbage Collector */
17651                 return FALSE;
17652         }
17653
17654         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
17655             object->vo_size != 2048 * 4096) {
17656                 /* ... not a "MALLOC_SMALL" heap */
17657                 return FALSE;
17658         }
17659
17660         /*
17661          * All the criteria match: we have a large object being targeted for "true_share".
17662          * To limit the adverse side-effects linked with "true_share", tell the caller to
17663          * try and avoid setting up the entire object for "true_share" by clipping the
17664          * targeted range and setting it up for copy-on-write.
17665          */
17666         return TRUE;
17667 }
17668
17669 vm_map_offset_t
17670 vm_map_round_page_mask(
17671         vm_map_offset_t offset,
17672         vm_map_offset_t mask)
17673 {
17674         return VM_MAP_ROUND_PAGE(offset, mask);
17675 }
17676
17677 vm_map_offset_t
17678 vm_map_trunc_page_mask(
17679         vm_map_offset_t offset,
17680         vm_map_offset_t mask)
17681 {
17682         return VM_MAP_TRUNC_PAGE(offset, mask);
17683 }
17684
17685 boolean_t
17686 vm_map_page_aligned(
17687         vm_map_offset_t offset,
17688         vm_map_offset_t mask)
17689 {
17690         return ((offset) & mask) == 0;
17691 }
17692
17693 int
17694 vm_map_page_shift(
17695         vm_map_t map)
17696 {
17697         return VM_MAP_PAGE_SHIFT(map);
17698 }
17699
17700 int
17701 vm_map_page_size(
17702         vm_map_t map)
17703 {
17704         return VM_MAP_PAGE_SIZE(map);
17705 }
17706
17707 vm_map_offset_t
17708 vm_map_page_mask(
17709         vm_map_t map)
17710 {
17711         return VM_MAP_PAGE_MASK(map);
17712 }
17713
17714 kern_return_t
17715 vm_map_set_page_shift(
17716         vm_map_t        map,
17717         int             pageshift)
17718 {
17719         if (map->hdr.nentries != 0) {
17720                 /* too late to change page size */
17721                 return KERN_FAILURE;
17722         }
17723
17724         map->hdr.page_shift = pageshift;
17725
17726         return KERN_SUCCESS;
17727 }
17728
17729 kern_return_t
17730 vm_map_query_volatile(
17731         vm_map_t        map,
17732         mach_vm_size_t  *volatile_virtual_size_p,
17733         mach_vm_size_t  *volatile_resident_size_p,
17734         mach_vm_size_t  *volatile_compressed_size_p,
17735         mach_vm_size_t  *volatile_pmap_size_p,
17736         mach_vm_size_t  *volatile_compressed_pmap_size_p)
17737 {
17738         mach_vm_size_t  volatile_virtual_size;
17739         mach_vm_size_t  volatile_resident_count;
17740         mach_vm_size_t  volatile_compressed_count;
17741         mach_vm_size_t  volatile_pmap_count;
17742         mach_vm_size_t  volatile_compressed_pmap_count;
17743         mach_vm_size_t  resident_count;
17744         vm_map_entry_t  entry;
17745         vm_object_t     object;
17746
17747         /* map should be locked by caller */
17748
17749         volatile_virtual_size = 0;
17750         volatile_resident_count = 0;
17751         volatile_compressed_count = 0;
17752         volatile_pmap_count = 0;
17753         volatile_compressed_pmap_count = 0;
17754
17755         for (entry = vm_map_first_entry(map);
17756              entry != vm_map_to_entry(map);
17757              entry = entry->vme_next) {
17758                 mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
17759
17760                 if (entry->is_sub_map) {
17761                         continue;
17762                 }
17763                 if (! (entry->protection & VM_PROT_WRITE)) {
17764                         continue;
17765                 }
17766                 object = VME_OBJECT(entry);
17767                 if (object == VM_OBJECT_NULL) {
17768                         continue;
17769                 }
17770                 if (object->purgable != VM_PURGABLE_VOLATILE &&
17771                     object->purgable != VM_PURGABLE_EMPTY) {
17772                         continue;
17773                 }
17774                 if (VME_OFFSET(entry)) {
17775                         /*
17776                          * If the map entry has been split and the object now
17777                          * appears several times in the VM map, we don't want
17778                          * to count the object's resident_page_count more than
17779                          * once.  We count it only for the first one, starting
17780                          * at offset 0 and ignore the other VM map entries.
17781                          */
17782                         continue;
17783                 }
17784                 resident_count = object->resident_page_count;
17785                 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
17786                         resident_count = 0;
17787                 } else {
17788                         resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
17789                 }
17790
17791                 volatile_virtual_size += entry->vme_end - entry->vme_start;
17792                 volatile_resident_count += resident_count;
17793                 if (object->pager) {
17794                         volatile_compressed_count +=
17795                                 vm_compressor_pager_get_count(object->pager);
17796                 }
17797                 pmap_compressed_bytes = 0;
17798                 pmap_resident_bytes =
17799                         pmap_query_resident(map->pmap,
17800                                             entry->vme_start,
17801                                             entry->vme_end,
17802                                             &pmap_compressed_bytes);
17803                 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
17804                 volatile_compressed_pmap_count += (pmap_compressed_bytes
17805                                                    / PAGE_SIZE);
17806         }
17807
17808         /* map is still locked on return */
17809
17810         *volatile_virtual_size_p = volatile_virtual_size;
17811         *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
17812         *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
17813         *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
17814         *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
17815
17816         return KERN_SUCCESS;
17817 }
17818
17819 void
17820 vm_map_sizes(vm_map_t map,
17821                 vm_map_size_t * psize,
17822                 vm_map_size_t * pfree,
17823                 vm_map_size_t * plargest_free)
17824 {
17825     vm_map_entry_t  entry;
17826     vm_map_offset_t prev;
17827     vm_map_size_t   free, total_free, largest_free;
17828     boolean_t       end;
17829
17830     if (!map)
17831     {
17832         *psize = *pfree = *plargest_free = 0;
17833         return;
17834     }
17835     total_free = largest_free = 0;
17836
17837     vm_map_lock_read(map);
17838     if (psize) *psize = map->max_offset - map->min_offset;
17839
17840     prev = map->min_offset;
17841     for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
17842     {
17843         end = (entry == vm_map_to_entry(map));
17844
17845         if (end) free = entry->vme_end   - prev;
17846         else     free = entry->vme_start - prev;
17847
17848         total_free += free;
17849         if (free > largest_free) largest_free = free;
17850
17851         if (end) break;
17852         prev = entry->vme_end;
17853     }
17854     vm_map_unlock_read(map);
17855     if (pfree)         *pfree = total_free;
17856     if (plargest_free) *plargest_free = largest_free;
17857 }
17858
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);

/*
 *	vm_map_shadow_max:
 *
 *	Walk every entry of "map" and return the length of the longest
 *	shadow chain found behind any entry's VM object, i.e. the largest
 *	number of ->shadow links that can be followed from an entry's
 *	object before reaching an object with no shadow.
 *
 *	Submap entries and entries with no object are ignored.
 *	Returns 0 when "map" is NULL.
 *
 *	The map is read-locked for the whole walk.  While a chain is being
 *	followed, the next object is locked (shared) before the current
 *	one is released.
 */
int
vm_map_shadow_max(
        vm_map_t map)
{
        vm_map_entry_t  cur;
        vm_object_t     obj, shadow;
        int             depth;
        int             deepest = 0;

        if (map == NULL) {
                return 0;
        }

        vm_map_lock_read(map);

        for (cur = vm_map_first_entry(map);
             cur != vm_map_to_entry(map);
             cur = cur->vme_next) {
                /* Only entries backed directly by an object are measured. */
                if (cur->is_sub_map) {
                        continue;
                }
                obj = VME_OBJECT(cur);
                if (obj == NULL) {
                        continue;
                }

                depth = 0;
                vm_object_lock_shared(obj);
                while (obj->shadow != NULL) {
                        /* Take the next lock before dropping this one. */
                        shadow = obj->shadow;
                        vm_object_lock_shared(shadow);
                        vm_object_unlock(obj);
                        obj = shadow;
                        depth++;
                }
                vm_object_unlock(obj);

                if (deepest < depth) {
                        deepest = depth;
                }
        }

        vm_map_unlock_read(map);

        return deepest;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
17904
17905 void vm_commit_pagezero_status(vm_map_t lmap) {
17906         pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
17907 }
17908
17909 #if __x86_64__
17910 void
17911 vm_map_set_high_start(
17912         vm_map_t        map,
17913         vm_map_offset_t high_start)
17914 {
17915         map->vmmap_high_start = high_start;
17916 }
17917 #endif /* __x86_64__ */