[apple/xnu.git] osfmk/vm/vm_map.c (xnu-3789.41.3)
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc.h>
89
90 #include <vm/cpm.h>
91 #include <vm/vm_compressor_pager.h>
92 #include <vm/vm_init.h>
93 #include <vm/vm_fault.h>
94 #include <vm/vm_map.h>
95 #include <vm/vm_object.h>
96 #include <vm/vm_page.h>
97 #include <vm/vm_pageout.h>
98 #include <vm/vm_kern.h>
99 #include <ipc/ipc_port.h>
100 #include <kern/sched_prim.h>
101 #include <kern/misc_protos.h>
102 #include <kern/xpr.h>
103
104 #include <mach/vm_map_server.h>
105 #include <mach/mach_host_server.h>
106 #include <vm/vm_protos.h>
107 #include <vm/vm_purgeable_internal.h>
108
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
111 #include <vm/vm_map_store.h>
112
113 extern int proc_selfpid(void);
114 extern char *proc_name_address(void *p);
115
116 #if VM_MAP_DEBUG_APPLE_PROTECT
117 int vm_map_debug_apple_protect = 0;
118 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
119 #if VM_MAP_DEBUG_FOURK
120 int vm_map_debug_fourk = 0;
121 #endif /* VM_MAP_DEBUG_FOURK */
122
123 extern u_int32_t random(void); /* from <libkern/libkern.h> */
124 /* Internal prototypes
125 */
126
127 static void vm_map_simplify_range(
128 vm_map_t map,
129 vm_map_offset_t start,
130 vm_map_offset_t end); /* forward */
131
132 static boolean_t vm_map_range_check(
133 vm_map_t map,
134 vm_map_offset_t start,
135 vm_map_offset_t end,
136 vm_map_entry_t *entry);
137
138 static vm_map_entry_t _vm_map_entry_create(
139 struct vm_map_header *map_header, boolean_t map_locked);
140
141 static void _vm_map_entry_dispose(
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry);
144
145 static void vm_map_pmap_enter(
146 vm_map_t map,
147 vm_map_offset_t addr,
148 vm_map_offset_t end_addr,
149 vm_object_t object,
150 vm_object_offset_t offset,
151 vm_prot_t protection);
152
153 static void _vm_map_clip_end(
154 struct vm_map_header *map_header,
155 vm_map_entry_t entry,
156 vm_map_offset_t end);
157
158 static void _vm_map_clip_start(
159 struct vm_map_header *map_header,
160 vm_map_entry_t entry,
161 vm_map_offset_t start);
162
163 static void vm_map_entry_delete(
164 vm_map_t map,
165 vm_map_entry_t entry);
166
167 static kern_return_t vm_map_delete(
168 vm_map_t map,
169 vm_map_offset_t start,
170 vm_map_offset_t end,
171 int flags,
172 vm_map_t zap_map);
173
174 static kern_return_t vm_map_copy_overwrite_unaligned(
175 vm_map_t dst_map,
176 vm_map_entry_t entry,
177 vm_map_copy_t copy,
178 vm_map_address_t start,
179 boolean_t discard_on_success);
180
181 static kern_return_t vm_map_copy_overwrite_aligned(
182 vm_map_t dst_map,
183 vm_map_entry_t tmp_entry,
184 vm_map_copy_t copy,
185 vm_map_offset_t start,
186 pmap_t pmap);
187
188 static kern_return_t vm_map_copyin_kernel_buffer(
189 vm_map_t src_map,
190 vm_map_address_t src_addr,
191 vm_map_size_t len,
192 boolean_t src_destroy,
193 vm_map_copy_t *copy_result); /* OUT */
194
195 static kern_return_t vm_map_copyout_kernel_buffer(
196 vm_map_t map,
197 vm_map_address_t *addr, /* IN/OUT */
198 vm_map_copy_t copy,
199 vm_map_size_t copy_size,
200 boolean_t overwrite,
201 boolean_t consume_on_success);
202
203 static void vm_map_fork_share(
204 vm_map_t old_map,
205 vm_map_entry_t old_entry,
206 vm_map_t new_map);
207
208 static boolean_t vm_map_fork_copy(
209 vm_map_t old_map,
210 vm_map_entry_t *old_entry_p,
211 vm_map_t new_map,
212 int vm_map_copyin_flags);
213
214 void vm_map_region_top_walk(
215 vm_map_entry_t entry,
216 vm_region_top_info_t top);
217
218 void vm_map_region_walk(
219 vm_map_t map,
220 vm_map_offset_t va,
221 vm_map_entry_t entry,
222 vm_object_offset_t offset,
223 vm_object_size_t range,
224 vm_region_extended_info_t extended,
225 boolean_t look_for_pages,
226 mach_msg_type_number_t count);
227
228 static kern_return_t vm_map_wire_nested(
229 vm_map_t map,
230 vm_map_offset_t start,
231 vm_map_offset_t end,
232 vm_prot_t caller_prot,
233 boolean_t user_wire,
234 pmap_t map_pmap,
235 vm_map_offset_t pmap_addr,
236 ppnum_t *physpage_p);
237
238 static kern_return_t vm_map_unwire_nested(
239 vm_map_t map,
240 vm_map_offset_t start,
241 vm_map_offset_t end,
242 boolean_t user_wire,
243 pmap_t map_pmap,
244 vm_map_offset_t pmap_addr);
245
246 static kern_return_t vm_map_overwrite_submap_recurse(
247 vm_map_t dst_map,
248 vm_map_offset_t dst_addr,
249 vm_map_size_t dst_size);
250
251 static kern_return_t vm_map_copy_overwrite_nested(
252 vm_map_t dst_map,
253 vm_map_offset_t dst_addr,
254 vm_map_copy_t copy,
255 boolean_t interruptible,
256 pmap_t pmap,
257 boolean_t discard_on_success);
258
259 static kern_return_t vm_map_remap_extract(
260 vm_map_t map,
261 vm_map_offset_t addr,
262 vm_map_size_t size,
263 boolean_t copy,
264 struct vm_map_header *map_header,
265 vm_prot_t *cur_protection,
266 vm_prot_t *max_protection,
267 vm_inherit_t inheritance,
268 boolean_t pageable,
269 boolean_t same_map);
270
271 static kern_return_t vm_map_remap_range_allocate(
272 vm_map_t map,
273 vm_map_address_t *address,
274 vm_map_size_t size,
275 vm_map_offset_t mask,
276 int flags,
277 vm_map_entry_t *map_entry);
278
279 static void vm_map_region_look_for_page(
280 vm_map_t map,
281 vm_map_offset_t va,
282 vm_object_t object,
283 vm_object_offset_t offset,
284 int max_refcnt,
285 int depth,
286 vm_region_extended_info_t extended,
287 mach_msg_type_number_t count);
288
289 static int vm_map_region_count_obj_refs(
290 vm_map_entry_t entry,
291 vm_object_t object);
292
293
294 static kern_return_t vm_map_willneed(
295 vm_map_t map,
296 vm_map_offset_t start,
297 vm_map_offset_t end);
298
299 static kern_return_t vm_map_reuse_pages(
300 vm_map_t map,
301 vm_map_offset_t start,
302 vm_map_offset_t end);
303
304 static kern_return_t vm_map_reusable_pages(
305 vm_map_t map,
306 vm_map_offset_t start,
307 vm_map_offset_t end);
308
309 static kern_return_t vm_map_can_reuse(
310 vm_map_t map,
311 vm_map_offset_t start,
312 vm_map_offset_t end);
313
314 #if MACH_ASSERT
315 static kern_return_t vm_map_pageout(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end);
319 #endif /* MACH_ASSERT */
320
321 /*
322 * Macros to copy a vm_map_entry. We must be careful to correctly
323 * manage the wired page count. vm_map_entry_copy() creates a new
324 * map entry that refers to the same memory - the wired count in the
325 * new entry must be set to zero. vm_map_entry_copy_full() creates a new
326 * entry that is identical to the old entry. This preserves the
327 * wire count; it's used for map splitting and zone changing in
328 * vm_map_copyout.
329 */
330
331 #define vm_map_entry_copy(NEW,OLD) \
332 MACRO_BEGIN \
333 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
334 *(NEW) = *(OLD); \
335 (NEW)->is_shared = FALSE; \
336 (NEW)->needs_wakeup = FALSE; \
337 (NEW)->in_transition = FALSE; \
338 (NEW)->wired_count = 0; \
339 (NEW)->user_wired_count = 0; \
340 (NEW)->permanent = FALSE; \
341 (NEW)->used_for_jit = FALSE; \
342 (NEW)->from_reserved_zone = _vmec_reserved; \
343 (NEW)->iokit_acct = FALSE; \
344 (NEW)->vme_resilient_codesign = FALSE; \
345 (NEW)->vme_resilient_media = FALSE; \
346 (NEW)->vme_atomic = FALSE; \
347 MACRO_END
348
349 #define vm_map_entry_copy_full(NEW,OLD) \
350 MACRO_BEGIN \
351 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
352 (*(NEW) = *(OLD)); \
353 (NEW)->from_reserved_zone = _vmecf_reserved; \
354 MACRO_END
355
356 /*
357 * Decide if we want to allow processes to execute from their data or stack areas.
358 * override_nx() returns true if we do. Data/stack execution can be enabled independently
359 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
360 * or allow_stack_exec to enable data execution for that type of data area for that particular
361 * ABI (or both by or'ing the flags together). These are initialized in the architecture
362 * specific pmap files since the default behavior varies according to architecture. The
363 * main reason it varies is because of the need to provide binary compatibility with old
364 * applications that were written before these restrictions came into being. In the old
365 * days, an app could execute anything it could read, but this has slowly been tightened
366 * up over time. The default behavior is:
367 *
368 * 32-bit PPC apps may execute from both stack and data areas
369 * 32-bit Intel apps may execute from data areas but not stack
370 * 64-bit PPC/Intel apps may not execute from either data or stack
371 *
372 * An application on any architecture may override these defaults by explicitly
373 * adding PROT_EXEC permission to the page in question with the mprotect(2)
374 * system call. This code here just determines what happens when an app tries to
375 * execute from a page that lacks execute permission.
376 *
377 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
378 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
379 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
380 * execution from data areas for a particular binary even if the arch normally permits it. As
381 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
382 * to support some complicated use cases, notably browsers with out-of-process plugins that
383 * are not all NX-safe.
384 */
385
386 extern int allow_data_exec, allow_stack_exec;
387
388 int
389 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
390 {
391 int current_abi;
392
393 if (map->pmap == kernel_pmap) return FALSE;
394
395 /*
396 * Determine if the app is running in 32 or 64 bit mode.
397 */
398
399 if (vm_map_is_64bit(map))
400 current_abi = VM_ABI_64;
401 else
402 current_abi = VM_ABI_32;
403
404 /*
405 * Determine if we should allow the execution based on whether it's a
406 * stack or data area and the current architecture.
407 */
408
409 if (user_tag == VM_MEMORY_STACK)
410 return allow_stack_exec & current_abi;
411
412 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
413 }
414
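#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * The block comment above notes that an application can regain execute
 * permission on a data page with mprotect(2), regardless of the
 * allow_data_exec/allow_stack_exec defaults that override_nx() enforces.
 * A minimal user-space illustration of that override (POSIX C; assumes
 * an x86-64 process, where the single 0xc3 byte is a "ret"):
 */
#include <sys/mman.h>
#include <string.h>

int
main(void)
{
	unsigned char ret_insn = 0xc3;	/* x86-64 "ret" */
	void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_ANON | MAP_PRIVATE, -1, 0);

	if (buf == MAP_FAILED)
		return 1;
	memcpy(buf, &ret_insn, sizeof(ret_insn));
	/*
	 * Without this mprotect(), calling into "buf" only works if
	 * allow_data_exec permits it for this ABI; with the explicit
	 * PROT_EXEC request, the kernel honors it.
	 */
	if (mprotect(buf, 4096, PROT_READ | PROT_EXEC) != 0)
		return 1;
	((void (*)(void))buf)();
	return 0;
}
#endif	/* editor's sketch */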
415
416 /*
417 * Virtual memory maps provide for the mapping, protection,
418 * and sharing of virtual memory objects. In addition,
419 * this module provides for an efficient virtual copy of
420 * memory from one map to another.
421 *
422 * Synchronization is required prior to most operations.
423 *
424 * Maps consist of an ordered doubly-linked list of simple
425 * entries; a single hint is used to speed up lookups.
426 *
427 * Sharing maps have been deleted from this version of Mach.
428 * All shared objects are now mapped directly into the respective
429 * maps. This requires a change in the copy on write strategy;
430 * the asymmetric (delayed) strategy is used for shared temporary
431 * objects instead of the symmetric (shadow) strategy. All maps
432 * are now "top level" maps (either task map, kernel map or submap
433 * of the kernel map).
434 *
435 * Since portions of maps are specified by start/end addresses,
436 * which may not align with existing map entries, all
437 * routines merely "clip" entries to these start/end values.
438 * [That is, an entry is split into two, bordering at a
439 * start or end value.] Note that these clippings may not
440 * always be necessary (as the two resulting entries are then
441 * not changed); however, the clipping is done for convenience.
442 * No attempt is currently made to "glue back together" two
443 * abutting entries.
444 *
445 * The symmetric (shadow) copy strategy implements virtual copy
446 * by copying VM object references from one map to
447 * another, and then marking both regions as copy-on-write.
448 * It is important to note that only one writeable reference
449 * to a VM object region exists in any map when this strategy
450 * is used -- this means that shadow object creation can be
451 * delayed until a write operation occurs. The asymmetric (delayed)
452 * strategy allows multiple maps to have writeable references to
453 * the same region of a vm object, and hence cannot delay creating
454 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
455 * Copying of permanent objects is completely different; see
456 * vm_object_copy_strategically() in vm_object.c.
457 */
458
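#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * A stand-alone toy model of the "clipping" described above: one
 * [start, end) entry is split into two abutting entries at a given
 * address, with the second entry inheriting the first's attributes.
 * This only illustrates the idea; the real work is done by
 * _vm_map_clip_start()/_vm_map_clip_end() later in this file, and the
 * struct and function names here are made up for the example.
 */
#include <stdlib.h>

struct toy_entry {
	unsigned long		start;	/* inclusive */
	unsigned long		end;	/* exclusive */
	struct toy_entry	*next;
};

static void
toy_clip(struct toy_entry *e, unsigned long addr)
{
	struct toy_entry *tail;

	/* nothing to do unless addr falls strictly inside the entry */
	if (addr <= e->start || addr >= e->end)
		return;

	tail = malloc(sizeof(*tail));
	if (tail == NULL)
		return;
	*tail = *e;		/* copy attributes, as vm_map_entry_copy() does */
	tail->start = addr;	/* new entry covers [addr, end) */
	e->end = addr;		/* old entry now covers [start, addr) */
	e->next = tail;		/* the two entries abut, as noted above */
}
#endif	/* editor's sketch */
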
459 static zone_t vm_map_zone; /* zone for vm_map structures */
460 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
461 zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
462 * allocations */
463 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
464 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
465
466
467 /*
468 * Placeholder object for submap operations. This object is dropped
469 * into the range by a call to vm_map_find, and removed when
470 * vm_map_submap creates the submap.
471 */
472
473 vm_object_t vm_submap_object;
474
475 static void *map_data;
476 static vm_size_t map_data_size;
477 static void *kentry_data;
478 static vm_size_t kentry_data_size;
479 static void *map_holes_data;
480 static vm_size_t map_holes_data_size;
481
482 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
483
484 /* Skip acquiring locks if we're in the midst of a kernel core dump */
485 unsigned int not_in_kdp = 1;
486
487 unsigned int vm_map_set_cache_attr_count = 0;
488
489 kern_return_t
490 vm_map_set_cache_attr(
491 vm_map_t map,
492 vm_map_offset_t va)
493 {
494 vm_map_entry_t map_entry;
495 vm_object_t object;
496 kern_return_t kr = KERN_SUCCESS;
497
498 vm_map_lock_read(map);
499
500 if (!vm_map_lookup_entry(map, va, &map_entry) ||
501 map_entry->is_sub_map) {
502 /*
503 * that memory is not properly mapped
504 */
505 kr = KERN_INVALID_ARGUMENT;
506 goto done;
507 }
508 object = VME_OBJECT(map_entry);
509
510 if (object == VM_OBJECT_NULL) {
511 /*
512 * there should be a VM object here at this point
513 */
514 kr = KERN_INVALID_ARGUMENT;
515 goto done;
516 }
517 vm_object_lock(object);
518 object->set_cache_attr = TRUE;
519 vm_object_unlock(object);
520
521 vm_map_set_cache_attr_count++;
522 done:
523 vm_map_unlock_read(map);
524
525 return kr;
526 }
527
528
529 #if CONFIG_CODE_DECRYPTION
530 /*
531 * vm_map_apple_protected:
532 * This remaps the requested part of the object with an object backed by
533 * the decrypting pager.
534 * crypt_info contains entry points and session data for the crypt module.
535 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
536 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
537 */
538 kern_return_t
539 vm_map_apple_protected(
540 vm_map_t map,
541 vm_map_offset_t start,
542 vm_map_offset_t end,
543 vm_object_offset_t crypto_backing_offset,
544 struct pager_crypt_info *crypt_info)
545 {
546 boolean_t map_locked;
547 kern_return_t kr;
548 vm_map_entry_t map_entry;
549 struct vm_map_entry tmp_entry;
550 memory_object_t unprotected_mem_obj;
551 vm_object_t protected_object;
552 vm_map_offset_t map_addr;
553 vm_map_offset_t start_aligned, end_aligned;
554 vm_object_offset_t crypto_start, crypto_end;
555 int vm_flags;
556
557 map_locked = FALSE;
558 unprotected_mem_obj = MEMORY_OBJECT_NULL;
559
560 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
561 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
562 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
563 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
564
565 assert(start_aligned == start);
566 assert(end_aligned == end);
567
568 map_addr = start_aligned;
569 for (map_addr = start_aligned;
570 map_addr < end;
571 map_addr = tmp_entry.vme_end) {
572 vm_map_lock(map);
573 map_locked = TRUE;
574
575 /* lookup the protected VM object */
576 if (!vm_map_lookup_entry(map,
577 map_addr,
578 &map_entry) ||
579 map_entry->is_sub_map ||
580 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
581 !(map_entry->protection & VM_PROT_EXECUTE)) {
582 /* that memory is not properly mapped */
583 kr = KERN_INVALID_ARGUMENT;
584 goto done;
585 }
586
587 /* get the protected object to be decrypted */
588 protected_object = VME_OBJECT(map_entry);
589 if (protected_object == VM_OBJECT_NULL) {
590 /* there should be a VM object here at this point */
591 kr = KERN_INVALID_ARGUMENT;
592 goto done;
593 }
594 /* ensure protected object stays alive while map is unlocked */
595 vm_object_reference(protected_object);
596
597 /* limit the map entry to the area we want to cover */
598 vm_map_clip_start(map, map_entry, start_aligned);
599 vm_map_clip_end(map, map_entry, end_aligned);
600
601 tmp_entry = *map_entry;
602 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
603 vm_map_unlock(map);
604 map_locked = FALSE;
605
606 /*
607 * This map entry might be only partially encrypted
608 * (if not fully "page-aligned").
609 */
610 crypto_start = 0;
611 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
612 if (tmp_entry.vme_start < start) {
613 if (tmp_entry.vme_start != start_aligned) {
614 kr = KERN_INVALID_ADDRESS;
615 }
616 crypto_start += (start - tmp_entry.vme_start);
617 }
618 if (tmp_entry.vme_end > end) {
619 if (tmp_entry.vme_end != end_aligned) {
620 kr = KERN_INVALID_ADDRESS;
621 }
622 crypto_end -= (tmp_entry.vme_end - end);
623 }
624
625 /*
626 * This "extra backing offset" is needed to get the decryption
627 * routine to use the right key. It adjusts for the possibly
628 * relative offset of an interposed "4K" pager...
629 */
630 if (crypto_backing_offset == (vm_object_offset_t) -1) {
631 crypto_backing_offset = VME_OFFSET(&tmp_entry);
632 }
633
634 /*
635 * Lookup (and create if necessary) the protected memory object
636 * matching that VM object.
637 * If successful, this also grabs a reference on the memory object,
638 * to guarantee that it doesn't go away before we get a chance to map
639 * it.
640 */
641 unprotected_mem_obj = apple_protect_pager_setup(
642 protected_object,
643 VME_OFFSET(&tmp_entry),
644 crypto_backing_offset,
645 crypt_info,
646 crypto_start,
647 crypto_end);
648
649 /* release extra ref on protected object */
650 vm_object_deallocate(protected_object);
651
652 if (unprotected_mem_obj == NULL) {
653 kr = KERN_FAILURE;
654 goto done;
655 }
656
657 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
658
659 /* map this memory object in place of the current one */
660 map_addr = tmp_entry.vme_start;
661 kr = vm_map_enter_mem_object(map,
662 &map_addr,
663 (tmp_entry.vme_end -
664 tmp_entry.vme_start),
665 (mach_vm_offset_t) 0,
666 vm_flags,
667 (ipc_port_t) unprotected_mem_obj,
668 0,
669 TRUE,
670 tmp_entry.protection,
671 tmp_entry.max_protection,
672 tmp_entry.inheritance);
673 assert(kr == KERN_SUCCESS);
674 assert(map_addr == tmp_entry.vme_start);
675
676 #if VM_MAP_DEBUG_APPLE_PROTECT
677 if (vm_map_debug_apple_protect) {
678 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
679 " backing:[object:%p,offset:0x%llx,"
680 "crypto_backing_offset:0x%llx,"
681 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
682 map,
683 (uint64_t) map_addr,
684 (uint64_t) (map_addr + (tmp_entry.vme_end -
685 tmp_entry.vme_start)),
686 unprotected_mem_obj,
687 protected_object,
688 VME_OFFSET(&tmp_entry),
689 crypto_backing_offset,
690 crypto_start,
691 crypto_end);
692 }
693 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
694
695 /*
696 * Release the reference obtained by
697 * apple_protect_pager_setup().
698 * The mapping (if it succeeded) is now holding a reference on
699 * the memory object.
700 */
701 memory_object_deallocate(unprotected_mem_obj);
702 unprotected_mem_obj = MEMORY_OBJECT_NULL;
703
704 /* continue with next map entry */
705 crypto_backing_offset += (tmp_entry.vme_end -
706 tmp_entry.vme_start);
707 crypto_backing_offset -= crypto_start;
708 }
709 kr = KERN_SUCCESS;
710
711 done:
712 if (map_locked) {
713 vm_map_unlock(map);
714 }
715 return kr;
716 }
717 #endif /* CONFIG_CODE_DECRYPTION */
718
719
720 lck_grp_t vm_map_lck_grp;
721 lck_grp_attr_t vm_map_lck_grp_attr;
722 lck_attr_t vm_map_lck_attr;
723 lck_attr_t vm_map_lck_rw_attr;
724
725
726 /*
727 * vm_map_init:
728 *
729 * Initialize the vm_map module. Must be called before
730 * any other vm_map routines.
731 *
732 * Map and entry structures are allocated from zones -- we must
733 * initialize those zones.
734 *
735 * There are three zones of interest:
736 *
737 * vm_map_zone: used to allocate maps.
738 * vm_map_entry_zone: used to allocate map entries.
739 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
740 *
741 * The kernel allocates map entries from a special zone that is initially
742 * "crammed" with memory. It would be difficult (perhaps impossible) for
743 * the kernel to allocate more memory to an entry zone when it became
744 * empty since the very act of allocating memory implies the creation
745 * of a new entry.
746 */
747 void
748 vm_map_init(
749 void)
750 {
751 vm_size_t entry_zone_alloc_size;
752 const char *mez_name = "VM map entries";
753
754 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
755 PAGE_SIZE, "maps");
756 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
757 #if defined(__LP64__)
758 entry_zone_alloc_size = PAGE_SIZE * 5;
759 #else
760 entry_zone_alloc_size = PAGE_SIZE * 6;
761 #endif
762 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
763 1024*1024, entry_zone_alloc_size,
764 mez_name);
765 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
766 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
767 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
768
769 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
770 kentry_data_size * 64, kentry_data_size,
771 "Reserved VM map entries");
772 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
773
774 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
775 16*1024, PAGE_SIZE, "VM map copies");
776 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
777
778 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
779 16*1024, PAGE_SIZE, "VM map holes");
780 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
781
782 /*
783 * Cram the map and kentry zones with initial data.
784 * Set reserved_zone non-collectible to aid zone_gc().
785 */
786 zone_change(vm_map_zone, Z_COLLECT, FALSE);
787 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
788
789 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
790 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
791 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
792 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
793 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
794 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
795 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
796
797 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
798 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
799 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
800 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
801 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
802 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
803
804 /*
805 * Add the stolen memory to zones, adjust zone size and stolen counts.
806 */
807 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
808 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
809 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
810 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
811
812 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
813 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
814 lck_attr_setdefault(&vm_map_lck_attr);
815
816 lck_attr_setdefault(&vm_map_lck_rw_attr);
817 lck_attr_cleardebug(&vm_map_lck_rw_attr);
818
819 #if VM_MAP_DEBUG_APPLE_PROTECT
820 PE_parse_boot_argn("vm_map_debug_apple_protect",
821 &vm_map_debug_apple_protect,
822 sizeof(vm_map_debug_apple_protect));
823 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
824 #if VM_MAP_DEBUG_FOURK
825 PE_parse_boot_argn("vm_map_debug_fourk",
826 &vm_map_debug_fourk,
827 sizeof(vm_map_debug_fourk));
828 #endif /* VM_MAP_DEBUG_FOURK */
829 }
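
#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * Hedged illustration of the zone API exercised by vm_map_init() above,
 * applied to a hypothetical "struct my_thing".  The zinit()/zone_change()/
 * zalloc()/zfree() usage mirrors the calls visible in this file; the zone
 * name, element type and size limits are made up for the example.
 */
struct my_thing {
	int	value;
};

static zone_t my_thing_zone;

static void
my_thing_zone_init(void)
{
	/* element size, max zone size, allocation chunk, name */
	my_thing_zone = zinit((vm_map_size_t) sizeof(struct my_thing),
			      16*1024, PAGE_SIZE, "my things");
	zone_change(my_thing_zone, Z_NOENCRYPT, TRUE);
}

static struct my_thing *
my_thing_alloc(void)
{
	return (struct my_thing *) zalloc(my_thing_zone);
}

static void
my_thing_free(struct my_thing *t)
{
	zfree(my_thing_zone, t);
}
#endif	/* editor's sketch */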
830
831 void
832 vm_map_steal_memory(
833 void)
834 {
835 uint32_t kentry_initial_pages;
836
837 map_data_size = round_page(10 * sizeof(struct _vm_map));
838 map_data = pmap_steal_memory(map_data_size);
839
840 /*
841 * kentry_initial_pages corresponds to the number of kernel map entries
842 * required during bootstrap until the asynchronous replenishment
843 * scheme is activated and/or entries are available from the general
844 * map entry pool.
845 */
846 #if defined(__LP64__)
847 kentry_initial_pages = 10;
848 #else
849 kentry_initial_pages = 6;
850 #endif
851
852 #if CONFIG_GZALLOC
853 /* If using the guard allocator, reserve more memory for the kernel
854 * reserved map entry pool.
855 */
856 if (gzalloc_enabled())
857 kentry_initial_pages *= 1024;
858 #endif
859
860 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
861 kentry_data = pmap_steal_memory(kentry_data_size);
862
863 map_holes_data_size = kentry_data_size;
864 map_holes_data = pmap_steal_memory(map_holes_data_size);
865 }
866
867 void
868 vm_kernel_reserved_entry_init(void) {
869 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
870 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
871 }
872
873 void
874 vm_map_disable_hole_optimization(vm_map_t map)
875 {
876 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
877
878 if (map->holelistenabled) {
879
880 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
881
882 while (hole_entry != NULL) {
883
884 next_hole_entry = hole_entry->vme_next;
885
886 hole_entry->vme_next = NULL;
887 hole_entry->vme_prev = NULL;
888 zfree(vm_map_holes_zone, hole_entry);
889
890 if (next_hole_entry == head_entry) {
891 hole_entry = NULL;
892 } else {
893 hole_entry = next_hole_entry;
894 }
895 }
896
897 map->holes_list = NULL;
898 map->holelistenabled = FALSE;
899
900 map->first_free = vm_map_first_entry(map);
901 SAVE_HINT_HOLE_WRITE(map, NULL);
902 }
903 }
904
905 boolean_t
906 vm_kernel_map_is_kernel(vm_map_t map) {
907 return (map->pmap == kernel_pmap);
908 }
909
910 /*
911 * vm_map_create:
912 *
913 * Creates and returns a new empty VM map with
914 * the given physical map structure, and having
915 * the given lower and upper address bounds.
916 */
917
918 boolean_t vm_map_supports_hole_optimization = TRUE;
919
920 vm_map_t
921 vm_map_create(
922 pmap_t pmap,
923 vm_map_offset_t min,
924 vm_map_offset_t max,
925 boolean_t pageable)
926 {
927 static int color_seed = 0;
928 vm_map_t result;
929 struct vm_map_links *hole_entry = NULL;
930
931 result = (vm_map_t) zalloc(vm_map_zone);
932 if (result == VM_MAP_NULL)
933 panic("vm_map_create");
934
935 vm_map_first_entry(result) = vm_map_to_entry(result);
936 vm_map_last_entry(result) = vm_map_to_entry(result);
937 result->hdr.nentries = 0;
938 result->hdr.entries_pageable = pageable;
939
940 vm_map_store_init( &(result->hdr) );
941
942 result->hdr.page_shift = PAGE_SHIFT;
943
944 result->size = 0;
945 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
946 result->user_wire_size = 0;
947 result->ref_count = 1;
948 #if TASK_SWAPPER
949 result->res_count = 1;
950 result->sw_state = MAP_SW_IN;
951 #endif /* TASK_SWAPPER */
952 result->pmap = pmap;
953 result->min_offset = min;
954 result->max_offset = max;
955 result->wiring_required = FALSE;
956 result->no_zero_fill = FALSE;
957 result->mapped_in_other_pmaps = FALSE;
958 result->wait_for_space = FALSE;
959 result->switch_protect = FALSE;
960 result->disable_vmentry_reuse = FALSE;
961 result->map_disallow_data_exec = FALSE;
962 result->is_nested_map = FALSE;
963 result->highest_entry_end = 0;
964 result->first_free = vm_map_to_entry(result);
965 result->hint = vm_map_to_entry(result);
966 result->color_rr = (color_seed++) & vm_color_mask;
967 result->jit_entry_exists = FALSE;
968
969 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
970 hole_entry = zalloc(vm_map_holes_zone);
971
972 hole_entry->start = min;
973 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
974 result->holes_list = result->hole_hint = hole_entry;
975 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
976 result->holelistenabled = TRUE;
977
978 } else {
979
980 result->holelistenabled = FALSE;
981 }
982
983 vm_map_lock_init(result);
984 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
985
986 return(result);
987 }
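
#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * Hedged sketch of a vm_map_create() call: a temporary, pageable map
 * with no physical map behind it, spanning the same range as a parent
 * map.  PMAP_NULL and the particular bounds are illustrative
 * assumptions, not something this function requires.
 */
static vm_map_t
make_scratch_map(vm_map_t parent)
{
	return vm_map_create(PMAP_NULL,
			     vm_map_min(parent),
			     vm_map_max(parent),
			     parent->hdr.entries_pageable);
}
#endif	/* editor's sketch */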
988
989 /*
990 * vm_map_entry_create: [ internal use only ]
991 *
992 * Allocates a VM map entry for insertion in the
993 * given map (or map copy). No fields are filled.
994 */
995 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
996
997 #define vm_map_copy_entry_create(copy, map_locked) \
998 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
999 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1000
1001 static vm_map_entry_t
1002 _vm_map_entry_create(
1003 struct vm_map_header *map_header, boolean_t __unused map_locked)
1004 {
1005 zone_t zone;
1006 vm_map_entry_t entry;
1007
1008 zone = vm_map_entry_zone;
1009
1010 assert(map_header->entries_pageable ? !map_locked : TRUE);
1011
1012 if (map_header->entries_pageable) {
1013 entry = (vm_map_entry_t) zalloc(zone);
1014 }
1015 else {
1016 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1017
1018 if (entry == VM_MAP_ENTRY_NULL) {
1019 zone = vm_map_entry_reserved_zone;
1020 entry = (vm_map_entry_t) zalloc(zone);
1021 OSAddAtomic(1, &reserved_zalloc_count);
1022 } else
1023 OSAddAtomic(1, &nonreserved_zalloc_count);
1024 }
1025
1026 if (entry == VM_MAP_ENTRY_NULL)
1027 panic("vm_map_entry_create");
1028 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1029
1030 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1031 #if MAP_ENTRY_CREATION_DEBUG
1032 entry->vme_creation_maphdr = map_header;
1033 backtrace(&entry->vme_creation_bt[0],
1034 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1035 #endif
1036 return(entry);
1037 }
1038
1039 /*
1040 * vm_map_entry_dispose: [ internal use only ]
1041 *
1042 * Inverse of vm_map_entry_create.
1043 *
1044 * write map lock held so no need to
1045 * do anything special to ensure correctness
1046 * of the stores
1047 */
1048 #define vm_map_entry_dispose(map, entry) \
1049 _vm_map_entry_dispose(&(map)->hdr, (entry))
1050
1051 #define vm_map_copy_entry_dispose(copy, entry) \
1052 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1053
1054 static void
1055 _vm_map_entry_dispose(
1056 struct vm_map_header *map_header,
1057 vm_map_entry_t entry)
1058 {
1059 zone_t zone;
1060
1061 if (map_header->entries_pageable || !(entry->from_reserved_zone))
1062 zone = vm_map_entry_zone;
1063 else
1064 zone = vm_map_entry_reserved_zone;
1065
1066 if (!map_header->entries_pageable) {
1067 if (zone == vm_map_entry_zone)
1068 OSAddAtomic(-1, &nonreserved_zalloc_count);
1069 else
1070 OSAddAtomic(-1, &reserved_zalloc_count);
1071 }
1072
1073 zfree(zone, entry);
1074 }
1075
1076 #if MACH_ASSERT
1077 static boolean_t first_free_check = FALSE;
1078 boolean_t
1079 first_free_is_valid(
1080 vm_map_t map)
1081 {
1082 if (!first_free_check)
1083 return TRUE;
1084
1085 return( first_free_is_valid_store( map ));
1086 }
1087 #endif /* MACH_ASSERT */
1088
1089
1090 #define vm_map_copy_entry_link(copy, after_where, entry) \
1091 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1092
1093 #define vm_map_copy_entry_unlink(copy, entry) \
1094 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1095
1096 #if MACH_ASSERT && TASK_SWAPPER
1097 /*
1098 * vm_map_res_reference:
1099 *
1100 * Adds another valid residence count to the given map.
1101 *
1102 * Map is locked so this function can be called from
1103 * vm_map_swapin.
1104 *
1105 */
1106 void vm_map_res_reference(vm_map_t map)
1107 {
1108 /* assert map is locked */
1109 assert(map->res_count >= 0);
1110 assert(map->ref_count >= map->res_count);
1111 if (map->res_count == 0) {
1112 lck_mtx_unlock(&map->s_lock);
1113 vm_map_lock(map);
1114 vm_map_swapin(map);
1115 lck_mtx_lock(&map->s_lock);
1116 ++map->res_count;
1117 vm_map_unlock(map);
1118 } else
1119 ++map->res_count;
1120 }
1121
1122 /*
1123 * vm_map_reference_swap:
1124 *
1125 * Adds valid reference and residence counts to the given map.
1126 *
1127 * The map may not be in memory (i.e. zero residence count).
1128 *
1129 */
1130 void vm_map_reference_swap(vm_map_t map)
1131 {
1132 assert(map != VM_MAP_NULL);
1133 lck_mtx_lock(&map->s_lock);
1134 assert(map->res_count >= 0);
1135 assert(map->ref_count >= map->res_count);
1136 map->ref_count++;
1137 vm_map_res_reference(map);
1138 lck_mtx_unlock(&map->s_lock);
1139 }
1140
1141 /*
1142 * vm_map_res_deallocate:
1143 *
1144 * Decrement residence count on a map; possibly causing swapout.
1145 *
1146 * The map must be in memory (i.e. non-zero residence count).
1147 *
1148 * The map is locked, so this function is callable from vm_map_deallocate.
1149 *
1150 */
1151 void vm_map_res_deallocate(vm_map_t map)
1152 {
1153 assert(map->res_count > 0);
1154 if (--map->res_count == 0) {
1155 lck_mtx_unlock(&map->s_lock);
1156 vm_map_lock(map);
1157 vm_map_swapout(map);
1158 vm_map_unlock(map);
1159 lck_mtx_lock(&map->s_lock);
1160 }
1161 assert(map->ref_count >= map->res_count);
1162 }
1163 #endif /* MACH_ASSERT && TASK_SWAPPER */
1164
1165 /*
1166 * vm_map_destroy:
1167 *
1168 * Actually destroy a map.
1169 */
1170 void
1171 vm_map_destroy(
1172 vm_map_t map,
1173 int flags)
1174 {
1175 vm_map_lock(map);
1176
1177 /* final cleanup: no need to unnest shared region */
1178 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1179
1180 /* clean up regular map entries */
1181 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1182 flags, VM_MAP_NULL);
1183 /* clean up leftover special mappings (commpage, etc...) */
1184 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1185 flags, VM_MAP_NULL);
1186
1187 vm_map_disable_hole_optimization(map);
1188 vm_map_unlock(map);
1189
1190 assert(map->hdr.nentries == 0);
1191
1192 if(map->pmap)
1193 pmap_destroy(map->pmap);
1194
1195 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1196 /*
1197 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1198 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1199 * structure or kalloc'ed via lck_mtx_init.
1200 * An example is s_lock_ext within struct _vm_map.
1201 *
1202 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1203 * can add another tag to detect embedded vs alloc'ed indirect external
1204 * mutexes but that'll be additional checks in the lock path and require
1205 * updating dependencies for the old vs new tag.
1206 *
1207 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1208 * just when lock debugging is ON, we choose to forego explicitly destroying
1209 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1210 * count on vm_map_lck_grp, which has no serious side-effect.
1211 */
1212 } else {
1213 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1214 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1215 }
1216
1217 zfree(vm_map_zone, map);
1218 }
1219
1220 #if TASK_SWAPPER
1221 /*
1222 * vm_map_swapin/vm_map_swapout
1223 *
1224 * Swap a map in and out, either referencing or releasing its resources.
1225 * These functions are internal use only; however, they must be exported
1226 * because they may be called from macros, which are exported.
1227 *
1228 * In the case of swapout, there could be races on the residence count,
1229 * so if the residence count is up, we return, assuming that a
1230 * vm_map_deallocate() call in the near future will bring us back.
1231 *
1232 * Locking:
1233 * -- We use the map write lock for synchronization among races.
1234 * -- The map write lock, and not the simple s_lock, protects the
1235 * swap state of the map.
1236 * -- If a map entry is a share map, then we hold both locks, in
1237 * hierarchical order.
1238 *
1239 * Synchronization Notes:
1240 * 1) If a vm_map_swapin() call happens while a swapout is in progress, it
1241 * will block on the map lock and proceed when swapout is through.
1242 * 2) A vm_map_reference() call at this time is illegal, and will
1243 * cause a panic. vm_map_reference() is only allowed on resident
1244 * maps, since it refuses to block.
1245 * 3) A vm_map_swapin() call during a swapin will block, and
1246 * proceed when the first swapin is done, turning into a nop.
1247 * This is the reason the res_count is not incremented until
1248 * after the swapin is complete.
1249 * 4) There is a timing hole after the checks of the res_count, before
1250 * the map lock is taken, during which a swapin may get the lock
1251 * before a swapout about to happen. If this happens, the swapin
1252 * will detect the state and increment the reference count, causing
1253 * the swapout to be a nop, thereby delaying it until a later
1254 * vm_map_deallocate. If the swapout gets the lock first, then
1255 * the swapin will simply block until the swapout is done, and
1256 * then proceed.
1257 *
1258 * Because vm_map_swapin() is potentially an expensive operation, it
1259 * should be used with caution.
1260 *
1261 * Invariants:
1262 * 1) A map with a residence count of zero is either swapped, or
1263 * being swapped.
1264 * 2) A map with a non-zero residence count is either resident,
1265 * or being swapped in.
1266 */
1267
1268 int vm_map_swap_enable = 1;
1269
1270 void vm_map_swapin (vm_map_t map)
1271 {
1272 vm_map_entry_t entry;
1273
1274 if (!vm_map_swap_enable) /* debug */
1275 return;
1276
1277 /*
1278 * Map is locked
1279 * First deal with various races.
1280 */
1281 if (map->sw_state == MAP_SW_IN)
1282 /*
1283 * we raced with swapout and won. Returning will incr.
1284 * the res_count, turning the swapout into a nop.
1285 */
1286 return;
1287
1288 /*
1289 * The residence count must be zero. If we raced with another
1290 * swapin, the state would have been IN; if we raced with a
1291 * swapout (after another competing swapin), we must have lost
1292 * the race to get here (see above comment), in which case
1293 * res_count is still 0.
1294 */
1295 assert(map->res_count == 0);
1296
1297 /*
1298 * There are no intermediate states of a map going out or
1299 * coming in, since the map is locked during the transition.
1300 */
1301 assert(map->sw_state == MAP_SW_OUT);
1302
1303 /*
1304 * We now operate upon each map entry. If the entry is a sub-
1305 * or share-map, we call vm_map_res_reference upon it.
1306 * If the entry is an object, we call vm_object_res_reference
1307 * (this may iterate through the shadow chain).
1308 * Note that we hold the map locked the entire time,
1309 * even if we get back here via a recursive call in
1310 * vm_map_res_reference.
1311 */
1312 entry = vm_map_first_entry(map);
1313
1314 while (entry != vm_map_to_entry(map)) {
1315 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1316 if (entry->is_sub_map) {
1317 vm_map_t lmap = VME_SUBMAP(entry);
1318 lck_mtx_lock(&lmap->s_lock);
1319 vm_map_res_reference(lmap);
1320 lck_mtx_unlock(&lmap->s_lock);
1321 } else {
1322 vm_object_t object = VME_OBJECT(entry);
1323 vm_object_lock(object);
1324 /*
1325 * This call may iterate through the
1326 * shadow chain.
1327 */
1328 vm_object_res_reference(object);
1329 vm_object_unlock(object);
1330 }
1331 }
1332 entry = entry->vme_next;
1333 }
1334 assert(map->sw_state == MAP_SW_OUT);
1335 map->sw_state = MAP_SW_IN;
1336 }
1337
1338 void vm_map_swapout(vm_map_t map)
1339 {
1340 vm_map_entry_t entry;
1341
1342 /*
1343 * Map is locked
1344 * First deal with various races.
1345 * If we raced with a swapin and lost, the residence count
1346 * will have been incremented to 1, and we simply return.
1347 */
1348 lck_mtx_lock(&map->s_lock);
1349 if (map->res_count != 0) {
1350 lck_mtx_unlock(&map->s_lock);
1351 return;
1352 }
1353 lck_mtx_unlock(&map->s_lock);
1354
1355 /*
1356 * There are no intermediate states of a map going out or
1357 * coming in, since the map is locked during the transition.
1358 */
1359 assert(map->sw_state == MAP_SW_IN);
1360
1361 if (!vm_map_swap_enable)
1362 return;
1363
1364 /*
1365 * We now operate upon each map entry. If the entry is a sub-
1366 * or share-map, we call vm_map_res_deallocate upon it.
1367 * If the entry is an object, we call vm_object_res_deallocate
1368 * (this may iterate through the shadow chain).
1369 * Note that we hold the map locked the entire time,
1370 * even if we get back here via a recursive call in
1371 * vm_map_res_deallocate.
1372 */
1373 entry = vm_map_first_entry(map);
1374
1375 while (entry != vm_map_to_entry(map)) {
1376 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1377 if (entry->is_sub_map) {
1378 vm_map_t lmap = VME_SUBMAP(entry);
1379 lck_mtx_lock(&lmap->s_lock);
1380 vm_map_res_deallocate(lmap);
1381 lck_mtx_unlock(&lmap->s_lock);
1382 } else {
1383 vm_object_t object = VME_OBJECT(entry);
1384 vm_object_lock(object);
1385 /*
1386 * This call may take a long time,
1387 * since it could actively push
1388 * out pages (if we implement it
1389 * that way).
1390 */
1391 vm_object_res_deallocate(object);
1392 vm_object_unlock(object);
1393 }
1394 }
1395 entry = entry->vme_next;
1396 }
1397 assert(map->sw_state == MAP_SW_IN);
1398 map->sw_state = MAP_SW_OUT;
1399 }
1400
1401 #endif /* TASK_SWAPPER */
1402
1403 /*
1404 * vm_map_lookup_entry: [ internal use only ]
1405 *
1406 * Calls into the vm map store layer to find the map
1407 * entry containing (or immediately preceding) the
1408 * specified address in the given map; the entry is returned
1409 * in the "entry" parameter. The boolean
1410 * result indicates whether the address is
1411 * actually contained in the map.
1412 */
1413 boolean_t
1414 vm_map_lookup_entry(
1415 vm_map_t map,
1416 vm_map_offset_t address,
1417 vm_map_entry_t *entry) /* OUT */
1418 {
1419 return ( vm_map_store_lookup_entry( map, address, entry ));
1420 }
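
#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * Typical caller pattern for vm_map_lookup_entry(), modeled on
 * vm_map_set_cache_attr() earlier in this file: take the map lock,
 * look the address up, and interpret the boolean result as described
 * in the comment above.  "lookup_example" is a hypothetical caller.
 */
static void
lookup_example(vm_map_t map, vm_map_offset_t addr)
{
	vm_map_entry_t entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* addr lies inside [entry->vme_start, entry->vme_end) */
	} else {
		/*
		 * No entry maps addr; "entry" is the entry immediately
		 * preceding the hole that contains addr (or the map
		 * header, vm_map_to_entry(map), if there is none).
		 */
	}
	vm_map_unlock_read(map);
}
#endif	/* editor's sketch */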
1421
1422 /*
1423 * Routine: vm_map_find_space
1424 * Purpose:
1425 * Allocate a range in the specified virtual address map,
1426 * returning the entry allocated for that range.
1427 * Used by kmem_alloc, etc.
1428 *
1429 * The map must NOT be locked. It will be returned locked
1430 * on KERN_SUCCESS, unlocked on failure.
1431 *
1432 * If an entry is allocated, the object/offset fields
1433 * are initialized to zero.
1434 */
1435 kern_return_t
1436 vm_map_find_space(
1437 vm_map_t map,
1438 vm_map_offset_t *address, /* OUT */
1439 vm_map_size_t size,
1440 vm_map_offset_t mask,
1441 int flags,
1442 vm_map_entry_t *o_entry) /* OUT */
1443 {
1444 vm_map_entry_t entry, new_entry;
1445 vm_map_offset_t start;
1446 vm_map_offset_t end;
1447 vm_map_entry_t hole_entry;
1448
1449 if (size == 0) {
1450 *address = 0;
1451 return KERN_INVALID_ARGUMENT;
1452 }
1453
1454 if (flags & VM_FLAGS_GUARD_AFTER) {
1455 /* account for the back guard page in the size */
1456 size += VM_MAP_PAGE_SIZE(map);
1457 }
1458
1459 new_entry = vm_map_entry_create(map, FALSE);
1460
1461 /*
1462 * Look for the first possible address; if there's already
1463 * something at this address, we have to start after it.
1464 */
1465
1466 vm_map_lock(map);
1467
1468 if( map->disable_vmentry_reuse == TRUE) {
1469 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1470 } else {
1471 if (map->holelistenabled) {
1472 hole_entry = (vm_map_entry_t)map->holes_list;
1473
1474 if (hole_entry == NULL) {
1475 /*
1476 * No more space in the map?
1477 */
1478 vm_map_entry_dispose(map, new_entry);
1479 vm_map_unlock(map);
1480 return(KERN_NO_SPACE);
1481 }
1482
1483 entry = hole_entry;
1484 start = entry->vme_start;
1485 } else {
1486 assert(first_free_is_valid(map));
1487 if ((entry = map->first_free) == vm_map_to_entry(map))
1488 start = map->min_offset;
1489 else
1490 start = entry->vme_end;
1491 }
1492 }
1493
1494 /*
1495 * In any case, the "entry" always precedes
1496 * the proposed new region throughout the loop:
1497 */
1498
1499 while (TRUE) {
1500 vm_map_entry_t next;
1501
1502 /*
1503 * Find the end of the proposed new region.
1504 * Be sure we didn't go beyond the end, or
1505 * wrap around the address.
1506 */
1507
1508 if (flags & VM_FLAGS_GUARD_BEFORE) {
1509 /* reserve space for the front guard page */
1510 start += VM_MAP_PAGE_SIZE(map);
1511 }
1512 end = ((start + mask) & ~mask);
1513
1514 if (end < start) {
1515 vm_map_entry_dispose(map, new_entry);
1516 vm_map_unlock(map);
1517 return(KERN_NO_SPACE);
1518 }
1519 start = end;
1520 end += size;
1521
1522 if ((end > map->max_offset) || (end < start)) {
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527
1528 next = entry->vme_next;
1529
1530 if (map->holelistenabled) {
1531 if (entry->vme_end >= end)
1532 break;
1533 } else {
1534 /*
1535 * If there are no more entries, we must win.
1536 *
1537 * OR
1538 *
1539 * If there is another entry, it must be
1540 * after the end of the potential new region.
1541 */
1542
1543 if (next == vm_map_to_entry(map))
1544 break;
1545
1546 if (next->vme_start >= end)
1547 break;
1548 }
1549
1550 /*
1551 * Didn't fit -- move to the next entry.
1552 */
1553
1554 entry = next;
1555
1556 if (map->holelistenabled) {
1557 if (entry == (vm_map_entry_t) map->holes_list) {
1558 /*
1559 * Wrapped around
1560 */
1561 vm_map_entry_dispose(map, new_entry);
1562 vm_map_unlock(map);
1563 return(KERN_NO_SPACE);
1564 }
1565 start = entry->vme_start;
1566 } else {
1567 start = entry->vme_end;
1568 }
1569 }
1570
1571 if (map->holelistenabled) {
1572 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1573 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1574 }
1575 }
1576
1577 /*
1578 * At this point,
1579 * "start" and "end" should define the endpoints of the
1580 * available new range, and
1581 * "entry" should refer to the region before the new
1582 * range, and
1583 *
1584 * the map should be locked.
1585 */
1586
1587 if (flags & VM_FLAGS_GUARD_BEFORE) {
1588 /* go back for the front guard page */
1589 start -= VM_MAP_PAGE_SIZE(map);
1590 }
1591 *address = start;
1592
1593 assert(start < end);
1594 new_entry->vme_start = start;
1595 new_entry->vme_end = end;
1596 assert(page_aligned(new_entry->vme_start));
1597 assert(page_aligned(new_entry->vme_end));
1598 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1599 VM_MAP_PAGE_MASK(map)));
1600 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1601 VM_MAP_PAGE_MASK(map)));
1602
1603 new_entry->is_shared = FALSE;
1604 new_entry->is_sub_map = FALSE;
1605 new_entry->use_pmap = TRUE;
1606 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1607 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1608
1609 new_entry->needs_copy = FALSE;
1610
1611 new_entry->inheritance = VM_INHERIT_DEFAULT;
1612 new_entry->protection = VM_PROT_DEFAULT;
1613 new_entry->max_protection = VM_PROT_ALL;
1614 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1615 new_entry->wired_count = 0;
1616 new_entry->user_wired_count = 0;
1617
1618 new_entry->in_transition = FALSE;
1619 new_entry->needs_wakeup = FALSE;
1620 new_entry->no_cache = FALSE;
1621 new_entry->permanent = FALSE;
1622 new_entry->superpage_size = FALSE;
1623 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1624 new_entry->map_aligned = TRUE;
1625 } else {
1626 new_entry->map_aligned = FALSE;
1627 }
1628
1629 new_entry->used_for_jit = FALSE;
1630 new_entry->zero_wired_pages = FALSE;
1631 new_entry->iokit_acct = FALSE;
1632 new_entry->vme_resilient_codesign = FALSE;
1633 new_entry->vme_resilient_media = FALSE;
1634 if (flags & VM_FLAGS_ATOMIC_ENTRY)
1635 new_entry->vme_atomic = TRUE;
1636 else
1637 new_entry->vme_atomic = FALSE;
1638
1639 int alias;
1640 VM_GET_FLAGS_ALIAS(flags, alias);
1641 VME_ALIAS_SET(new_entry, alias);
1642
1643 /*
1644 * Insert the new entry into the list
1645 */
1646
1647 vm_map_store_entry_link(map, entry, new_entry);
1648
1649 map->size += size;
1650
1651 /*
1652 * Update the lookup hint
1653 */
1654 SAVE_HINT_MAP_WRITE(map, new_entry);
1655
1656 *o_entry = new_entry;
1657 return(KERN_SUCCESS);
1658 }
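
#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * Hedged sketch of the calling convention documented above: on
 * KERN_SUCCESS the map comes back locked and the new entry has a null
 * object, so a kmem_alloc-style caller installs its own object and
 * then unlocks.  "my_object" and "find_space_example" are hypothetical,
 * not names defined in this file.
 */
static kern_return_t
find_space_example(vm_map_t map, vm_object_t my_object, vm_map_size_t size)
{
	vm_map_offset_t	addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, &addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;		/* map is unlocked on failure */

	VME_OBJECT_SET(entry, my_object);
	VME_OFFSET_SET(entry, 0);
	vm_map_unlock(map);		/* success path returns the map locked */
	return KERN_SUCCESS;
}
#endif	/* editor's sketch */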
1659
1660 int vm_map_pmap_enter_print = FALSE;
1661 int vm_map_pmap_enter_enable = FALSE;
1662
1663 /*
1664 * Routine: vm_map_pmap_enter [internal only]
1665 *
1666 * Description:
1667 * Force pages from the specified object to be entered into
1668 * the pmap at the specified address if they are present.
1669 * As soon as a page is not found in the object, the scan ends.
1670 *
1671 * Returns:
1672 * Nothing.
1673 *
1674 * In/out conditions:
1675 * The source map should not be locked on entry.
1676 */
1677 __unused static void
1678 vm_map_pmap_enter(
1679 vm_map_t map,
1680 vm_map_offset_t addr,
1681 vm_map_offset_t end_addr,
1682 vm_object_t object,
1683 vm_object_offset_t offset,
1684 vm_prot_t protection)
1685 {
1686 int type_of_fault;
1687 kern_return_t kr;
1688
1689 if(map->pmap == 0)
1690 return;
1691
1692 while (addr < end_addr) {
1693 vm_page_t m;
1694
1695
1696 /*
1697 * TODO:
1698 * From vm_map_enter(), we come into this function without the map
1699 * lock held or the object lock held.
1700 * We haven't taken a reference on the object either.
1701 * We should do a proper lookup on the map to make sure
1702 * that things are sane before we go locking objects that
1703 * could have been deallocated from under us.
1704 */
1705
1706 vm_object_lock(object);
1707
1708 m = vm_page_lookup(object, offset);
1709 /*
1710 * ENCRYPTED SWAP:
1711 * The user should never see encrypted data, so do not
1712 * enter an encrypted page in the page table.
1713 */
1714 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1715 m->fictitious ||
1716 (m->unusual && ( m->error || m->restart || m->absent))) {
1717 vm_object_unlock(object);
1718 return;
1719 }
1720
1721 if (vm_map_pmap_enter_print) {
1722 printf("vm_map_pmap_enter:");
1723 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1724 map, (unsigned long long)addr, object, (unsigned long long)offset);
1725 }
1726 type_of_fault = DBG_CACHE_HIT_FAULT;
1727 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1728 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1729 0, /* XXX need user tag / alias? */
1730 0, /* alternate accounting? */
1731 NULL,
1732 &type_of_fault);
1733
1734 vm_object_unlock(object);
1735
1736 offset += PAGE_SIZE_64;
1737 addr += PAGE_SIZE;
1738 }
1739 }
1740
1741 boolean_t vm_map_pmap_is_empty(
1742 vm_map_t map,
1743 vm_map_offset_t start,
1744 vm_map_offset_t end);
1745 boolean_t vm_map_pmap_is_empty(
1746 vm_map_t map,
1747 vm_map_offset_t start,
1748 vm_map_offset_t end)
1749 {
1750 #ifdef MACHINE_PMAP_IS_EMPTY
1751 return pmap_is_empty(map->pmap, start, end);
1752 #else /* MACHINE_PMAP_IS_EMPTY */
1753 vm_map_offset_t offset;
1754 ppnum_t phys_page;
1755
1756 if (map->pmap == NULL) {
1757 return TRUE;
1758 }
1759
1760 for (offset = start;
1761 offset < end;
1762 offset += PAGE_SIZE) {
1763 phys_page = pmap_find_phys(map->pmap, offset);
1764 if (phys_page) {
1765 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1766 "page %d at 0x%llx\n",
1767 map, (long long)start, (long long)end,
1768 phys_page, (long long)offset);
1769 return FALSE;
1770 }
1771 }
1772 return TRUE;
1773 #endif /* MACHINE_PMAP_IS_EMPTY */
1774 }
1775
1776 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1777 kern_return_t
1778 vm_map_random_address_for_size(
1779 vm_map_t map,
1780 vm_map_offset_t *address,
1781 vm_map_size_t size)
1782 {
1783 kern_return_t kr = KERN_SUCCESS;
1784 int tries = 0;
1785 vm_map_offset_t random_addr = 0;
1786 vm_map_offset_t hole_end;
1787
1788 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1789 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1790 vm_map_size_t vm_hole_size = 0;
1791 vm_map_size_t addr_space_size;
1792
1793 addr_space_size = vm_map_max(map) - vm_map_min(map);
1794
1795 assert(page_aligned(size));
1796
1797 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1798 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1799 random_addr = vm_map_trunc_page(
1800 vm_map_min(map) +(random_addr % addr_space_size),
1801 VM_MAP_PAGE_MASK(map));
1802
1803 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1804 if (prev_entry == vm_map_to_entry(map)) {
1805 next_entry = vm_map_first_entry(map);
1806 } else {
1807 next_entry = prev_entry->vme_next;
1808 }
1809 if (next_entry == vm_map_to_entry(map)) {
1810 hole_end = vm_map_max(map);
1811 } else {
1812 hole_end = next_entry->vme_start;
1813 }
1814 vm_hole_size = hole_end - random_addr;
1815 if (vm_hole_size >= size) {
1816 *address = random_addr;
1817 break;
1818 }
1819 }
1820 tries++;
1821 }
1822
1823 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1824 kr = KERN_NO_SPACE;
1825 }
1826 return kr;
1827 }
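
#if 0	/* Editor's sketch -- not part of the original file. */
/*
 * The address derivation used in the retry loop above, restated as a
 * stand-alone user-space helper: scale a random page number into the
 * map's address range, then truncate to page alignment.  The 4K page
 * size and the use of rand() are assumptions for the example only.
 */
#include <stdint.h>
#include <stdlib.h>

#define EXAMPLE_PAGE_SHIFT	12

static uint64_t
random_page_aligned_addr(uint64_t map_min, uint64_t map_max)
{
	uint64_t space = map_max - map_min;
	uint64_t r = ((uint64_t) rand()) << EXAMPLE_PAGE_SHIFT;

	return (map_min + (r % space)) &
	    ~(((uint64_t) 1 << EXAMPLE_PAGE_SHIFT) - 1);
}
#endif	/* editor's sketch */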
1828
1829 /*
1830 * Routine: vm_map_enter
1831 *
1832 * Description:
1833 * Allocate a range in the specified virtual address map.
1834 * The resulting range will refer to memory defined by
1835 * the given memory object and offset into that object.
1836 *
1837 * Arguments are as defined in the vm_map call.
1838 */
1839 int _map_enter_debug = 0;
1840 static unsigned int vm_map_enter_restore_successes = 0;
1841 static unsigned int vm_map_enter_restore_failures = 0;
1842 kern_return_t
1843 vm_map_enter(
1844 vm_map_t map,
1845 vm_map_offset_t *address, /* IN/OUT */
1846 vm_map_size_t size,
1847 vm_map_offset_t mask,
1848 int flags,
1849 vm_object_t object,
1850 vm_object_offset_t offset,
1851 boolean_t needs_copy,
1852 vm_prot_t cur_protection,
1853 vm_prot_t max_protection,
1854 vm_inherit_t inheritance)
1855 {
1856 vm_map_entry_t entry, new_entry;
1857 vm_map_offset_t start, tmp_start, tmp_offset;
1858 vm_map_offset_t end, tmp_end;
1859 vm_map_offset_t tmp2_start, tmp2_end;
1860 vm_map_offset_t step;
1861 kern_return_t result = KERN_SUCCESS;
1862 vm_map_t zap_old_map = VM_MAP_NULL;
1863 vm_map_t zap_new_map = VM_MAP_NULL;
1864 boolean_t map_locked = FALSE;
1865 boolean_t pmap_empty = TRUE;
1866 boolean_t new_mapping_established = FALSE;
1867 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1868 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1869 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1870 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1871 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1872 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1873 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1874 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1875 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1876 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1877 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1878 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
1879 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1880 vm_tag_t alias, user_alias;
1881 vm_map_offset_t effective_min_offset, effective_max_offset;
1882 kern_return_t kr;
1883 boolean_t clear_map_aligned = FALSE;
1884 vm_map_entry_t hole_entry;
1885
1886 if (superpage_size) {
1887 switch (superpage_size) {
1888 /*
1889 * Note that the current implementation only supports
1890 * a single size for superpages, SUPERPAGE_SIZE, per
1891 * architecture. As soon as more sizes are to be
1892 * supported, SUPERPAGE_SIZE has to be replaced
1893 * with a lookup of the size depending on superpage_size.
1894 */
1895 #ifdef __x86_64__
1896 case SUPERPAGE_SIZE_ANY:
1897 /* handle it like 2 MB and round up to page size */
1898 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
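/* FALLTHROUGH: SUPERPAGE_SIZE_ANY is then handled like SUPERPAGE_SIZE_2MB */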
1899 case SUPERPAGE_SIZE_2MB:
1900 break;
1901 #endif
1902 default:
1903 return KERN_INVALID_ARGUMENT;
1904 }
1905 mask = SUPERPAGE_SIZE-1;
1906 if (size & (SUPERPAGE_SIZE-1))
1907 return KERN_INVALID_ARGUMENT;
1908 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1909 }
1910
1911
1912
1913 if (resilient_codesign || resilient_media) {
1914 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1915 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1916 return KERN_PROTECTION_FAILURE;
1917 }
1918 }
1919
1920 if (is_submap) {
1921 if (purgable) {
1922 /* submaps cannot be purgeable */
1923 return KERN_INVALID_ARGUMENT;
1924 }
1925 if (object == VM_OBJECT_NULL) {
1926 /* submaps cannot be created lazily */
1927 return KERN_INVALID_ARGUMENT;
1928 }
1929 }
1930 if (flags & VM_FLAGS_ALREADY) {
1931 /*
1932 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1933 * is already present. For it to be meaningful, the requested
1934 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1935 * we shouldn't try to remove what was mapped there first
1936 * (!VM_FLAGS_OVERWRITE).
1937 */
1938 if ((flags & VM_FLAGS_ANYWHERE) ||
1939 (flags & VM_FLAGS_OVERWRITE)) {
1940 return KERN_INVALID_ARGUMENT;
1941 }
1942 }
1943
1944 effective_min_offset = map->min_offset;
1945
1946 if (flags & VM_FLAGS_BEYOND_MAX) {
1947 /*
1948 * Allow an insertion beyond the map's max offset.
1949 */
1950 if (vm_map_is_64bit(map))
1951 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1952 else
1953 effective_max_offset = 0x00000000FFFFF000ULL;
1954 } else {
1955 effective_max_offset = map->max_offset;
1956 }
1957
1958 if (size == 0 ||
1959 (offset & PAGE_MASK_64) != 0) {
1960 *address = 0;
1961 return KERN_INVALID_ARGUMENT;
1962 }
1963
1964 VM_GET_FLAGS_ALIAS(flags, alias);
1965 if (map->pmap == kernel_pmap) {
1966 user_alias = VM_KERN_MEMORY_NONE;
1967 } else {
1968 user_alias = alias;
1969 }
1970
1971 #define RETURN(value) { result = value; goto BailOut; }
1972
1973 assert(page_aligned(*address));
1974 assert(page_aligned(size));
1975
1976 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1977 /*
1978 * In most cases, the caller rounds the size up to the
1979 * map's page size.
1980 * If we get a size that is explicitly not map-aligned here,
1981 * we'll have to respect the caller's wish and mark the
1982 * mapping as "not map-aligned" to avoid tripping the
1983 * map alignment checks later.
1984 */
1985 clear_map_aligned = TRUE;
1986 }
1987 if (!anywhere &&
1988 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1989 /*
1990 * We've been asked to map at a fixed address and that
1991 * address is not aligned to the map's specific alignment.
1992 * The caller should know what it's doing (i.e. most likely
1993 * mapping some fragmented copy map, transferring memory from
1994 * a VM map with a different alignment), so clear map_aligned
1995 * for this new VM map entry and proceed.
1996 */
1997 clear_map_aligned = TRUE;
1998 }
1999
2000 /*
2001 * Only zero-fill objects are allowed to be purgable.
2002 * LP64todo - limit purgable objects to 32-bits for now
2003 */
2004 if (purgable &&
2005 (offset != 0 ||
2006 (object != VM_OBJECT_NULL &&
2007 (object->vo_size != size ||
2008 object->purgable == VM_PURGABLE_DENY))
2009 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
2010 return KERN_INVALID_ARGUMENT;
2011
2012 if (!anywhere && overwrite) {
2013 /*
2014 * Create a temporary VM map to hold the old mappings in the
2015 * affected area while we create the new one.
2016 * This avoids releasing the VM map lock in
2017 * vm_map_entry_delete() and allows atomicity
2018 * when we want to replace some mappings with a new one.
2019 * It also allows us to restore the old VM mappings if the
2020 * new mapping fails.
2021 */
2022 zap_old_map = vm_map_create(PMAP_NULL,
2023 *address,
2024 *address + size,
2025 map->hdr.entries_pageable);
2026 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2027 vm_map_disable_hole_optimization(zap_old_map);
2028 }
2029
2030 StartAgain: ;
2031
2032 start = *address;
2033
2034 if (anywhere) {
2035 vm_map_lock(map);
2036 map_locked = TRUE;
2037
2038 if (entry_for_jit) {
2039 if (map->jit_entry_exists) {
2040 result = KERN_INVALID_ARGUMENT;
2041 goto BailOut;
2042 }
2043 random_address = TRUE;
2044 }
2045
2046 if (random_address) {
2047 /*
2048 * Get a random start address.
2049 */
2050 result = vm_map_random_address_for_size(map, address, size);
2051 if (result != KERN_SUCCESS) {
2052 goto BailOut;
2053 }
2054 start = *address;
2055 }
2056
2057
2058 /*
2059 * Calculate the first possible address.
2060 */
2061
2062 if (start < effective_min_offset)
2063 start = effective_min_offset;
2064 if (start > effective_max_offset)
2065 RETURN(KERN_NO_SPACE);
2066
2067 /*
2068 * Look for the first possible address;
2069 * if there's already something at this
2070 * address, we have to start after it.
2071 */
2072
2073 if( map->disable_vmentry_reuse == TRUE) {
2074 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2075 } else {
2076
2077 if (map->holelistenabled) {
2078 hole_entry = (vm_map_entry_t)map->holes_list;
2079
2080 if (hole_entry == NULL) {
2081 /*
2082 * No more space in the map?
2083 */
2084 result = KERN_NO_SPACE;
2085 goto BailOut;
2086 } else {
2087
2088 boolean_t found_hole = FALSE;
2089
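/*
 * Scan the circular hole list for the first hole that either
 * starts at or above "start", or still extends past "start";
 * if we wrap around without finding one, the map is full.
 */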
2090 do {
2091 if (hole_entry->vme_start >= start) {
2092 start = hole_entry->vme_start;
2093 found_hole = TRUE;
2094 break;
2095 }
2096
2097 if (hole_entry->vme_end > start) {
2098 found_hole = TRUE;
2099 break;
2100 }
2101 hole_entry = hole_entry->vme_next;
2102
2103 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2104
2105 if (found_hole == FALSE) {
2106 result = KERN_NO_SPACE;
2107 goto BailOut;
2108 }
2109
2110 entry = hole_entry;
2111
2112 if (start == 0)
2113 start += PAGE_SIZE_64;
2114 }
2115 } else {
2116 assert(first_free_is_valid(map));
2117
2118 entry = map->first_free;
2119
2120 if (entry == vm_map_to_entry(map)) {
2121 entry = NULL;
2122 } else {
2123 if (entry->vme_next == vm_map_to_entry(map)){
2124 /*
2125 * Hole at the end of the map.
2126 */
2127 entry = NULL;
2128 } else {
2129 if (start < (entry->vme_next)->vme_start ) {
2130 start = entry->vme_end;
2131 start = vm_map_round_page(start,
2132 VM_MAP_PAGE_MASK(map));
2133 } else {
2134 /*
2135 * Need to do a lookup.
2136 */
2137 entry = NULL;
2138 }
2139 }
2140 }
2141
2142 if (entry == NULL) {
2143 vm_map_entry_t tmp_entry;
2144 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2145 assert(!entry_for_jit);
2146 start = tmp_entry->vme_end;
2147 start = vm_map_round_page(start,
2148 VM_MAP_PAGE_MASK(map));
2149 }
2150 entry = tmp_entry;
2151 }
2152 }
2153 }
2154
2155 /*
2156 * In any case, the "entry" always precedes
2157 * the proposed new region throughout the
2158 * loop:
2159 */
2160
2161 while (TRUE) {
2162 vm_map_entry_t next;
2163
2164 /*
2165 * Find the end of the proposed new region.
2166 * Be sure we didn't go beyond the end, or
2167 * wrap around the address.
2168 */
2169
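/*
 * "end" is used as scratch here: round "start" up so it clears the
 * caller's alignment mask and the map's page mask, then add "size"
 * to obtain the candidate end of the region.
 */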
2170 end = ((start + mask) & ~mask);
2171 end = vm_map_round_page(end,
2172 VM_MAP_PAGE_MASK(map));
2173 if (end < start)
2174 RETURN(KERN_NO_SPACE);
2175 start = end;
2176 assert(VM_MAP_PAGE_ALIGNED(start,
2177 VM_MAP_PAGE_MASK(map)));
2178 end += size;
2179
2180 if ((end > effective_max_offset) || (end < start)) {
2181 if (map->wait_for_space) {
2182 assert(!keep_map_locked);
2183 if (size <= (effective_max_offset -
2184 effective_min_offset)) {
2185 assert_wait((event_t)map,
2186 THREAD_ABORTSAFE);
2187 vm_map_unlock(map);
2188 map_locked = FALSE;
2189 thread_block(THREAD_CONTINUE_NULL);
2190 goto StartAgain;
2191 }
2192 }
2193 RETURN(KERN_NO_SPACE);
2194 }
2195
2196 next = entry->vme_next;
2197
2198 if (map->holelistenabled) {
2199 if (entry->vme_end >= end)
2200 break;
2201 } else {
2202 /*
2203 * If there are no more entries, we must win.
2204 *
2205 * OR
2206 *
2207 * If there is another entry, it must be
2208 * after the end of the potential new region.
2209 */
2210
2211 if (next == vm_map_to_entry(map))
2212 break;
2213
2214 if (next->vme_start >= end)
2215 break;
2216 }
2217
2218 /*
2219 * Didn't fit -- move to the next entry.
2220 */
2221
2222 entry = next;
2223
2224 if (map->holelistenabled) {
2225 if (entry == (vm_map_entry_t) map->holes_list) {
2226 /*
2227 * Wrapped around
2228 */
2229 result = KERN_NO_SPACE;
2230 goto BailOut;
2231 }
2232 start = entry->vme_start;
2233 } else {
2234 start = entry->vme_end;
2235 }
2236
2237 start = vm_map_round_page(start,
2238 VM_MAP_PAGE_MASK(map));
2239 }
2240
2241 if (map->holelistenabled) {
2242 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2243 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2244 }
2245 }
2246
2247 *address = start;
2248 assert(VM_MAP_PAGE_ALIGNED(*address,
2249 VM_MAP_PAGE_MASK(map)));
2250 } else {
2251 /*
2252 * Verify that:
2253 * the address doesn't itself violate
2254 * the mask requirement.
2255 */
2256
2257 vm_map_lock(map);
2258 map_locked = TRUE;
2259 if ((start & mask) != 0)
2260 RETURN(KERN_NO_SPACE);
2261
2262 /*
2263 * ... the address is within bounds
2264 */
2265
2266 end = start + size;
2267
2268 if ((start < effective_min_offset) ||
2269 (end > effective_max_offset) ||
2270 (start >= end)) {
2271 RETURN(KERN_INVALID_ADDRESS);
2272 }
2273
2274 if (overwrite && zap_old_map != VM_MAP_NULL) {
2275 /*
2276 * Fixed mapping and "overwrite" flag: attempt to
2277 * remove all existing mappings in the specified
2278 * address range, saving them in our "zap_old_map".
2279 */
2280 (void) vm_map_delete(map, start, end,
2281 (VM_MAP_REMOVE_SAVE_ENTRIES |
2282 VM_MAP_REMOVE_NO_MAP_ALIGN),
2283 zap_old_map);
2284 }
2285
2286 /*
2287 * ... the starting address isn't allocated
2288 */
2289
2290 if (vm_map_lookup_entry(map, start, &entry)) {
2291 if (! (flags & VM_FLAGS_ALREADY)) {
2292 RETURN(KERN_NO_SPACE);
2293 }
2294 /*
2295 * Check if what's already there is what we want.
2296 */
2297 tmp_start = start;
2298 tmp_offset = offset;
2299 if (entry->vme_start < start) {
2300 tmp_start -= start - entry->vme_start;
2301 tmp_offset -= start - entry->vme_start;
2302
2303 }
2304 for (; entry->vme_start < end;
2305 entry = entry->vme_next) {
2306 /*
2307 * Check if the mapping's attributes
2308 * match the existing map entry.
2309 */
2310 if (entry == vm_map_to_entry(map) ||
2311 entry->vme_start != tmp_start ||
2312 entry->is_sub_map != is_submap ||
2313 VME_OFFSET(entry) != tmp_offset ||
2314 entry->needs_copy != needs_copy ||
2315 entry->protection != cur_protection ||
2316 entry->max_protection != max_protection ||
2317 entry->inheritance != inheritance ||
2318 entry->iokit_acct != iokit_acct ||
2319 VME_ALIAS(entry) != alias) {
2320 /* not the same mapping ! */
2321 RETURN(KERN_NO_SPACE);
2322 }
2323 /*
2324 * Check if the same object is being mapped.
2325 */
2326 if (is_submap) {
2327 if (VME_SUBMAP(entry) !=
2328 (vm_map_t) object) {
2329 /* not the same submap */
2330 RETURN(KERN_NO_SPACE);
2331 }
2332 } else {
2333 if (VME_OBJECT(entry) != object) {
2334 /* not the same VM object... */
2335 vm_object_t obj2;
2336
2337 obj2 = VME_OBJECT(entry);
2338 if ((obj2 == VM_OBJECT_NULL ||
2339 obj2->internal) &&
2340 (object == VM_OBJECT_NULL ||
2341 object->internal)) {
2342 /*
2343 * ... but both are
2344 * anonymous memory,
2345 * so equivalent.
2346 */
2347 } else {
2348 RETURN(KERN_NO_SPACE);
2349 }
2350 }
2351 }
2352
2353 tmp_offset += entry->vme_end - entry->vme_start;
2354 tmp_start += entry->vme_end - entry->vme_start;
2355 if (entry->vme_end >= end) {
2356 /* reached the end of our mapping */
2357 break;
2358 }
2359 }
2360 /* it all matches: let's use what's already there ! */
2361 RETURN(KERN_MEMORY_PRESENT);
2362 }
2363
2364 /*
2365 * ... the next region doesn't overlap the
2366 * end point.
2367 */
2368
2369 if ((entry->vme_next != vm_map_to_entry(map)) &&
2370 (entry->vme_next->vme_start < end))
2371 RETURN(KERN_NO_SPACE);
2372 }
2373
2374 /*
2375 * At this point,
2376 * "start" and "end" should define the endpoints of the
2377 * available new range, and
2378 * "entry" should refer to the region before the new
2379 * range, and
2380 *
2381 * the map should be locked.
2382 */
2383
2384 /*
2385 * See whether we can avoid creating a new entry (and object) by
2386 * extending one of our neighbors. [So far, we only attempt to
2387 * extend from below.] Note that we can never extend/join
2388 * purgable objects because they need to remain distinct
2389 * entities in order to implement their "volatile object"
2390 * semantics.
2391 */
2392
2393 if (purgable || entry_for_jit) {
2394 if (object == VM_OBJECT_NULL) {
2395
2396 object = vm_object_allocate(size);
2397 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2398 object->true_share = TRUE;
2399 if (purgable) {
2400 task_t owner;
2401 object->purgable = VM_PURGABLE_NONVOLATILE;
2402 if (map->pmap == kernel_pmap) {
2403 /*
2404 * Purgeable mappings made in a kernel
2405 * map are "owned" by the kernel itself
2406 * rather than the current user task
2407 * because they're likely to be used by
2408 * more than this user task (see
2409 * execargs_purgeable_allocate(), for
2410 * example).
2411 */
2412 owner = kernel_task;
2413 } else {
2414 owner = current_task();
2415 }
2416 assert(object->vo_purgeable_owner == NULL);
2417 assert(object->resident_page_count == 0);
2418 assert(object->wired_page_count == 0);
2419 vm_object_lock(object);
2420 vm_purgeable_nonvolatile_enqueue(object, owner);
2421 vm_object_unlock(object);
2422 }
2423 offset = (vm_object_offset_t)0;
2424 }
2425 } else if ((is_submap == FALSE) &&
2426 (object == VM_OBJECT_NULL) &&
2427 (entry != vm_map_to_entry(map)) &&
2428 (entry->vme_end == start) &&
2429 (!entry->is_shared) &&
2430 (!entry->is_sub_map) &&
2431 (!entry->in_transition) &&
2432 (!entry->needs_wakeup) &&
2433 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2434 (entry->protection == cur_protection) &&
2435 (entry->max_protection == max_protection) &&
2436 (entry->inheritance == inheritance) &&
2437 ((user_alias == VM_MEMORY_REALLOC) ||
2438 (VME_ALIAS(entry) == alias)) &&
2439 (entry->no_cache == no_cache) &&
2440 (entry->permanent == permanent) &&
2441 (!entry->superpage_size && !superpage_size) &&
2442 /*
2443 * No coalescing if not map-aligned, to avoid propagating
2444 * that condition any further than needed:
2445 */
2446 (!entry->map_aligned || !clear_map_aligned) &&
2447 (!entry->zero_wired_pages) &&
2448 (!entry->used_for_jit && !entry_for_jit) &&
2449 (entry->iokit_acct == iokit_acct) &&
2450 (!entry->vme_resilient_codesign) &&
2451 (!entry->vme_resilient_media) &&
2452 (!entry->vme_atomic) &&
2453
2454 ((entry->vme_end - entry->vme_start) + size <=
2455 (user_alias == VM_MEMORY_REALLOC ?
2456 ANON_CHUNK_SIZE :
2457 NO_COALESCE_LIMIT)) &&
2458
2459 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2460 if (vm_object_coalesce(VME_OBJECT(entry),
2461 VM_OBJECT_NULL,
2462 VME_OFFSET(entry),
2463 (vm_object_offset_t) 0,
2464 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2465 (vm_map_size_t)(end - entry->vme_end))) {
2466
2467 /*
2468 * Coalesced the two objects - can extend
2469 * the previous map entry to include the
2470 * new range.
2471 */
2472 map->size += (end - entry->vme_end);
2473 assert(entry->vme_start < end);
2474 assert(VM_MAP_PAGE_ALIGNED(end,
2475 VM_MAP_PAGE_MASK(map)));
2476 if (__improbable(vm_debug_events))
2477 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2478 entry->vme_end = end;
2479 if (map->holelistenabled) {
2480 vm_map_store_update_first_free(map, entry, TRUE);
2481 } else {
2482 vm_map_store_update_first_free(map, map->first_free, TRUE);
2483 }
2484 new_mapping_established = TRUE;
2485 RETURN(KERN_SUCCESS);
2486 }
2487 }
2488
2489 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2490 new_entry = NULL;
2491
2492 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2493 tmp2_end = tmp2_start + step;
2494 /*
2495 * Create a new entry
2496 * LP64todo - for now, we can only allocate 4GB internal objects
2497 * because the default pager can't page bigger ones. Remove this
2498 * when it can.
2499 *
2500 * XXX FBDP
2501 * The reserved "page zero" in each process's address space can
2502 * be arbitrarily large. Splitting it into separate 4GB objects and
2503 * therefore different VM map entries serves no purpose and just
2504 * slows down operations on the VM map, so let's not split the
2505 * allocation into 4GB chunks if the max protection is NONE. That
2506 * memory should never be accessible, so it will never get to the
2507 * default pager.
2508 */
2509 tmp_start = tmp2_start;
2510 if (object == VM_OBJECT_NULL &&
2511 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2512 max_protection != VM_PROT_NONE &&
2513 superpage_size == 0)
2514 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2515 else
2516 tmp_end = tmp2_end;
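/*
 * Create one map entry per chunk: the while clause at the bottom of
 * this loop advances [tmp_start, tmp_end) by ANON_CHUNK_SIZE (capped
 * at tmp2_end) until the whole [tmp2_start, tmp2_end) range is covered.
 */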
2517 do {
2518 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2519 object, offset, needs_copy,
2520 FALSE, FALSE,
2521 cur_protection, max_protection,
2522 VM_BEHAVIOR_DEFAULT,
2523 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2524 0, no_cache,
2525 permanent,
2526 superpage_size,
2527 clear_map_aligned,
2528 is_submap);
2529
2530 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2531 VME_ALIAS_SET(new_entry, alias);
2532
2533 if (entry_for_jit){
2534 if (!(map->jit_entry_exists)){
2535 new_entry->used_for_jit = TRUE;
2536 map->jit_entry_exists = TRUE;
2537 }
2538 }
2539
2540 if (resilient_codesign &&
2541 ! ((cur_protection | max_protection) &
2542 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2543 new_entry->vme_resilient_codesign = TRUE;
2544 }
2545
2546 if (resilient_media &&
2547 ! ((cur_protection | max_protection) &
2548 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2549 new_entry->vme_resilient_media = TRUE;
2550 }
2551
2552 assert(!new_entry->iokit_acct);
2553 if (!is_submap &&
2554 object != VM_OBJECT_NULL &&
2555 object->purgable != VM_PURGABLE_DENY) {
2556 assert(new_entry->use_pmap);
2557 assert(!new_entry->iokit_acct);
2558 /*
2559 * Turn off pmap accounting since
2560 * purgeable objects have their
2561 * own ledgers.
2562 */
2563 new_entry->use_pmap = FALSE;
2564 } else if (!is_submap &&
2565 iokit_acct &&
2566 object != VM_OBJECT_NULL &&
2567 object->internal) {
2568 /* alternate accounting */
2569 assert(!new_entry->iokit_acct);
2570 assert(new_entry->use_pmap);
2571 new_entry->iokit_acct = TRUE;
2572 new_entry->use_pmap = FALSE;
2573 DTRACE_VM4(
2574 vm_map_iokit_mapped_region,
2575 vm_map_t, map,
2576 vm_map_offset_t, new_entry->vme_start,
2577 vm_map_offset_t, new_entry->vme_end,
2578 int, VME_ALIAS(new_entry));
2579 vm_map_iokit_mapped_region(
2580 map,
2581 (new_entry->vme_end -
2582 new_entry->vme_start));
2583 } else if (!is_submap) {
2584 assert(!new_entry->iokit_acct);
2585 assert(new_entry->use_pmap);
2586 }
2587
2588 if (is_submap) {
2589 vm_map_t submap;
2590 boolean_t submap_is_64bit;
2591 boolean_t use_pmap;
2592
2593 assert(new_entry->is_sub_map);
2594 assert(!new_entry->use_pmap);
2595 assert(!new_entry->iokit_acct);
2596 submap = (vm_map_t) object;
2597 submap_is_64bit = vm_map_is_64bit(submap);
2598 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2599 #ifndef NO_NESTED_PMAP
2600 if (use_pmap && submap->pmap == NULL) {
2601 ledger_t ledger = map->pmap->ledger;
2602 /* we need a sub pmap to nest... */
2603 submap->pmap = pmap_create(ledger, 0,
2604 submap_is_64bit);
2605 if (submap->pmap == NULL) {
2606 /* let's proceed without nesting... */
2607 }
2608 }
2609 if (use_pmap && submap->pmap != NULL) {
2610 kr = pmap_nest(map->pmap,
2611 submap->pmap,
2612 tmp_start,
2613 tmp_start,
2614 tmp_end - tmp_start);
2615 if (kr != KERN_SUCCESS) {
2616 printf("vm_map_enter: "
2617 "pmap_nest(0x%llx,0x%llx) "
2618 "error 0x%x\n",
2619 (long long)tmp_start,
2620 (long long)tmp_end,
2621 kr);
2622 } else {
2623 /* we're now nested ! */
2624 new_entry->use_pmap = TRUE;
2625 pmap_empty = FALSE;
2626 }
2627 }
2628 #endif /* NO_NESTED_PMAP */
2629 }
2630 entry = new_entry;
2631
2632 if (superpage_size) {
2633 vm_page_t pages, m;
2634 vm_object_t sp_object;
2635
2636 VME_OFFSET_SET(entry, 0);
2637
2638 /* allocate one superpage */
2639 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2640 if (kr != KERN_SUCCESS) {
2641 /* deallocate whole range... */
2642 new_mapping_established = TRUE;
2643 /* ... but only up to "tmp_end" */
2644 size -= end - tmp_end;
2645 RETURN(kr);
2646 }
2647
2648 /* create one vm_object per superpage */
2649 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2650 sp_object->phys_contiguous = TRUE;
2651 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
2652 VME_OBJECT_SET(entry, sp_object);
2653 assert(entry->use_pmap);
2654
2655 /* enter the base pages into the object */
2656 vm_object_lock(sp_object);
2657 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2658 m = pages;
2659 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
2660 pages = NEXT_PAGE(m);
2661 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2662 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2663 }
2664 vm_object_unlock(sp_object);
2665 }
2666 } while (tmp_end != tmp2_end &&
2667 (tmp_start = tmp_end) &&
2668 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2669 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2670 }
2671
2672 new_mapping_established = TRUE;
2673
2674 BailOut:
2675 assert(map_locked == TRUE);
2676
2677 if (result == KERN_SUCCESS) {
2678 vm_prot_t pager_prot;
2679 memory_object_t pager;
2680
2681 #if DEBUG
2682 if (pmap_empty &&
2683 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2684 assert(vm_map_pmap_is_empty(map,
2685 *address,
2686 *address+size));
2687 }
2688 #endif /* DEBUG */
2689
2690 /*
2691 * For "named" VM objects, let the pager know that the
2692 * memory object is being mapped. Some pagers need to keep
2693 * track of this, to know when they can reclaim the memory
2694 * object, for example.
2695 * VM calls memory_object_map() for each mapping (specifying
2696 * the protection of each mapping) and calls
2697 * memory_object_last_unmap() when all the mappings are gone.
2698 */
2699 pager_prot = max_protection;
2700 if (needs_copy) {
2701 /*
2702 * Copy-On-Write mapping: won't modify
2703 * the memory object.
2704 */
2705 pager_prot &= ~VM_PROT_WRITE;
2706 }
2707 if (!is_submap &&
2708 object != VM_OBJECT_NULL &&
2709 object->named &&
2710 object->pager != MEMORY_OBJECT_NULL) {
2711 vm_object_lock(object);
2712 pager = object->pager;
2713 if (object->named &&
2714 pager != MEMORY_OBJECT_NULL) {
2715 assert(object->pager_ready);
2716 vm_object_mapping_wait(object, THREAD_UNINT);
2717 vm_object_mapping_begin(object);
2718 vm_object_unlock(object);
2719
2720 kr = memory_object_map(pager, pager_prot);
2721 assert(kr == KERN_SUCCESS);
2722
2723 vm_object_lock(object);
2724 vm_object_mapping_end(object);
2725 }
2726 vm_object_unlock(object);
2727 }
2728 }
2729
2730 assert(map_locked == TRUE);
2731
2732 if (!keep_map_locked) {
2733 vm_map_unlock(map);
2734 map_locked = FALSE;
2735 }
2736
2737 /*
2738 * We can't hold the map lock if we enter this block.
2739 */
2740
2741 if (result == KERN_SUCCESS) {
2742
2743 /* Wire down the new entry if the user
2744 * requested all new map entries be wired.
2745 */
2746 if ((map->wiring_required)||(superpage_size)) {
2747 assert(!keep_map_locked);
2748 pmap_empty = FALSE; /* pmap won't be empty */
2749 kr = vm_map_wire(map, start, end,
2750 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2751 TRUE);
2752 result = kr;
2753 }
2754
2755 }
2756
2757 if (result != KERN_SUCCESS) {
2758 if (new_mapping_established) {
2759 /*
2760 * We have to get rid of the new mappings since we
2761 * won't make them available to the user.
2762 * Try to do that atomically, to minimize the risk
2763 * that someone else creates new mappings in that range.
2764 */
2765 zap_new_map = vm_map_create(PMAP_NULL,
2766 *address,
2767 *address + size,
2768 map->hdr.entries_pageable);
2769 vm_map_set_page_shift(zap_new_map,
2770 VM_MAP_PAGE_SHIFT(map));
2771 vm_map_disable_hole_optimization(zap_new_map);
2772
2773 if (!map_locked) {
2774 vm_map_lock(map);
2775 map_locked = TRUE;
2776 }
2777 (void) vm_map_delete(map, *address, *address+size,
2778 (VM_MAP_REMOVE_SAVE_ENTRIES |
2779 VM_MAP_REMOVE_NO_MAP_ALIGN),
2780 zap_new_map);
2781 }
2782 if (zap_old_map != VM_MAP_NULL &&
2783 zap_old_map->hdr.nentries != 0) {
2784 vm_map_entry_t entry1, entry2;
2785
2786 /*
2787 * The new mapping failed. Attempt to restore
2788 * the old mappings, saved in the "zap_old_map".
2789 */
2790 if (!map_locked) {
2791 vm_map_lock(map);
2792 map_locked = TRUE;
2793 }
2794
2795 /* first check if the coast is still clear */
2796 start = vm_map_first_entry(zap_old_map)->vme_start;
2797 end = vm_map_last_entry(zap_old_map)->vme_end;
2798 if (vm_map_lookup_entry(map, start, &entry1) ||
2799 vm_map_lookup_entry(map, end, &entry2) ||
2800 entry1 != entry2) {
2801 /*
2802 * Part of that range has already been
2803 * re-mapped: we can't restore the old
2804 * mappings...
2805 */
2806 vm_map_enter_restore_failures++;
2807 } else {
2808 /*
2809 * Transfer the saved map entries from
2810 * "zap_old_map" to the original "map",
2811 * inserting them all after "entry1".
2812 */
2813 for (entry2 = vm_map_first_entry(zap_old_map);
2814 entry2 != vm_map_to_entry(zap_old_map);
2815 entry2 = vm_map_first_entry(zap_old_map)) {
2816 vm_map_size_t entry_size;
2817
2818 entry_size = (entry2->vme_end -
2819 entry2->vme_start);
2820 vm_map_store_entry_unlink(zap_old_map,
2821 entry2);
2822 zap_old_map->size -= entry_size;
2823 vm_map_store_entry_link(map, entry1, entry2);
2824 map->size += entry_size;
2825 entry1 = entry2;
2826 }
2827 if (map->wiring_required) {
2828 /*
2829 * XXX TODO: we should rewire the
2830 * old pages here...
2831 */
2832 }
2833 vm_map_enter_restore_successes++;
2834 }
2835 }
2836 }
2837
2838 /*
2839 * The caller is responsible for releasing the lock if it requested to
2840 * keep the map locked.
2841 */
2842 if (map_locked && !keep_map_locked) {
2843 vm_map_unlock(map);
2844 }
2845
2846 /*
2847 * Get rid of the "zap_maps" and all the map entries that
2848 * they may still contain.
2849 */
2850 if (zap_old_map != VM_MAP_NULL) {
2851 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2852 zap_old_map = VM_MAP_NULL;
2853 }
2854 if (zap_new_map != VM_MAP_NULL) {
2855 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2856 zap_new_map = VM_MAP_NULL;
2857 }
2858
2859 return result;
2860
2861 #undef RETURN
2862 }
2863
2864
2865 /*
2866 * Counters for the prefault optimization.
2867 */
2868 int64_t vm_prefault_nb_pages = 0;
2869 int64_t vm_prefault_nb_bailout = 0;
2870
2871 static kern_return_t
2872 vm_map_enter_mem_object_helper(
2873 vm_map_t target_map,
2874 vm_map_offset_t *address,
2875 vm_map_size_t initial_size,
2876 vm_map_offset_t mask,
2877 int flags,
2878 ipc_port_t port,
2879 vm_object_offset_t offset,
2880 boolean_t copy,
2881 vm_prot_t cur_protection,
2882 vm_prot_t max_protection,
2883 vm_inherit_t inheritance,
2884 upl_page_list_ptr_t page_list,
2885 unsigned int page_list_count)
2886 {
2887 vm_map_address_t map_addr;
2888 vm_map_size_t map_size;
2889 vm_object_t object;
2890 vm_object_size_t size;
2891 kern_return_t result;
2892 boolean_t mask_cur_protection, mask_max_protection;
2893 boolean_t try_prefault = (page_list_count != 0);
2894 vm_map_offset_t offset_in_mapping = 0;
2895
2896 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2897 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2898 cur_protection &= ~VM_PROT_IS_MASK;
2899 max_protection &= ~VM_PROT_IS_MASK;
2900
2901 /*
2902 * Check arguments for validity
2903 */
2904 if ((target_map == VM_MAP_NULL) ||
2905 (cur_protection & ~VM_PROT_ALL) ||
2906 (max_protection & ~VM_PROT_ALL) ||
2907 (inheritance > VM_INHERIT_LAST_VALID) ||
2908 (try_prefault && (copy || !page_list)) ||
2909 initial_size == 0) {
2910 return KERN_INVALID_ARGUMENT;
2911 }
2912
2913 {
2914 map_addr = vm_map_trunc_page(*address,
2915 VM_MAP_PAGE_MASK(target_map));
2916 map_size = vm_map_round_page(initial_size,
2917 VM_MAP_PAGE_MASK(target_map));
2918 }
2919 size = vm_object_round_page(initial_size);
2920
2921 /*
2922 * Find the vm object (if any) corresponding to this port.
2923 */
2924 if (!IP_VALID(port)) {
2925 object = VM_OBJECT_NULL;
2926 offset = 0;
2927 copy = FALSE;
2928 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2929 vm_named_entry_t named_entry;
2930
2931 named_entry = (vm_named_entry_t) port->ip_kobject;
2932
2933 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2934 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2935 offset += named_entry->data_offset;
2936 }
2937
2938 /* a few checks to make sure the user is obeying the rules */
2939 if (size == 0) {
2940 if (offset >= named_entry->size)
2941 return KERN_INVALID_RIGHT;
2942 size = named_entry->size - offset;
2943 }
2944 if (mask_max_protection) {
2945 max_protection &= named_entry->protection;
2946 }
2947 if (mask_cur_protection) {
2948 cur_protection &= named_entry->protection;
2949 }
2950 if ((named_entry->protection & max_protection) !=
2951 max_protection)
2952 return KERN_INVALID_RIGHT;
2953 if ((named_entry->protection & cur_protection) !=
2954 cur_protection)
2955 return KERN_INVALID_RIGHT;
2956 if (offset + size < offset) {
2957 /* overflow */
2958 return KERN_INVALID_ARGUMENT;
2959 }
2960 if (named_entry->size < (offset + initial_size)) {
2961 return KERN_INVALID_ARGUMENT;
2962 }
2963
2964 if (named_entry->is_copy) {
2965 /* for a vm_map_copy, we can only map it whole */
2966 if ((size != named_entry->size) &&
2967 (vm_map_round_page(size,
2968 VM_MAP_PAGE_MASK(target_map)) ==
2969 named_entry->size)) {
2970 /* XXX FBDP use the rounded size... */
2971 size = vm_map_round_page(
2972 size,
2973 VM_MAP_PAGE_MASK(target_map));
2974 }
2975
2976 if (!(flags & VM_FLAGS_ANYWHERE) &&
2977 (offset != 0 ||
2978 size != named_entry->size)) {
2979 /*
2980 * XXX for a mapping at a "fixed" address,
2981 * we can't trim after mapping the whole
2982 * memory entry, so reject a request for a
2983 * partial mapping.
2984 */
2985 return KERN_INVALID_ARGUMENT;
2986 }
2987 }
2988
2989 /* The caller's "offset" parameter is relative to the named entry; */
2990 /* add the named entry's own offset to get the offset in the object. */
2991 offset = offset + named_entry->offset;
2992
2993 if (! VM_MAP_PAGE_ALIGNED(size,
2994 VM_MAP_PAGE_MASK(target_map))) {
2995 /*
2996 * Let's not map more than requested;
2997 * vm_map_enter() will handle this "not map-aligned"
2998 * case.
2999 */
3000 map_size = size;
3001 }
3002
3003 named_entry_lock(named_entry);
3004 if (named_entry->is_sub_map) {
3005 vm_map_t submap;
3006
3007 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3008 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3009 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3010 }
3011
3012 submap = named_entry->backing.map;
3013 vm_map_lock(submap);
3014 vm_map_reference(submap);
3015 vm_map_unlock(submap);
3016 named_entry_unlock(named_entry);
3017
3018 result = vm_map_enter(target_map,
3019 &map_addr,
3020 map_size,
3021 mask,
3022 flags | VM_FLAGS_SUBMAP,
3023 (vm_object_t) submap,
3024 offset,
3025 copy,
3026 cur_protection,
3027 max_protection,
3028 inheritance);
3029 if (result != KERN_SUCCESS) {
3030 vm_map_deallocate(submap);
3031 } else {
3032 /*
3033 * No need to lock "submap" just to check its
3034 * "mapped" flag: that flag is never reset
3035 * once it's been set and if we race, we'll
3036 * just end up setting it twice, which is OK.
3037 */
3038 if (submap->mapped_in_other_pmaps == FALSE &&
3039 vm_map_pmap(submap) != PMAP_NULL &&
3040 vm_map_pmap(submap) !=
3041 vm_map_pmap(target_map)) {
3042 /*
3043 * This submap is being mapped in a map
3044 * that uses a different pmap.
3045 * Set its "mapped_in_other_pmaps" flag
3046 * to indicate that we now need to
3047 * remove mappings from all pmaps rather
3048 * than just the submap's pmap.
3049 */
3050 vm_map_lock(submap);
3051 submap->mapped_in_other_pmaps = TRUE;
3052 vm_map_unlock(submap);
3053 }
3054 *address = map_addr;
3055 }
3056 return result;
3057
3058 } else if (named_entry->is_pager) {
3059 unsigned int access;
3060 vm_prot_t protections;
3061 unsigned int wimg_mode;
3062
3063 protections = named_entry->protection & VM_PROT_ALL;
3064 access = GET_MAP_MEM(named_entry->protection);
3065
3066 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3067 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3068 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for a pager-backed named entry.");
3069 }
3070
3071 object = vm_object_enter(named_entry->backing.pager,
3072 named_entry->size,
3073 named_entry->internal,
3074 FALSE,
3075 FALSE);
3076 if (object == VM_OBJECT_NULL) {
3077 named_entry_unlock(named_entry);
3078 return KERN_INVALID_OBJECT;
3079 }
3080
3081 /* JMM - drop reference on pager here */
3082
3083 /* create an extra ref for the named entry */
3084 vm_object_lock(object);
3085 vm_object_reference_locked(object);
3086 named_entry->backing.object = object;
3087 named_entry->is_pager = FALSE;
3088 named_entry_unlock(named_entry);
3089
3090 wimg_mode = object->wimg_bits;
3091
3092 if (access == MAP_MEM_IO) {
3093 wimg_mode = VM_WIMG_IO;
3094 } else if (access == MAP_MEM_COPYBACK) {
3095 wimg_mode = VM_WIMG_USE_DEFAULT;
3096 } else if (access == MAP_MEM_INNERWBACK) {
3097 wimg_mode = VM_WIMG_INNERWBACK;
3098 } else if (access == MAP_MEM_WTHRU) {
3099 wimg_mode = VM_WIMG_WTHRU;
3100 } else if (access == MAP_MEM_WCOMB) {
3101 wimg_mode = VM_WIMG_WCOMB;
3102 }
3103
3104 /* wait for object (if any) to be ready */
3105 if (!named_entry->internal) {
3106 while (!object->pager_ready) {
3107 vm_object_wait(
3108 object,
3109 VM_OBJECT_EVENT_PAGER_READY,
3110 THREAD_UNINT);
3111 vm_object_lock(object);
3112 }
3113 }
3114
3115 if (object->wimg_bits != wimg_mode)
3116 vm_object_change_wimg_mode(object, wimg_mode);
3117
3118 #if VM_OBJECT_TRACKING_OP_TRUESHARE
3119 if (!object->true_share &&
3120 vm_object_tracking_inited) {
3121 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3122 int num = 0;
3123
3124 num = OSBacktrace(bt,
3125 VM_OBJECT_TRACKING_BTDEPTH);
3126 btlog_add_entry(vm_object_tracking_btlog,
3127 object,
3128 VM_OBJECT_TRACKING_OP_TRUESHARE,
3129 bt,
3130 num);
3131 }
3132 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3133
3134 object->true_share = TRUE;
3135
3136 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3137 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3138 vm_object_unlock(object);
3139
3140 } else if (named_entry->is_copy) {
3141 kern_return_t kr;
3142 vm_map_copy_t copy_map;
3143 vm_map_entry_t copy_entry;
3144 vm_map_offset_t copy_addr;
3145
3146 if (flags & ~(VM_FLAGS_FIXED |
3147 VM_FLAGS_ANYWHERE |
3148 VM_FLAGS_OVERWRITE |
3149 VM_FLAGS_IOKIT_ACCT |
3150 VM_FLAGS_RETURN_4K_DATA_ADDR |
3151 VM_FLAGS_RETURN_DATA_ADDR |
3152 VM_FLAGS_ALIAS_MASK)) {
3153 named_entry_unlock(named_entry);
3154 return KERN_INVALID_ARGUMENT;
3155 }
3156
3157 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3158 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3159 offset_in_mapping = offset - vm_object_trunc_page(offset);
3160 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3161 offset_in_mapping &= ~((signed)(0xFFF));
3162 offset = vm_object_trunc_page(offset);
3163 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3164 }
3165
3166 copy_map = named_entry->backing.copy;
3167 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3168 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3169 /* unsupported type; should not happen */
3170 printf("vm_map_enter_mem_object: "
3171 "memory_entry->backing.copy "
3172 "unsupported type 0x%x\n",
3173 copy_map->type);
3174 named_entry_unlock(named_entry);
3175 return KERN_INVALID_ARGUMENT;
3176 }
3177
3178 /* reserve a contiguous range */
3179 kr = vm_map_enter(target_map,
3180 &map_addr,
3181 /* map whole mem entry, trim later: */
3182 named_entry->size,
3183 mask,
3184 flags & (VM_FLAGS_ANYWHERE |
3185 VM_FLAGS_OVERWRITE |
3186 VM_FLAGS_IOKIT_ACCT |
3187 VM_FLAGS_RETURN_4K_DATA_ADDR |
3188 VM_FLAGS_RETURN_DATA_ADDR |
3189 VM_FLAGS_ALIAS_MASK),
3190 VM_OBJECT_NULL,
3191 0,
3192 FALSE, /* copy */
3193 cur_protection,
3194 max_protection,
3195 inheritance);
3196 if (kr != KERN_SUCCESS) {
3197 named_entry_unlock(named_entry);
3198 return kr;
3199 }
3200
3201 copy_addr = map_addr;
3202
3203 for (copy_entry = vm_map_copy_first_entry(copy_map);
3204 copy_entry != vm_map_copy_to_entry(copy_map);
3205 copy_entry = copy_entry->vme_next) {
3206 int remap_flags = 0;
3207 vm_map_t copy_submap;
3208 vm_object_t copy_object;
3209 vm_map_size_t copy_size;
3210 vm_object_offset_t copy_offset;
3211 int copy_vm_alias;
3212
3213 copy_offset = VME_OFFSET(copy_entry);
3214 copy_size = (copy_entry->vme_end -
3215 copy_entry->vme_start);
3216 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3217 if (copy_vm_alias == 0) {
3218 /*
3219 * Caller does not want a specific
3220 * alias for this new mapping: use
3221 * the alias of the original mapping.
3222 */
3223 copy_vm_alias = VME_ALIAS(copy_entry);
3224 }
3225
3226 /* sanity check */
3227 if ((copy_addr + copy_size) >
3228 (map_addr +
3229 named_entry->size /* XXX full size */ )) {
3230 /* over-mapping too much !? */
3231 kr = KERN_INVALID_ARGUMENT;
3232 /* abort */
3233 break;
3234 }
3235
3236 /* take a reference on the object */
3237 if (copy_entry->is_sub_map) {
3238 remap_flags |= VM_FLAGS_SUBMAP;
3239 copy_submap = VME_SUBMAP(copy_entry);
3240 vm_map_lock(copy_submap);
3241 vm_map_reference(copy_submap);
3242 vm_map_unlock(copy_submap);
3243 copy_object = (vm_object_t) copy_submap;
3244 } else {
3245 copy_object = VME_OBJECT(copy_entry);
3246 vm_object_reference(copy_object);
3247 }
3248
3249 /* over-map the object into destination */
3250 remap_flags |= flags;
3251 remap_flags |= VM_FLAGS_FIXED;
3252 remap_flags |= VM_FLAGS_OVERWRITE;
3253 remap_flags &= ~VM_FLAGS_ANYWHERE;
3254 remap_flags |= VM_MAKE_TAG(copy_vm_alias);
3255 kr = vm_map_enter(target_map,
3256 &copy_addr,
3257 copy_size,
3258 (vm_map_offset_t) 0,
3259 remap_flags,
3260 copy_object,
3261 copy_offset,
3262 copy,
3263 cur_protection,
3264 max_protection,
3265 inheritance);
3266 if (kr != KERN_SUCCESS) {
3267 if (copy_entry->is_sub_map) {
3268 vm_map_deallocate(copy_submap);
3269 } else {
3270 vm_object_deallocate(copy_object);
3271 }
3272 /* abort */
3273 break;
3274 }
3275
3276 /* next mapping */
3277 copy_addr += copy_size;
3278 }
3279
3280 if (kr == KERN_SUCCESS) {
3281 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3282 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3283 *address = map_addr + offset_in_mapping;
3284 } else {
3285 *address = map_addr;
3286 }
3287
3288 if (offset) {
3289 /*
3290 * Trim in front, from 0 to "offset".
3291 */
3292 vm_map_remove(target_map,
3293 map_addr,
3294 map_addr + offset,
3295 0);
3296 *address += offset;
3297 }
3298 if (offset + map_size < named_entry->size) {
3299 /*
3300 * Trim in back, from
3301 * "offset + map_size" to
3302 * "named_entry->size".
3303 */
3304 vm_map_remove(target_map,
3305 (map_addr +
3306 offset + map_size),
3307 (map_addr +
3308 named_entry->size),
3309 0);
3310 }
3311 }
3312 named_entry_unlock(named_entry);
3313
3314 if (kr != KERN_SUCCESS) {
3315 if (! (flags & VM_FLAGS_OVERWRITE)) {
3316 /* deallocate the contiguous range */
3317 (void) vm_deallocate(target_map,
3318 map_addr,
3319 map_size);
3320 }
3321 }
3322
3323 return kr;
3324
3325 } else {
3326 /* This is the case where we are going to map */
3327 /* an already mapped object. If the object is */
3328 /* not ready, it is internal. An external */
3329 /* object cannot be mapped until it is ready, */
3330 /* so we can avoid the ready check */
3331 /* in this case. */
3332 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3333 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3334 offset_in_mapping = offset - vm_object_trunc_page(offset);
3335 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3336 offset_in_mapping &= ~((signed)(0xFFF));
3337 offset = vm_object_trunc_page(offset);
3338 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3339 }
3340
3341 object = named_entry->backing.object;
3342 assert(object != VM_OBJECT_NULL);
3343 named_entry_unlock(named_entry);
3344 vm_object_reference(object);
3345 }
3346 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3347 /*
3348 * JMM - This is temporary until we unify named entries
3349 * and raw memory objects.
3350 *
3351 * Detected fake ip_kotype for a memory object. In
3352 * this case, the port isn't really a port at all, but
3353 * instead is just a raw memory object.
3354 */
3355 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3356 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3357 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3358 }
3359
3360 object = vm_object_enter((memory_object_t)port,
3361 size, FALSE, FALSE, FALSE);
3362 if (object == VM_OBJECT_NULL)
3363 return KERN_INVALID_OBJECT;
3364
3365 /* wait for object (if any) to be ready */
3366 if (object != VM_OBJECT_NULL) {
3367 if (object == kernel_object) {
3368 printf("Warning: Attempt to map kernel object"
3369 " by a non-private kernel entity\n");
3370 return KERN_INVALID_OBJECT;
3371 }
3372 if (!object->pager_ready) {
3373 vm_object_lock(object);
3374
3375 while (!object->pager_ready) {
3376 vm_object_wait(object,
3377 VM_OBJECT_EVENT_PAGER_READY,
3378 THREAD_UNINT);
3379 vm_object_lock(object);
3380 }
3381 vm_object_unlock(object);
3382 }
3383 }
3384 } else {
3385 return KERN_INVALID_OBJECT;
3386 }
3387
3388 if (object != VM_OBJECT_NULL &&
3389 object->named &&
3390 object->pager != MEMORY_OBJECT_NULL &&
3391 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3392 memory_object_t pager;
3393 vm_prot_t pager_prot;
3394 kern_return_t kr;
3395
3396 /*
3397 * For "named" VM objects, let the pager know that the
3398 * memory object is being mapped. Some pagers need to keep
3399 * track of this, to know when they can reclaim the memory
3400 * object, for example.
3401 * VM calls memory_object_map() for each mapping (specifying
3402 * the protection of each mapping) and calls
3403 * memory_object_last_unmap() when all the mappings are gone.
3404 */
3405 pager_prot = max_protection;
3406 if (copy) {
3407 /*
3408 * Copy-On-Write mapping: won't modify the
3409 * memory object.
3410 */
3411 pager_prot &= ~VM_PROT_WRITE;
3412 }
3413 vm_object_lock(object);
3414 pager = object->pager;
3415 if (object->named &&
3416 pager != MEMORY_OBJECT_NULL &&
3417 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3418 assert(object->pager_ready);
3419 vm_object_mapping_wait(object, THREAD_UNINT);
3420 vm_object_mapping_begin(object);
3421 vm_object_unlock(object);
3422
3423 kr = memory_object_map(pager, pager_prot);
3424 assert(kr == KERN_SUCCESS);
3425
3426 vm_object_lock(object);
3427 vm_object_mapping_end(object);
3428 }
3429 vm_object_unlock(object);
3430 }
3431
3432 /*
3433 * Perform the copy if requested
3434 */
3435
3436 if (copy) {
3437 vm_object_t new_object;
3438 vm_object_offset_t new_offset;
3439
3440 result = vm_object_copy_strategically(object, offset,
3441 map_size,
3442 &new_object, &new_offset,
3443 &copy);
3444
3445
3446 if (result == KERN_MEMORY_RESTART_COPY) {
3447 boolean_t success;
3448 boolean_t src_needs_copy;
3449
3450 /*
3451 * XXX
3452 * We currently ignore src_needs_copy.
3453 * This really is the issue of how to make
3454 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3455 * non-kernel users to use. Solution forthcoming.
3456 * In the meantime, since we don't allow non-kernel
3457 * memory managers to specify symmetric copy,
3458 * we won't run into problems here.
3459 */
3460 new_object = object;
3461 new_offset = offset;
3462 success = vm_object_copy_quickly(&new_object,
3463 new_offset,
3464 map_size,
3465 &src_needs_copy,
3466 &copy);
3467 assert(success);
3468 result = KERN_SUCCESS;
3469 }
3470 /*
3471 * Throw away the reference to the
3472 * original object, as it won't be mapped.
3473 */
3474
3475 vm_object_deallocate(object);
3476
3477 if (result != KERN_SUCCESS) {
3478 return result;
3479 }
3480
3481 object = new_object;
3482 offset = new_offset;
3483 }
3484
3485 /*
3486 * If users want to try to prefault pages, the mapping and prefault
3487 * need to be atomic.
3488 */
3489 if (try_prefault)
3490 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3491
3492 {
3493 result = vm_map_enter(target_map,
3494 &map_addr, map_size,
3495 (vm_map_offset_t)mask,
3496 flags,
3497 object, offset,
3498 copy,
3499 cur_protection, max_protection,
3500 inheritance);
3501 }
3502 if (result != KERN_SUCCESS)
3503 vm_object_deallocate(object);
3504
3505 /*
3506 * Try to prefault, and do not forget to release the vm map lock.
3507 */
3508 if (result == KERN_SUCCESS && try_prefault) {
3509 mach_vm_address_t va = map_addr;
3510 kern_return_t kr = KERN_SUCCESS;
3511 unsigned int i = 0;
3512 int pmap_options;
3513
3514 pmap_options = PMAP_OPTIONS_NOWAIT;
3515 if (object->internal) {
3516 pmap_options |= PMAP_OPTIONS_INTERNAL;
3517 }
3518
3519 for (i = 0; i < page_list_count; ++i) {
3520 if (UPL_VALID_PAGE(page_list, i)) {
3521 /*
3522 * If this call fails, we should stop
3523 * trying to optimize; subsequent calls are
3524 * likely to fail too.
3525 *
3526 * We do not report an error for such a
3527 * failure, though: prefaulting is an
3528 * optimization, not something critical.
3529 */
3530 kr = pmap_enter_options(target_map->pmap,
3531 va, UPL_PHYS_PAGE(page_list, i),
3532 cur_protection, VM_PROT_NONE,
3533 0, TRUE, pmap_options, NULL);
3534 if (kr != KERN_SUCCESS) {
3535 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3536 break;
3537 }
3538 OSIncrementAtomic64(&vm_prefault_nb_pages);
3539 }
3540
3541 /* Next virtual address */
3542 va += PAGE_SIZE;
3543 }
3544 vm_map_unlock(target_map);
3545 }
3546
3547 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3548 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3549 *address = map_addr + offset_in_mapping;
3550 } else {
3551 *address = map_addr;
3552 }
3553 return result;
3554 }
3555
3556 kern_return_t
3557 vm_map_enter_mem_object(
3558 vm_map_t target_map,
3559 vm_map_offset_t *address,
3560 vm_map_size_t initial_size,
3561 vm_map_offset_t mask,
3562 int flags,
3563 ipc_port_t port,
3564 vm_object_offset_t offset,
3565 boolean_t copy,
3566 vm_prot_t cur_protection,
3567 vm_prot_t max_protection,
3568 vm_inherit_t inheritance)
3569 {
3570 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3571 port, offset, copy, cur_protection, max_protection,
3572 inheritance, NULL, 0);
3573 }
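/*
 * A sketch of how this is typically driven, assuming hypothetical
 * "src_map", "dst_map", "src_addr" and "size": a named entry created
 * with mach_make_memory_entry_64() is mapped into another map.
 *
 *	memory_object_size_t	mo_size = size;
 *	ipc_port_t		mem_entry;
 *	vm_map_offset_t		dst_addr = 0;
 *	kern_return_t		kr;
 *
 *	kr = mach_make_memory_entry_64(src_map, &mo_size, src_addr,
 *	                               VM_PROT_READ | VM_PROT_WRITE,
 *	                               &mem_entry, IPC_PORT_NULL);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_enter_mem_object(dst_map, &dst_addr, mo_size,
 *		                             (vm_map_offset_t)0,
 *		                             VM_FLAGS_ANYWHERE,
 *		                             mem_entry, 0, FALSE,
 *		                             VM_PROT_DEFAULT, VM_PROT_ALL,
 *		                             VM_INHERIT_DEFAULT);
 *	}
 */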
3574
3575 kern_return_t
3576 vm_map_enter_mem_object_prefault(
3577 vm_map_t target_map,
3578 vm_map_offset_t *address,
3579 vm_map_size_t initial_size,
3580 vm_map_offset_t mask,
3581 int flags,
3582 ipc_port_t port,
3583 vm_object_offset_t offset,
3584 vm_prot_t cur_protection,
3585 vm_prot_t max_protection,
3586 upl_page_list_ptr_t page_list,
3587 unsigned int page_list_count)
3588 {
3589 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3590 port, offset, FALSE, cur_protection, max_protection,
3591 VM_INHERIT_DEFAULT, page_list, page_list_count);
3592 }
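/*
 * A rough sketch of the prefault flavor, assuming hypothetical "map",
 * "mem_entry", "size", "upl" and "page_count": a caller that already
 * holds a UPL whose pages are resident can pass the UPL's page list so
 * the pages are entered into the pmap at map time.
 *
 *	upl_page_info_t *page_list = UPL_GET_INTERNAL_PAGE_LIST(upl);
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_mem_object_prefault(map, &addr, size,
 *	                                      (vm_map_offset_t)0,
 *	                                      VM_FLAGS_ANYWHERE,
 *	                                      mem_entry, 0,
 *	                                      VM_PROT_DEFAULT, VM_PROT_ALL,
 *	                                      page_list, page_count);
 */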
3593
3594
3595 kern_return_t
3596 vm_map_enter_mem_object_control(
3597 vm_map_t target_map,
3598 vm_map_offset_t *address,
3599 vm_map_size_t initial_size,
3600 vm_map_offset_t mask,
3601 int flags,
3602 memory_object_control_t control,
3603 vm_object_offset_t offset,
3604 boolean_t copy,
3605 vm_prot_t cur_protection,
3606 vm_prot_t max_protection,
3607 vm_inherit_t inheritance)
3608 {
3609 vm_map_address_t map_addr;
3610 vm_map_size_t map_size;
3611 vm_object_t object;
3612 vm_object_size_t size;
3613 kern_return_t result;
3614 memory_object_t pager;
3615 vm_prot_t pager_prot;
3616 kern_return_t kr;
3617
3618 /*
3619 * Check arguments for validity
3620 */
3621 if ((target_map == VM_MAP_NULL) ||
3622 (cur_protection & ~VM_PROT_ALL) ||
3623 (max_protection & ~VM_PROT_ALL) ||
3624 (inheritance > VM_INHERIT_LAST_VALID) ||
3625 initial_size == 0) {
3626 return KERN_INVALID_ARGUMENT;
3627 }
3628
3629 {
3630 map_addr = vm_map_trunc_page(*address,
3631 VM_MAP_PAGE_MASK(target_map));
3632 map_size = vm_map_round_page(initial_size,
3633 VM_MAP_PAGE_MASK(target_map));
3634 }
3635 size = vm_object_round_page(initial_size);
3636
3637 object = memory_object_control_to_vm_object(control);
3638
3639 if (object == VM_OBJECT_NULL)
3640 return KERN_INVALID_OBJECT;
3641
3642 if (object == kernel_object) {
3643 printf("Warning: Attempt to map kernel object"
3644 " by a non-private kernel entity\n");
3645 return KERN_INVALID_OBJECT;
3646 }
3647
3648 vm_object_lock(object);
3649 object->ref_count++;
3650 vm_object_res_reference(object);
3651
3652 /*
3653 * For "named" VM objects, let the pager know that the
3654 * memory object is being mapped. Some pagers need to keep
3655 * track of this, to know when they can reclaim the memory
3656 * object, for example.
3657 * VM calls memory_object_map() for each mapping (specifying
3658 * the protection of each mapping) and calls
3659 * memory_object_last_unmap() when all the mappings are gone.
3660 */
3661 pager_prot = max_protection;
3662 if (copy) {
3663 pager_prot &= ~VM_PROT_WRITE;
3664 }
3665 pager = object->pager;
3666 if (object->named &&
3667 pager != MEMORY_OBJECT_NULL &&
3668 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3669 assert(object->pager_ready);
3670 vm_object_mapping_wait(object, THREAD_UNINT);
3671 vm_object_mapping_begin(object);
3672 vm_object_unlock(object);
3673
3674 kr = memory_object_map(pager, pager_prot);
3675 assert(kr == KERN_SUCCESS);
3676
3677 vm_object_lock(object);
3678 vm_object_mapping_end(object);
3679 }
3680 vm_object_unlock(object);
3681
3682 /*
3683 * Perform the copy if requested
3684 */
3685
3686 if (copy) {
3687 vm_object_t new_object;
3688 vm_object_offset_t new_offset;
3689
3690 result = vm_object_copy_strategically(object, offset, size,
3691 &new_object, &new_offset,
3692 &copy);
3693
3694
3695 if (result == KERN_MEMORY_RESTART_COPY) {
3696 boolean_t success;
3697 boolean_t src_needs_copy;
3698
3699 /*
3700 * XXX
3701 * We currently ignore src_needs_copy.
3702 * This really is the issue of how to make
3703 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3704 * non-kernel users to use. Solution forthcoming.
3705 * In the meantime, since we don't allow non-kernel
3706 * memory managers to specify symmetric copy,
3707 * we won't run into problems here.
3708 */
3709 new_object = object;
3710 new_offset = offset;
3711 success = vm_object_copy_quickly(&new_object,
3712 new_offset, size,
3713 &src_needs_copy,
3714 &copy);
3715 assert(success);
3716 result = KERN_SUCCESS;
3717 }
3718 /*
3719 * Throw away the reference to the
3720 * original object, as it won't be mapped.
3721 */
3722
3723 vm_object_deallocate(object);
3724
3725 if (result != KERN_SUCCESS) {
3726 return result;
3727 }
3728
3729 object = new_object;
3730 offset = new_offset;
3731 }
3732
3733 {
3734 result = vm_map_enter(target_map,
3735 &map_addr, map_size,
3736 (vm_map_offset_t)mask,
3737 flags,
3738 object, offset,
3739 copy,
3740 cur_protection, max_protection,
3741 inheritance);
3742 }
3743 if (result != KERN_SUCCESS)
3744 vm_object_deallocate(object);
3745 *address = map_addr;
3746
3747 return result;
3748 }
3749
3750
3751 #if VM_CPM
3752
3753 #ifdef MACH_ASSERT
3754 extern pmap_paddr_t avail_start, avail_end;
3755 #endif
3756
3757 /*
3758 * Allocate memory in the specified map, with the caveat that
3759 * the memory is physically contiguous. This call may fail
3760 * if the system can't find sufficient contiguous memory.
3761 * This call may cause or lead to heart-stopping amounts of
3762 * paging activity.
3763 *
3764 * Memory obtained from this call should be freed in the
3765 * normal way, viz., via vm_deallocate.
3766 */
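/*
 * A minimal sketch, assuming a valid "map" and "size": the contiguous
 * allocation is released like any other mapping, via vm_deallocate().
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range at "addr" ...
 *		(void) vm_deallocate(map, addr, size);
 *	}
 */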
3767 kern_return_t
3768 vm_map_enter_cpm(
3769 vm_map_t map,
3770 vm_map_offset_t *addr,
3771 vm_map_size_t size,
3772 int flags)
3773 {
3774 vm_object_t cpm_obj;
3775 pmap_t pmap;
3776 vm_page_t m, pages;
3777 kern_return_t kr;
3778 vm_map_offset_t va, start, end, offset;
3779 #if MACH_ASSERT
3780 vm_map_offset_t prev_addr = 0;
3781 #endif /* MACH_ASSERT */
3782
3783 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3784 vm_tag_t tag;
3785
3786 VM_GET_FLAGS_ALIAS(flags, tag);
3787
3788 if (size == 0) {
3789 *addr = 0;
3790 return KERN_SUCCESS;
3791 }
3792 if (anywhere)
3793 *addr = vm_map_min(map);
3794 else
3795 *addr = vm_map_trunc_page(*addr,
3796 VM_MAP_PAGE_MASK(map));
3797 size = vm_map_round_page(size,
3798 VM_MAP_PAGE_MASK(map));
3799
3800 /*
3801 * LP64todo - cpm_allocate should probably allow
3802 * allocations of >4GB, but not with the current
3803 * algorithm, so just cast down the size for now.
3804 */
3805 if (size > VM_MAX_ADDRESS)
3806 return KERN_RESOURCE_SHORTAGE;
3807 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3808 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3809 return kr;
3810
3811 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3812 assert(cpm_obj != VM_OBJECT_NULL);
3813 assert(cpm_obj->internal);
3814 assert(cpm_obj->vo_size == (vm_object_size_t)size);
3815 assert(cpm_obj->can_persist == FALSE);
3816 assert(cpm_obj->pager_created == FALSE);
3817 assert(cpm_obj->pageout == FALSE);
3818 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3819
3820 /*
3821 * Insert pages into object.
3822 */
3823
3824 vm_object_lock(cpm_obj);
3825 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3826 m = pages;
3827 pages = NEXT_PAGE(m);
3828 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3829
3830 assert(!m->gobbled);
3831 assert(!m->wanted);
3832 assert(!m->pageout);
3833 assert(!m->tabled);
3834 assert(VM_PAGE_WIRED(m));
3835 /*
3836 * ENCRYPTED SWAP:
3837 * "m" is not supposed to be pageable, so it
3838 * should not be encrypted. It wouldn't be safe
3839 * to enter it in a new VM object while encrypted.
3840 */
3841 ASSERT_PAGE_DECRYPTED(m);
3842 assert(m->busy);
3843 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
3844
3845 m->busy = FALSE;
3846 vm_page_insert(m, cpm_obj, offset);
3847 }
3848 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3849 vm_object_unlock(cpm_obj);
3850
3851 /*
3852 * Hang onto a reference on the object in case a
3853 * multi-threaded application for some reason decides
3854 * to deallocate the portion of the address space into
3855 * which we will insert this object.
3856 *
3857 * Unfortunately, we must insert the object now before
3858 * we can talk to the pmap module about which addresses
3859 * must be wired down. Hence, the race with a multi-
3860 * threaded app.
3861 */
3862 vm_object_reference(cpm_obj);
3863
3864 /*
3865 * Insert object into map.
3866 */
3867
3868 kr = vm_map_enter(
3869 map,
3870 addr,
3871 size,
3872 (vm_map_offset_t)0,
3873 flags,
3874 cpm_obj,
3875 (vm_object_offset_t)0,
3876 FALSE,
3877 VM_PROT_ALL,
3878 VM_PROT_ALL,
3879 VM_INHERIT_DEFAULT);
3880
3881 if (kr != KERN_SUCCESS) {
3882 /*
3883 * A CPM object doesn't have can_persist set,
3884 * so all we have to do is deallocate it to
3885 * free up these pages.
3886 */
3887 assert(cpm_obj->pager_created == FALSE);
3888 assert(cpm_obj->can_persist == FALSE);
3889 assert(cpm_obj->pageout == FALSE);
3890 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3891 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3892 vm_object_deallocate(cpm_obj); /* kill creation ref */
3893 }
3894
3895 /*
3896 * Inform the physical mapping system that the
3897 * range of addresses may not fault, so that
3898 * page tables and such can be locked down as well.
3899 */
3900 start = *addr;
3901 end = start + size;
3902 pmap = vm_map_pmap(map);
3903 pmap_pageable(pmap, start, end, FALSE);
3904
3905 /*
3906 * Enter each page into the pmap, to avoid faults.
3907 * Note that this loop could be coded more efficiently,
3908 * if the need arose, rather than looking up each page
3909 * again.
3910 */
3911 for (offset = 0, va = start; offset < size;
3912 va += PAGE_SIZE, offset += PAGE_SIZE) {
3913 int type_of_fault;
3914
3915 vm_object_lock(cpm_obj);
3916 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3917 assert(m != VM_PAGE_NULL);
3918
3919 vm_page_zero_fill(m);
3920
3921 type_of_fault = DBG_ZERO_FILL_FAULT;
3922
3923 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3924 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3925 &type_of_fault);
3926
3927 vm_object_unlock(cpm_obj);
3928 }
3929
3930 #if MACH_ASSERT
3931 /*
3932 * Verify ordering in address space.
3933 */
3934 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3935 vm_object_lock(cpm_obj);
3936 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3937 vm_object_unlock(cpm_obj);
3938 if (m == VM_PAGE_NULL)
3939 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3940 cpm_obj, (uint64_t)offset);
3941 assert(m->tabled);
3942 assert(!m->busy);
3943 assert(!m->wanted);
3944 assert(!m->fictitious);
3945 assert(!m->private);
3946 assert(!m->absent);
3947 assert(!m->error);
3948 assert(!m->cleaning);
3949 assert(!m->laundry);
3950 assert(!m->precious);
3951 assert(!m->clustered);
3952 if (offset != 0) {
3953 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
3954 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3955 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3956 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3957 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3958 panic("vm_allocate_cpm: pages not contig!");
3959 }
3960 }
3961 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
3962 }
3963 #endif /* MACH_ASSERT */
3964
3965 vm_object_deallocate(cpm_obj); /* kill extra ref */
3966
3967 return kr;
3968 }
3969
3970
3971 #else /* VM_CPM */
3972
3973 /*
3974 * Interface is defined in all cases, but unless the kernel
3975 * is built explicitly for this option, the interface does
3976 * nothing.
3977 */
3978
3979 kern_return_t
3980 vm_map_enter_cpm(
3981 __unused vm_map_t map,
3982 __unused vm_map_offset_t *addr,
3983 __unused vm_map_size_t size,
3984 __unused int flags)
3985 {
3986 return KERN_FAILURE;
3987 }
3988 #endif /* VM_CPM */
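/*
 * Illustrative sketch (annotation, not part of the original source): how a
 * kernel caller might use vm_map_enter_cpm() per the contract documented
 * above.  The map, the size, and the error handling are assumptions for the
 * example; on kernels built without VM_CPM the call simply returns
 * KERN_FAILURE, and the memory is freed "in the normal way" via
 * vm_deallocate() as the block comment above says.
 */
#if 0
static kern_return_t
cpm_usage_sketch(vm_map_t map)
{
	vm_map_offset_t	addr = 0;
	vm_map_size_t	size = 4 * PAGE_SIZE;	/* must stay <= VM_MAX_ADDRESS */
	kern_return_t	kr;

	/* VM_FLAGS_ANYWHERE lets the allocator pick the virtual address */
	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... use the physically contiguous, wired range at "addr" ... */

	return vm_deallocate(map, addr, size);
}
#endif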
3989
3990 /* Not used without nested pmaps */
3991 #ifndef NO_NESTED_PMAP
3992 /*
3993 * Clip and unnest a portion of a nested submap mapping.
3994 */
3995
3996
3997 static void
3998 vm_map_clip_unnest(
3999 vm_map_t map,
4000 vm_map_entry_t entry,
4001 vm_map_offset_t start_unnest,
4002 vm_map_offset_t end_unnest)
4003 {
4004 vm_map_offset_t old_start_unnest = start_unnest;
4005 vm_map_offset_t old_end_unnest = end_unnest;
4006
4007 assert(entry->is_sub_map);
4008 assert(VME_SUBMAP(entry) != NULL);
4009 assert(entry->use_pmap);
4010
4011 /*
4012 * Query the platform for the optimal unnest range.
4013 * DRK: There's some duplication of effort here, since
4014 * callers may have adjusted the range to some extent. This
4015 * routine was introduced to support 1GiB subtree nesting
4016 * for x86 platforms, which can also nest on 2MiB boundaries
4017 * depending on size/alignment.
4018 */
4019 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
4020 assert(VME_SUBMAP(entry)->is_nested_map);
4021 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4022 log_unnest_badness(map,
4023 old_start_unnest,
4024 old_end_unnest,
4025 VME_SUBMAP(entry)->is_nested_map,
4026 (entry->vme_start +
4027 VME_SUBMAP(entry)->lowest_unnestable_start -
4028 VME_OFFSET(entry)));
4029 }
4030
4031 if (entry->vme_start > start_unnest ||
4032 entry->vme_end < end_unnest) {
4033 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4034 "bad nested entry: start=0x%llx end=0x%llx\n",
4035 (long long)start_unnest, (long long)end_unnest,
4036 (long long)entry->vme_start, (long long)entry->vme_end);
4037 }
4038
4039 if (start_unnest > entry->vme_start) {
4040 _vm_map_clip_start(&map->hdr,
4041 entry,
4042 start_unnest);
4043 if (map->holelistenabled) {
4044 vm_map_store_update_first_free(map, NULL, FALSE);
4045 } else {
4046 vm_map_store_update_first_free(map, map->first_free, FALSE);
4047 }
4048 }
4049 if (entry->vme_end > end_unnest) {
4050 _vm_map_clip_end(&map->hdr,
4051 entry,
4052 end_unnest);
4053 if (map->holelistenabled) {
4054 vm_map_store_update_first_free(map, NULL, FALSE);
4055 } else {
4056 vm_map_store_update_first_free(map, map->first_free, FALSE);
4057 }
4058 }
4059
4060 pmap_unnest(map->pmap,
4061 entry->vme_start,
4062 entry->vme_end - entry->vme_start);
4063 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
4064 /* clean up parent map/maps */
4065 vm_map_submap_pmap_clean(
4066 map, entry->vme_start,
4067 entry->vme_end,
4068 VME_SUBMAP(entry),
4069 VME_OFFSET(entry));
4070 }
4071 entry->use_pmap = FALSE;
4072 if ((map->pmap != kernel_pmap) &&
4073 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4074 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
4075 }
4076 }
4077 #endif /* NO_NESTED_PMAP */
4078
4079 /*
4080 * vm_map_clip_start: [ internal use only ]
4081 *
4082 * Asserts that the given entry begins at or after
4083 * the specified address; if necessary,
4084 * it splits the entry into two.
4085 */
4086 void
4087 vm_map_clip_start(
4088 vm_map_t map,
4089 vm_map_entry_t entry,
4090 vm_map_offset_t startaddr)
4091 {
4092 #ifndef NO_NESTED_PMAP
4093 if (entry->is_sub_map &&
4094 entry->use_pmap &&
4095 startaddr >= entry->vme_start) {
4096 vm_map_offset_t start_unnest, end_unnest;
4097
4098 /*
4099 * Make sure "startaddr" is no longer in a nested range
4100 * before we clip. Unnest only the minimum range the platform
4101 * can handle.
4102 * vm_map_clip_unnest may perform additional adjustments to
4103 * the unnest range.
4104 */
4105 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4106 end_unnest = start_unnest + pmap_nesting_size_min;
4107 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4108 }
4109 #endif /* NO_NESTED_PMAP */
4110 if (startaddr > entry->vme_start) {
4111 if (VME_OBJECT(entry) &&
4112 !entry->is_sub_map &&
4113 VME_OBJECT(entry)->phys_contiguous) {
4114 pmap_remove(map->pmap,
4115 (addr64_t)(entry->vme_start),
4116 (addr64_t)(entry->vme_end));
4117 }
4118 if (entry->vme_atomic) {
4119 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4120 }
4121 _vm_map_clip_start(&map->hdr, entry, startaddr);
4122 if (map->holelistenabled) {
4123 vm_map_store_update_first_free(map, NULL, FALSE);
4124 } else {
4125 vm_map_store_update_first_free(map, map->first_free, FALSE);
4126 }
4127 }
4128 }
4129
4130
4131 #define vm_map_copy_clip_start(copy, entry, startaddr) \
4132 MACRO_BEGIN \
4133 if ((startaddr) > (entry)->vme_start) \
4134 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
4135 MACRO_END
4136
4137 /*
4138 * This routine is called only when it is known that
4139 * the entry must be split.
4140 */
4141 static void
4142 _vm_map_clip_start(
4143 struct vm_map_header *map_header,
4144 vm_map_entry_t entry,
4145 vm_map_offset_t start)
4146 {
4147 vm_map_entry_t new_entry;
4148
4149 /*
4150 * Split off the front portion --
4151 * note that we must insert the new
4152 * entry BEFORE this one, so that
4153 * this entry has the specified starting
4154 * address.
4155 */
4156
4157 if (entry->map_aligned) {
4158 assert(VM_MAP_PAGE_ALIGNED(start,
4159 VM_MAP_HDR_PAGE_MASK(map_header)));
4160 }
4161
4162 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4163 vm_map_entry_copy_full(new_entry, entry);
4164
4165 new_entry->vme_end = start;
4166 assert(new_entry->vme_start < new_entry->vme_end);
4167 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
4168 assert(start < entry->vme_end);
4169 entry->vme_start = start;
4170
4171 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
4172
4173 if (entry->is_sub_map)
4174 vm_map_reference(VME_SUBMAP(new_entry));
4175 else
4176 vm_object_reference(VME_OBJECT(new_entry));
4177 }
4178
4179
4180 /*
4181 * vm_map_clip_end: [ internal use only ]
4182 *
4183 * Asserts that the given entry ends at or before
4184 * the specified address; if necessary,
4185 * it splits the entry into two.
4186 */
4187 void
4188 vm_map_clip_end(
4189 vm_map_t map,
4190 vm_map_entry_t entry,
4191 vm_map_offset_t endaddr)
4192 {
4193 if (endaddr > entry->vme_end) {
4194 /*
4195 * Within the scope of this clipping, limit "endaddr" to
4196 * the end of this map entry...
4197 */
4198 endaddr = entry->vme_end;
4199 }
4200 #ifndef NO_NESTED_PMAP
4201 if (entry->is_sub_map && entry->use_pmap) {
4202 vm_map_offset_t start_unnest, end_unnest;
4203
4204 /*
4205 * Make sure the range between the start of this entry and
4206 * the new "endaddr" is no longer nested before we clip.
4207 * Unnest only the minimum range the platform can handle.
4208 * vm_map_clip_unnest may perform additional adjustments to
4209 * the unnest range.
4210 */
4211 start_unnest = entry->vme_start;
4212 end_unnest =
4213 (endaddr + pmap_nesting_size_min - 1) &
4214 ~(pmap_nesting_size_min - 1);
4215 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4216 }
4217 #endif /* NO_NESTED_PMAP */
4218 if (endaddr < entry->vme_end) {
4219 if (VME_OBJECT(entry) &&
4220 !entry->is_sub_map &&
4221 VME_OBJECT(entry)->phys_contiguous) {
4222 pmap_remove(map->pmap,
4223 (addr64_t)(entry->vme_start),
4224 (addr64_t)(entry->vme_end));
4225 }
4226 if (entry->vme_atomic) {
4227 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4228 }
4229 _vm_map_clip_end(&map->hdr, entry, endaddr);
4230 if (map->holelistenabled) {
4231 vm_map_store_update_first_free(map, NULL, FALSE);
4232 } else {
4233 vm_map_store_update_first_free(map, map->first_free, FALSE);
4234 }
4235 }
4236 }
4237
4238
4239 #define vm_map_copy_clip_end(copy, entry, endaddr) \
4240 MACRO_BEGIN \
4241 if ((endaddr) < (entry)->vme_end) \
4242 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
4243 MACRO_END
4244
4245 /*
4246 * This routine is called only when it is known that
4247 * the entry must be split.
4248 */
4249 static void
4250 _vm_map_clip_end(
4251 struct vm_map_header *map_header,
4252 vm_map_entry_t entry,
4253 vm_map_offset_t end)
4254 {
4255 vm_map_entry_t new_entry;
4256
4257 /*
4258 * Create a new entry and insert it
4259 * AFTER the specified entry
4260 */
4261
4262 if (entry->map_aligned) {
4263 assert(VM_MAP_PAGE_ALIGNED(end,
4264 VM_MAP_HDR_PAGE_MASK(map_header)));
4265 }
4266
4267 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4268 vm_map_entry_copy_full(new_entry, entry);
4269
4270 assert(entry->vme_start < end);
4271 new_entry->vme_start = entry->vme_end = end;
4272 VME_OFFSET_SET(new_entry,
4273 VME_OFFSET(new_entry) + (end - entry->vme_start));
4274 assert(new_entry->vme_start < new_entry->vme_end);
4275
4276 _vm_map_store_entry_link(map_header, entry, new_entry);
4277
4278 if (entry->is_sub_map)
4279 vm_map_reference(VME_SUBMAP(new_entry));
4280 else
4281 vm_object_reference(VME_OBJECT(new_entry));
4282 }
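/*
 * Illustrative note (annotation, not part of the original source): after
 * _vm_map_clip_start(hdr, entry, S) the original range is split as
 *
 *	[vme_start ............... vme_end)
 *	[vme_start .. S)  [S .. vme_end)
 *	    new_entry          entry
 *
 * while _vm_map_clip_end(hdr, entry, E) keeps the prefix in "entry" and
 * links the new entry after it:
 *
 *	[vme_start .. E)  [E .. vme_end)
 *	      entry          new_entry
 *
 * Both halves reference the same object or submap (an extra reference is
 * taken above) and VME_OFFSET is adjusted so each half still maps the same
 * backing data.
 */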
4283
4284
4285 /*
4286 * VM_MAP_RANGE_CHECK: [ internal use only ]
4287 *
4288 * Asserts that the starting and ending region
4289 * addresses fall within the valid range of the map.
4290 */
4291 #define VM_MAP_RANGE_CHECK(map, start, end) \
4292 MACRO_BEGIN \
4293 if (start < vm_map_min(map)) \
4294 start = vm_map_min(map); \
4295 if (end > vm_map_max(map)) \
4296 end = vm_map_max(map); \
4297 if (start > end) \
4298 start = end; \
4299 MACRO_END
4300
4301 /*
4302 * vm_map_range_check: [ internal use only ]
4303 *
4304 * Check that the region defined by the specified start and
4305 * end addresses is wholly contained within a single map
4306 * entry or set of adjacent map entries of the specified map,
4307 * i.e. the specified region contains no unmapped space.
4308 * If any or all of the region is unmapped, FALSE is returned.
4309 * Otherwise, TRUE is returned and if the output argument 'entry'
4310 * is not NULL it points to the map entry containing the start
4311 * of the region.
4312 *
4313 * The map is locked for reading on entry and is left locked.
4314 */
4315 static boolean_t
4316 vm_map_range_check(
4317 vm_map_t map,
4318 vm_map_offset_t start,
4319 vm_map_offset_t end,
4320 vm_map_entry_t *entry)
4321 {
4322 vm_map_entry_t cur;
4323 vm_map_offset_t prev;
4324
4325 /*
4326 * Basic sanity checks first
4327 */
4328 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4329 return (FALSE);
4330
4331 /*
4332 * Check first if the region starts within a valid
4333 * mapping for the map.
4334 */
4335 if (!vm_map_lookup_entry(map, start, &cur))
4336 return (FALSE);
4337
4338 /*
4339 * Optimize for the case that the region is contained
4340 * in a single map entry.
4341 */
4342 if (entry != (vm_map_entry_t *) NULL)
4343 *entry = cur;
4344 if (end <= cur->vme_end)
4345 return (TRUE);
4346
4347 /*
4348 * If the region is not wholly contained within a
4349 * single entry, walk the entries looking for holes.
4350 */
4351 prev = cur->vme_end;
4352 cur = cur->vme_next;
4353 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4354 if (end <= cur->vme_end)
4355 return (TRUE);
4356 prev = cur->vme_end;
4357 cur = cur->vme_next;
4358 }
4359 return (FALSE);
4360 }
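/*
 * Illustrative sketch (annotation, not part of the original source): how a
 * caller inside this file might use vm_map_range_check().  The map and the
 * [start, end) bounds are assumptions for the example; per the block comment
 * above, the map must already be locked (here, for reading) and stays locked
 * across the call.
 */
#if 0
	vm_map_entry_t	first;

	vm_map_lock_read(map);
	if (!vm_map_range_check(map, start, end, &first)) {
		/* some part of [start, end) is unmapped */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}
	/* "first" is the entry containing "start"; the range has no holes */
	vm_map_unlock_read(map);
#endif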
4361
4362 /*
4363 * vm_map_submap: [ kernel use only ]
4364 *
4365 * Mark the given range as handled by a subordinate map.
4366 *
4367 * This range must have been created with vm_map_find using
4368 * the vm_submap_object, and no other operations may have been
4369 * performed on this range prior to calling vm_map_submap.
4370 *
4371 * Only a limited number of operations can be performed
4372 * within this range after calling vm_map_submap:
4373 * vm_fault
4374 * [Don't try vm_map_copyin!]
4375 *
4376 * To remove a submapping, one must first remove the
4377 * range from the superior map, and then destroy the
4378 * submap (if desired). [Better yet, don't try it.]
4379 */
4380 kern_return_t
4381 vm_map_submap(
4382 vm_map_t map,
4383 vm_map_offset_t start,
4384 vm_map_offset_t end,
4385 vm_map_t submap,
4386 vm_map_offset_t offset,
4387 #ifdef NO_NESTED_PMAP
4388 __unused
4389 #endif /* NO_NESTED_PMAP */
4390 boolean_t use_pmap)
4391 {
4392 vm_map_entry_t entry;
4393 kern_return_t result = KERN_INVALID_ARGUMENT;
4394 vm_object_t object;
4395
4396 vm_map_lock(map);
4397
4398 if (! vm_map_lookup_entry(map, start, &entry)) {
4399 entry = entry->vme_next;
4400 }
4401
4402 if (entry == vm_map_to_entry(map) ||
4403 entry->is_sub_map) {
4404 vm_map_unlock(map);
4405 return KERN_INVALID_ARGUMENT;
4406 }
4407
4408 vm_map_clip_start(map, entry, start);
4409 vm_map_clip_end(map, entry, end);
4410
4411 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4412 (!entry->is_sub_map) &&
4413 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
4414 (object->resident_page_count == 0) &&
4415 (object->copy == VM_OBJECT_NULL) &&
4416 (object->shadow == VM_OBJECT_NULL) &&
4417 (!object->pager_created)) {
4418 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4419 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
4420 vm_object_deallocate(object);
4421 entry->is_sub_map = TRUE;
4422 entry->use_pmap = FALSE;
4423 VME_SUBMAP_SET(entry, submap);
4424 vm_map_reference(submap);
4425 if (submap->mapped_in_other_pmaps == FALSE &&
4426 vm_map_pmap(submap) != PMAP_NULL &&
4427 vm_map_pmap(submap) != vm_map_pmap(map)) {
4428 /*
4429 * This submap is being mapped in a map
4430 * that uses a different pmap.
4431 * Set its "mapped_in_other_pmaps" flag
4432 * to indicate that we now need to
4433 * remove mappings from all pmaps rather
4434 * than just the submap's pmap.
4435 */
4436 submap->mapped_in_other_pmaps = TRUE;
4437 }
4438
4439 #ifndef NO_NESTED_PMAP
4440 if (use_pmap) {
4441 /* nest if platform code will allow */
4442 if(submap->pmap == NULL) {
4443 ledger_t ledger = map->pmap->ledger;
4444 submap->pmap = pmap_create(ledger,
4445 (vm_map_size_t) 0, FALSE);
4446 if(submap->pmap == PMAP_NULL) {
4447 vm_map_unlock(map);
4448 return(KERN_NO_SPACE);
4449 }
4450 }
4451 result = pmap_nest(map->pmap,
4452 (VME_SUBMAP(entry))->pmap,
4453 (addr64_t)start,
4454 (addr64_t)start,
4455 (uint64_t)(end - start));
4456 if(result)
4457 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4458 entry->use_pmap = TRUE;
4459 }
4460 #else /* NO_NESTED_PMAP */
4461 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
4462 #endif /* NO_NESTED_PMAP */
4463 result = KERN_SUCCESS;
4464 }
4465 vm_map_unlock(map);
4466
4467 return(result);
4468 }
4469
4470
4471 /*
4472 * vm_map_protect:
4473 *
4474 * Sets the protection of the specified address
4475 * region in the target map. If "set_max" is
4476 * specified, the maximum protection is to be set;
4477 * otherwise, only the current protection is affected.
4478 */
4479 kern_return_t
4480 vm_map_protect(
4481 vm_map_t map,
4482 vm_map_offset_t start,
4483 vm_map_offset_t end,
4484 vm_prot_t new_prot,
4485 boolean_t set_max)
4486 {
4487 vm_map_entry_t current;
4488 vm_map_offset_t prev;
4489 vm_map_entry_t entry;
4490 vm_prot_t new_max;
4491
4492 XPR(XPR_VM_MAP,
4493 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4494 map, start, end, new_prot, set_max);
4495
4496 vm_map_lock(map);
4497
4498 /* LP64todo - remove this check when vm_map_commpage64()
4499 * no longer has to stuff in a map_entry for the commpage
4500 * above the map's max_offset.
4501 */
4502 if (start >= map->max_offset) {
4503 vm_map_unlock(map);
4504 return(KERN_INVALID_ADDRESS);
4505 }
4506
4507 while(1) {
4508 /*
4509 * Lookup the entry. If it doesn't start in a valid
4510 * entry, return an error.
4511 */
4512 if (! vm_map_lookup_entry(map, start, &entry)) {
4513 vm_map_unlock(map);
4514 return(KERN_INVALID_ADDRESS);
4515 }
4516
4517 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4518 start = SUPERPAGE_ROUND_DOWN(start);
4519 continue;
4520 }
4521 break;
4522 }
4523 if (entry->superpage_size)
4524 end = SUPERPAGE_ROUND_UP(end);
4525
4526 /*
4527 * Make a first pass to check for protection and address
4528 * violations.
4529 */
4530
4531 current = entry;
4532 prev = current->vme_start;
4533 while ((current != vm_map_to_entry(map)) &&
4534 (current->vme_start < end)) {
4535
4536 /*
4537 * If there is a hole, return an error.
4538 */
4539 if (current->vme_start != prev) {
4540 vm_map_unlock(map);
4541 return(KERN_INVALID_ADDRESS);
4542 }
4543
4544 new_max = current->max_protection;
4545 if(new_prot & VM_PROT_COPY) {
4546 new_max |= VM_PROT_WRITE;
4547 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4548 vm_map_unlock(map);
4549 return(KERN_PROTECTION_FAILURE);
4550 }
4551 } else {
4552 if ((new_prot & new_max) != new_prot) {
4553 vm_map_unlock(map);
4554 return(KERN_PROTECTION_FAILURE);
4555 }
4556 }
4557
4558
4559 prev = current->vme_end;
4560 current = current->vme_next;
4561 }
4562
4563
4564 if (end > prev) {
4565 vm_map_unlock(map);
4566 return(KERN_INVALID_ADDRESS);
4567 }
4568
4569 /*
4570 * Go back and fix up protections.
4571 * Clip to start here if the range starts within
4572 * the entry.
4573 */
4574
4575 current = entry;
4576 if (current != vm_map_to_entry(map)) {
4577 /* clip and unnest if necessary */
4578 vm_map_clip_start(map, current, start);
4579 }
4580
4581 while ((current != vm_map_to_entry(map)) &&
4582 (current->vme_start < end)) {
4583
4584 vm_prot_t old_prot;
4585
4586 vm_map_clip_end(map, current, end);
4587
4588 if (current->is_sub_map) {
4589 /* clipping did unnest if needed */
4590 assert(!current->use_pmap);
4591 }
4592
4593 old_prot = current->protection;
4594
4595 if(new_prot & VM_PROT_COPY) {
4596 /* caller is asking specifically to copy the */
4597 /* mapped data, this implies that max protection */
4598 /* will include write. Caller must be prepared */
4599 /* for loss of shared memory communication in the */
4600 /* target area after taking this step */
4601
4602 if (current->is_sub_map == FALSE &&
4603 VME_OBJECT(current) == VM_OBJECT_NULL) {
4604 VME_OBJECT_SET(current,
4605 vm_object_allocate(
4606 (vm_map_size_t)
4607 (current->vme_end -
4608 current->vme_start)));
4609 VME_OFFSET_SET(current, 0);
4610 assert(current->use_pmap);
4611 }
4612 assert(current->wired_count == 0);
4613 current->needs_copy = TRUE;
4614 current->max_protection |= VM_PROT_WRITE;
4615 }
4616
4617 if (set_max)
4618 current->protection =
4619 (current->max_protection =
4620 new_prot & ~VM_PROT_COPY) &
4621 old_prot;
4622 else
4623 current->protection = new_prot & ~VM_PROT_COPY;
4624
4625 /*
4626 * Update physical map if necessary.
4627 * If the request is to turn off write protection,
4628 * we won't do it for real (in pmap). This is because
4629 * it would cause copy-on-write to fail. We've already
4630 * set, the new protection in the map, so if a
4631 * write-protect fault occurred, it will be fixed up
4632 * properly, COW or not.
4633 */
4634 if (current->protection != old_prot) {
4635 /* Look one level in; we support nested pmaps */
4636 /* from mapped submaps which are direct entries */
4637 /* in our map */
4638
4639 vm_prot_t prot;
4640
4641 prot = current->protection;
4642 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
4643 prot &= ~VM_PROT_WRITE;
4644 } else {
4645 assert(!VME_OBJECT(current)->code_signed);
4646 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4647 }
4648
4649 if (override_nx(map, VME_ALIAS(current)) && prot)
4650 prot |= VM_PROT_EXECUTE;
4651
4652
4653 if (current->is_sub_map && current->use_pmap) {
4654 pmap_protect(VME_SUBMAP(current)->pmap,
4655 current->vme_start,
4656 current->vme_end,
4657 prot);
4658 } else {
4659 pmap_protect(map->pmap,
4660 current->vme_start,
4661 current->vme_end,
4662 prot);
4663 }
4664 }
4665 current = current->vme_next;
4666 }
4667
4668 current = entry;
4669 while ((current != vm_map_to_entry(map)) &&
4670 (current->vme_start <= end)) {
4671 vm_map_simplify_entry(map, current);
4672 current = current->vme_next;
4673 }
4674
4675 vm_map_unlock(map);
4676 return(KERN_SUCCESS);
4677 }
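/*
 * Illustrative sketch (annotation, not part of the original source): making
 * a range read-only via vm_map_protect().  The map, "addr" and "len" are
 * assumptions for the example; set_max == FALSE changes only the current
 * protection and leaves max_protection (and hence the ability to re-enable
 * write later) alone.
 */
#if 0
	kern_return_t	kr;

	kr = vm_map_protect(map,
			    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
			    vm_map_round_page(addr + len, VM_MAP_PAGE_MASK(map)),
			    VM_PROT_READ,
			    FALSE);		/* set_max */
	if (kr == KERN_PROTECTION_FAILURE) {
		/* requested bits exceed max_protection somewhere in the range */
	}
#endif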
4678
4679 /*
4680 * vm_map_inherit:
4681 *
4682 * Sets the inheritance of the specified address
4683 * range in the target map. Inheritance
4684 * affects how the map will be shared with
4685 * child maps at the time of vm_map_fork.
4686 */
4687 kern_return_t
4688 vm_map_inherit(
4689 vm_map_t map,
4690 vm_map_offset_t start,
4691 vm_map_offset_t end,
4692 vm_inherit_t new_inheritance)
4693 {
4694 vm_map_entry_t entry;
4695 vm_map_entry_t temp_entry;
4696
4697 vm_map_lock(map);
4698
4699 VM_MAP_RANGE_CHECK(map, start, end);
4700
4701 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4702 entry = temp_entry;
4703 }
4704 else {
4705 temp_entry = temp_entry->vme_next;
4706 entry = temp_entry;
4707 }
4708
4709 /* first check entire range for submaps which can't support the */
4710 /* given inheritance. */
4711 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4712 if(entry->is_sub_map) {
4713 if(new_inheritance == VM_INHERIT_COPY) {
4714 vm_map_unlock(map);
4715 return(KERN_INVALID_ARGUMENT);
4716 }
4717 }
4718
4719 entry = entry->vme_next;
4720 }
4721
4722 entry = temp_entry;
4723 if (entry != vm_map_to_entry(map)) {
4724 /* clip and unnest if necessary */
4725 vm_map_clip_start(map, entry, start);
4726 }
4727
4728 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4729 vm_map_clip_end(map, entry, end);
4730 if (entry->is_sub_map) {
4731 /* clip did unnest if needed */
4732 assert(!entry->use_pmap);
4733 }
4734
4735 entry->inheritance = new_inheritance;
4736
4737 entry = entry->vme_next;
4738 }
4739
4740 vm_map_unlock(map);
4741 return(KERN_SUCCESS);
4742 }
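/*
 * Illustrative sketch (annotation, not part of the original source): marking
 * a range so that a child created at vm_map_fork() time gets its own copy.
 * The map and bounds are assumptions for the example; as the loop above
 * shows, VM_INHERIT_COPY is rejected with KERN_INVALID_ARGUMENT if the range
 * contains submap entries.
 */
#if 0
	kern_return_t	kr;

	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
#endif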
4743
4744 /*
4745 * Update the accounting for the amount of wired memory in this map. If the user has
4746 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4747 */
4748
4749 static kern_return_t
4750 add_wire_counts(
4751 vm_map_t map,
4752 vm_map_entry_t entry,
4753 boolean_t user_wire)
4754 {
4755 vm_map_size_t size;
4756
4757 if (user_wire) {
4758 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
4759
4760 /*
4761 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4762 * this map entry.
4763 */
4764
4765 if (entry->user_wired_count == 0) {
4766 size = entry->vme_end - entry->vme_start;
4767
4768 /*
4769 * Since this is the first time the user is wiring this map entry, check to see if we're
4770 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4771 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4772 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4773 * limit, then we fail.
4774 */
4775
4776 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4777 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4778 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4779 return KERN_RESOURCE_SHORTAGE;
4780
4781 /*
4782 * The first time the user wires an entry, we also increment the wired_count and add this to
4783 * the total that has been wired in the map.
4784 */
4785
4786 if (entry->wired_count >= MAX_WIRE_COUNT)
4787 return KERN_FAILURE;
4788
4789 entry->wired_count++;
4790 map->user_wire_size += size;
4791 }
4792
4793 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4794 return KERN_FAILURE;
4795
4796 entry->user_wired_count++;
4797
4798 } else {
4799
4800 /*
4801 * The kernel's wiring the memory. Just bump the count and continue.
4802 */
4803
4804 if (entry->wired_count >= MAX_WIRE_COUNT)
4805 panic("vm_map_wire: too many wirings");
4806
4807 entry->wired_count++;
4808 }
4809
4810 return KERN_SUCCESS;
4811 }
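/*
 * Illustrative note (annotation, not part of the original source): for a
 * user wire request of "size" bytes, the checks above amount to requiring
 *
 *	size + map->user_wire_size       <= MIN(map->user_wire_limit,
 *	                                        vm_user_wire_limit)
 *	size + ptoa_64(total_wire_count) <= vm_global_user_wire_limit
 *	size + ptoa_64(total_wire_count) <= max_mem - vm_global_no_user_wire_amount
 *
 * i.e. a per-map cap, a global user cap, and a reserve of memory that can
 * never be user-wired (total_wire_count being vm_page_wire_count plus
 * vm_lopage_free_count).  Exceeding any of them returns
 * KERN_RESOURCE_SHORTAGE; kernel wirings skip these checks entirely.
 */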
4812
4813 /*
4814 * Update the memory wiring accounting now that the given map entry is being unwired.
4815 */
4816
4817 static void
4818 subtract_wire_counts(
4819 vm_map_t map,
4820 vm_map_entry_t entry,
4821 boolean_t user_wire)
4822 {
4823
4824 if (user_wire) {
4825
4826 /*
4827 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4828 */
4829
4830 if (entry->user_wired_count == 1) {
4831
4832 /*
4833 * We're removing the last user wire reference. Decrement the wired_count and the total
4834 * user wired memory for this map.
4835 */
4836
4837 assert(entry->wired_count >= 1);
4838 entry->wired_count--;
4839 map->user_wire_size -= entry->vme_end - entry->vme_start;
4840 }
4841
4842 assert(entry->user_wired_count >= 1);
4843 entry->user_wired_count--;
4844
4845 } else {
4846
4847 /*
4848 * The kernel is unwiring the memory. Just update the count.
4849 */
4850
4851 assert(entry->wired_count >= 1);
4852 entry->wired_count--;
4853 }
4854 }
4855
4856
4857 /*
4858 * vm_map_wire:
4859 *
4860 * Sets the pageability of the specified address range in the
4861 * target map as wired. Regions specified as not pageable require
4862 * locked-down physical memory and physical page maps. The
4863 * access_type variable indicates types of accesses that must not
4864 * generate page faults. This is checked against protection of
4865 * memory being locked-down.
4866 *
4867 * The map must not be locked, but a reference must remain to the
4868 * map throughout the call.
4869 */
4870 static kern_return_t
4871 vm_map_wire_nested(
4872 vm_map_t map,
4873 vm_map_offset_t start,
4874 vm_map_offset_t end,
4875 vm_prot_t caller_prot,
4876 boolean_t user_wire,
4877 pmap_t map_pmap,
4878 vm_map_offset_t pmap_addr,
4879 ppnum_t *physpage_p)
4880 {
4881 vm_map_entry_t entry;
4882 vm_prot_t access_type;
4883 struct vm_map_entry *first_entry, tmp_entry;
4884 vm_map_t real_map;
4885 vm_map_offset_t s,e;
4886 kern_return_t rc;
4887 boolean_t need_wakeup;
4888 boolean_t main_map = FALSE;
4889 wait_interrupt_t interruptible_state;
4890 thread_t cur_thread;
4891 unsigned int last_timestamp;
4892 vm_map_size_t size;
4893 boolean_t wire_and_extract;
4894
4895 access_type = (caller_prot & VM_PROT_ALL);
4896
4897 wire_and_extract = FALSE;
4898 if (physpage_p != NULL) {
4899 /*
4900 * The caller wants the physical page number of the
4901 * wired page. We return only one physical page number
4902 * so this works for only one page at a time.
4903 */
4904 if ((end - start) != PAGE_SIZE) {
4905 return KERN_INVALID_ARGUMENT;
4906 }
4907 wire_and_extract = TRUE;
4908 *physpage_p = 0;
4909 }
4910
4911 vm_map_lock(map);
4912 if(map_pmap == NULL)
4913 main_map = TRUE;
4914 last_timestamp = map->timestamp;
4915
4916 VM_MAP_RANGE_CHECK(map, start, end);
4917 assert(page_aligned(start));
4918 assert(page_aligned(end));
4919 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4920 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4921 if (start == end) {
4922 /* We wired what the caller asked for, zero pages */
4923 vm_map_unlock(map);
4924 return KERN_SUCCESS;
4925 }
4926
4927 need_wakeup = FALSE;
4928 cur_thread = current_thread();
4929
4930 s = start;
4931 rc = KERN_SUCCESS;
4932
4933 if (vm_map_lookup_entry(map, s, &first_entry)) {
4934 entry = first_entry;
4935 /*
4936 * vm_map_clip_start will be done later.
4937 * We don't want to unnest any nested submaps here !
4938 */
4939 } else {
4940 /* Start address is not in map */
4941 rc = KERN_INVALID_ADDRESS;
4942 goto done;
4943 }
4944
4945 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4946 /*
4947 * At this point, we have wired from "start" to "s".
4948 * We still need to wire from "s" to "end".
4949 *
4950 * "entry" hasn't been clipped, so it could start before "s"
4951 * and/or end after "end".
4952 */
4953
4954 /* "e" is how far we want to wire in this entry */
4955 e = entry->vme_end;
4956 if (e > end)
4957 e = end;
4958
4959 /*
4960 * If another thread is wiring/unwiring this entry then
4961 * block after informing the other thread to wake us up.
4962 */
4963 if (entry->in_transition) {
4964 wait_result_t wait_result;
4965
4966 /*
4967 * We have not clipped the entry. Make sure that
4968 * the start address is in range so that the lookup
4969 * below will succeed.
4970 * "s" is the current starting point: we've already
4971 * wired from "start" to "s" and we still have
4972 * to wire from "s" to "end".
4973 */
4974
4975 entry->needs_wakeup = TRUE;
4976
4977 /*
4978 * wake up anybody waiting on entries that we have
4979 * already wired.
4980 */
4981 if (need_wakeup) {
4982 vm_map_entry_wakeup(map);
4983 need_wakeup = FALSE;
4984 }
4985 /*
4986 * User wiring is interruptible
4987 */
4988 wait_result = vm_map_entry_wait(map,
4989 (user_wire) ? THREAD_ABORTSAFE :
4990 THREAD_UNINT);
4991 if (user_wire && wait_result == THREAD_INTERRUPTED) {
4992 /*
4993 * undo the wirings we have done so far
4994 * We do not clear the needs_wakeup flag,
4995 * because we cannot tell if we were the
4996 * only one waiting.
4997 */
4998 rc = KERN_FAILURE;
4999 goto done;
5000 }
5001
5002 /*
5003 * Cannot avoid a lookup here. reset timestamp.
5004 */
5005 last_timestamp = map->timestamp;
5006
5007 /*
5008 * The entry could have been clipped, look it up again.
5009 * The worst that can happen is that it no longer exists.
5010 */
5011 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5012 /*
5013 * User: undo everything up to the previous
5014 * entry. Let vm_map_unwire worry about
5015 * checking the validity of the range.
5016 */
5017 rc = KERN_FAILURE;
5018 goto done;
5019 }
5020 entry = first_entry;
5021 continue;
5022 }
5023
5024 if (entry->is_sub_map) {
5025 vm_map_offset_t sub_start;
5026 vm_map_offset_t sub_end;
5027 vm_map_offset_t local_start;
5028 vm_map_offset_t local_end;
5029 pmap_t pmap;
5030
5031 if (wire_and_extract) {
5032 /*
5033 * Wiring would result in copy-on-write
5034 * which would not be compatible with
5035 * the sharing we have with the original
5036 * provider of this memory.
5037 */
5038 rc = KERN_INVALID_ARGUMENT;
5039 goto done;
5040 }
5041
5042 vm_map_clip_start(map, entry, s);
5043 vm_map_clip_end(map, entry, end);
5044
5045 sub_start = VME_OFFSET(entry);
5046 sub_end = entry->vme_end;
5047 sub_end += VME_OFFSET(entry) - entry->vme_start;
5048
5049 local_end = entry->vme_end;
5050 if(map_pmap == NULL) {
5051 vm_object_t object;
5052 vm_object_offset_t offset;
5053 vm_prot_t prot;
5054 boolean_t wired;
5055 vm_map_entry_t local_entry;
5056 vm_map_version_t version;
5057 vm_map_t lookup_map;
5058
5059 if(entry->use_pmap) {
5060 pmap = VME_SUBMAP(entry)->pmap;
5061 /* ppc implementation requires that */
5062 /* submap's pmap address ranges line */
5063 /* up with parent map */
5064 #ifdef notdef
5065 pmap_addr = sub_start;
5066 #endif
5067 pmap_addr = s;
5068 } else {
5069 pmap = map->pmap;
5070 pmap_addr = s;
5071 }
5072
5073 if (entry->wired_count) {
5074 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5075 goto done;
5076
5077 /*
5078 * The map was not unlocked:
5079 * no need to goto re-lookup.
5080 * Just go directly to next entry.
5081 */
5082 entry = entry->vme_next;
5083 s = entry->vme_start;
5084 continue;
5085
5086 }
5087
5088 /* call vm_map_lookup_locked to */
5089 /* cause any needs copy to be */
5090 /* evaluated */
5091 local_start = entry->vme_start;
5092 lookup_map = map;
5093 vm_map_lock_write_to_read(map);
5094 if(vm_map_lookup_locked(
5095 &lookup_map, local_start,
5096 access_type | VM_PROT_COPY,
5097 OBJECT_LOCK_EXCLUSIVE,
5098 &version, &object,
5099 &offset, &prot, &wired,
5100 NULL,
5101 &real_map)) {
5102
5103 vm_map_unlock_read(lookup_map);
5104 assert(map_pmap == NULL);
5105 vm_map_unwire(map, start,
5106 s, user_wire);
5107 return(KERN_FAILURE);
5108 }
5109 vm_object_unlock(object);
5110 if(real_map != lookup_map)
5111 vm_map_unlock(real_map);
5112 vm_map_unlock_read(lookup_map);
5113 vm_map_lock(map);
5114
5115 /* we unlocked, so must re-lookup */
5116 if (!vm_map_lookup_entry(map,
5117 local_start,
5118 &local_entry)) {
5119 rc = KERN_FAILURE;
5120 goto done;
5121 }
5122
5123 /*
5124 * entry could have been "simplified",
5125 * so re-clip
5126 */
5127 entry = local_entry;
5128 assert(s == local_start);
5129 vm_map_clip_start(map, entry, s);
5130 vm_map_clip_end(map, entry, end);
5131 /* re-compute "e" */
5132 e = entry->vme_end;
5133 if (e > end)
5134 e = end;
5135
5136 /* did we have a change of type? */
5137 if (!entry->is_sub_map) {
5138 last_timestamp = map->timestamp;
5139 continue;
5140 }
5141 } else {
5142 local_start = entry->vme_start;
5143 pmap = map_pmap;
5144 }
5145
5146 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5147 goto done;
5148
5149 entry->in_transition = TRUE;
5150
5151 vm_map_unlock(map);
5152 rc = vm_map_wire_nested(VME_SUBMAP(entry),
5153 sub_start, sub_end,
5154 caller_prot,
5155 user_wire, pmap, pmap_addr,
5156 NULL);
5157 vm_map_lock(map);
5158
5159 /*
5160 * Find the entry again. It could have been clipped
5161 * after we unlocked the map.
5162 */
5163 if (!vm_map_lookup_entry(map, local_start,
5164 &first_entry))
5165 panic("vm_map_wire: re-lookup failed");
5166 entry = first_entry;
5167
5168 assert(local_start == s);
5169 /* re-compute "e" */
5170 e = entry->vme_end;
5171 if (e > end)
5172 e = end;
5173
5174 last_timestamp = map->timestamp;
5175 while ((entry != vm_map_to_entry(map)) &&
5176 (entry->vme_start < e)) {
5177 assert(entry->in_transition);
5178 entry->in_transition = FALSE;
5179 if (entry->needs_wakeup) {
5180 entry->needs_wakeup = FALSE;
5181 need_wakeup = TRUE;
5182 }
5183 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
5184 subtract_wire_counts(map, entry, user_wire);
5185 }
5186 entry = entry->vme_next;
5187 }
5188 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5189 goto done;
5190 }
5191
5192 /* no need to relookup again */
5193 s = entry->vme_start;
5194 continue;
5195 }
5196
5197 /*
5198 * If this entry is already wired then increment
5199 * the appropriate wire reference count.
5200 */
5201 if (entry->wired_count) {
5202
5203 if ((entry->protection & access_type) != access_type) {
5204 /* found a protection problem */
5205
5206 /*
5207 * XXX FBDP
5208 * We should always return an error
5209 * in this case but since we didn't
5210 * enforce it before, let's do
5211 * it only for the new "wire_and_extract"
5212 * code path for now...
5213 */
5214 if (wire_and_extract) {
5215 rc = KERN_PROTECTION_FAILURE;
5216 goto done;
5217 }
5218 }
5219
5220 /*
5221 * entry is already wired down, get our reference
5222 * after clipping to our range.
5223 */
5224 vm_map_clip_start(map, entry, s);
5225 vm_map_clip_end(map, entry, end);
5226
5227 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5228 goto done;
5229
5230 if (wire_and_extract) {
5231 vm_object_t object;
5232 vm_object_offset_t offset;
5233 vm_page_t m;
5234
5235 /*
5236 * We don't have to "wire" the page again
5237 * but we still have to "extract" its
5238 * physical page number, after some sanity
5239 * checks.
5240 */
5241 assert((entry->vme_end - entry->vme_start)
5242 == PAGE_SIZE);
5243 assert(!entry->needs_copy);
5244 assert(!entry->is_sub_map);
5245 assert(VME_OBJECT(entry));
5246 if (((entry->vme_end - entry->vme_start)
5247 != PAGE_SIZE) ||
5248 entry->needs_copy ||
5249 entry->is_sub_map ||
5250 VME_OBJECT(entry) == VM_OBJECT_NULL) {
5251 rc = KERN_INVALID_ARGUMENT;
5252 goto done;
5253 }
5254
5255 object = VME_OBJECT(entry);
5256 offset = VME_OFFSET(entry);
5257 /* need exclusive lock to update m->dirty */
5258 if (entry->protection & VM_PROT_WRITE) {
5259 vm_object_lock(object);
5260 } else {
5261 vm_object_lock_shared(object);
5262 }
5263 m = vm_page_lookup(object, offset);
5264 assert(m != VM_PAGE_NULL);
5265 assert(VM_PAGE_WIRED(m));
5266 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
5267 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
5268 if (entry->protection & VM_PROT_WRITE) {
5269 vm_object_lock_assert_exclusive(
5270 object);
5271 m->dirty = TRUE;
5272 }
5273 } else {
5274 /* not already wired !? */
5275 *physpage_p = 0;
5276 }
5277 vm_object_unlock(object);
5278 }
5279
5280 /* map was not unlocked: no need to relookup */
5281 entry = entry->vme_next;
5282 s = entry->vme_start;
5283 continue;
5284 }
5285
5286 /*
5287 * Unwired entry or wire request transmitted via submap
5288 */
5289
5290
5291
5292 /*
5293 * Perform actions of vm_map_lookup that need the write
5294 * lock on the map: create a shadow object for a
5295 * copy-on-write region, or an object for a zero-fill
5296 * region.
5297 */
5298 size = entry->vme_end - entry->vme_start;
5299 /*
5300 * If wiring a copy-on-write page, we need to copy it now
5301 * even if we're only (currently) requesting read access.
5302 * This is aggressive, but once it's wired we can't move it.
5303 */
5304 if (entry->needs_copy) {
5305 if (wire_and_extract) {
5306 /*
5307 * We're supposed to share with the original
5308 * provider so should not be "needs_copy"
5309 */
5310 rc = KERN_INVALID_ARGUMENT;
5311 goto done;
5312 }
5313
5314 VME_OBJECT_SHADOW(entry, size);
5315 entry->needs_copy = FALSE;
5316 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5317 if (wire_and_extract) {
5318 /*
5319 * We're supposed to share with the original
5320 * provider so should already have an object.
5321 */
5322 rc = KERN_INVALID_ARGUMENT;
5323 goto done;
5324 }
5325 VME_OBJECT_SET(entry, vm_object_allocate(size));
5326 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
5327 assert(entry->use_pmap);
5328 }
5329
5330 vm_map_clip_start(map, entry, s);
5331 vm_map_clip_end(map, entry, end);
5332
5333 /* re-compute "e" */
5334 e = entry->vme_end;
5335 if (e > end)
5336 e = end;
5337
5338 /*
5339 * Check for holes and protection mismatch.
5340 * Holes: Next entry should be contiguous unless this
5341 * is the end of the region.
5342 * Protection: Access requested must be allowed, unless
5343 * wiring is by protection class
5344 */
5345 if ((entry->vme_end < end) &&
5346 ((entry->vme_next == vm_map_to_entry(map)) ||
5347 (entry->vme_next->vme_start > entry->vme_end))) {
5348 /* found a hole */
5349 rc = KERN_INVALID_ADDRESS;
5350 goto done;
5351 }
5352 if ((entry->protection & access_type) != access_type) {
5353 /* found a protection problem */
5354 rc = KERN_PROTECTION_FAILURE;
5355 goto done;
5356 }
5357
5358 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5359
5360 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5361 goto done;
5362
5363 entry->in_transition = TRUE;
5364
5365 /*
5366 * This entry might get split once we unlock the map.
5367 * In vm_fault_wire(), we need the current range as
5368 * defined by this entry. In order for this to work
5369 * along with a simultaneous clip operation, we make a
5370 * temporary copy of this entry and use that for the
5371 * wiring. Note that the underlying objects do not
5372 * change during a clip.
5373 */
5374 tmp_entry = *entry;
5375
5376 /*
5377 * The in_transition state guarantees that the entry
5378 * (or entries for this range, if a split occurred) will be
5379 * there when the map lock is acquired for the second time.
5380 */
5381 vm_map_unlock(map);
5382
5383 if (!user_wire && cur_thread != THREAD_NULL)
5384 interruptible_state = thread_interrupt_level(THREAD_UNINT);
5385 else
5386 interruptible_state = THREAD_UNINT;
5387
5388 if(map_pmap)
5389 rc = vm_fault_wire(map,
5390 &tmp_entry, caller_prot, map_pmap, pmap_addr,
5391 physpage_p);
5392 else
5393 rc = vm_fault_wire(map,
5394 &tmp_entry, caller_prot, map->pmap,
5395 tmp_entry.vme_start,
5396 physpage_p);
5397
5398 if (!user_wire && cur_thread != THREAD_NULL)
5399 thread_interrupt_level(interruptible_state);
5400
5401 vm_map_lock(map);
5402
5403 if (last_timestamp+1 != map->timestamp) {
5404 /*
5405 * Find the entry again. It could have been clipped
5406 * after we unlocked the map.
5407 */
5408 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5409 &first_entry))
5410 panic("vm_map_wire: re-lookup failed");
5411
5412 entry = first_entry;
5413 }
5414
5415 last_timestamp = map->timestamp;
5416
5417 while ((entry != vm_map_to_entry(map)) &&
5418 (entry->vme_start < tmp_entry.vme_end)) {
5419 assert(entry->in_transition);
5420 entry->in_transition = FALSE;
5421 if (entry->needs_wakeup) {
5422 entry->needs_wakeup = FALSE;
5423 need_wakeup = TRUE;
5424 }
5425 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5426 subtract_wire_counts(map, entry, user_wire);
5427 }
5428 entry = entry->vme_next;
5429 }
5430
5431 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5432 goto done;
5433 }
5434
5435 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
5436 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
5437 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
5438 /* found a "new" hole */
5439 s = tmp_entry.vme_end;
5440 rc = KERN_INVALID_ADDRESS;
5441 goto done;
5442 }
5443
5444 s = entry->vme_start;
5445
5446 } /* end while loop through map entries */
5447
5448 done:
5449 if (rc == KERN_SUCCESS) {
5450 /* repair any damage we may have made to the VM map */
5451 vm_map_simplify_range(map, start, end);
5452 }
5453
5454 vm_map_unlock(map);
5455
5456 /*
5457 * wake up anybody waiting on entries we wired.
5458 */
5459 if (need_wakeup)
5460 vm_map_entry_wakeup(map);
5461
5462 if (rc != KERN_SUCCESS) {
5463 /* undo what has been wired so far */
5464 vm_map_unwire_nested(map, start, s, user_wire,
5465 map_pmap, pmap_addr);
5466 if (physpage_p) {
5467 *physpage_p = 0;
5468 }
5469 }
5470
5471 return rc;
5472
5473 }
5474
5475 kern_return_t
5476 vm_map_wire_external(
5477 vm_map_t map,
5478 vm_map_offset_t start,
5479 vm_map_offset_t end,
5480 vm_prot_t caller_prot,
5481 boolean_t user_wire)
5482 {
5483 kern_return_t kret;
5484
5485 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5486 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5487 kret = vm_map_wire_nested(map, start, end, caller_prot,
5488 user_wire, (pmap_t)NULL, 0, NULL);
5489 return kret;
5490 }
5491
5492 kern_return_t
5493 vm_map_wire(
5494 vm_map_t map,
5495 vm_map_offset_t start,
5496 vm_map_offset_t end,
5497 vm_prot_t caller_prot,
5498 boolean_t user_wire)
5499 {
5500 kern_return_t kret;
5501
5502 kret = vm_map_wire_nested(map, start, end, caller_prot,
5503 user_wire, (pmap_t)NULL, 0, NULL);
5504 return kret;
5505 }
5506
5507 kern_return_t
5508 vm_map_wire_and_extract_external(
5509 vm_map_t map,
5510 vm_map_offset_t start,
5511 vm_prot_t caller_prot,
5512 boolean_t user_wire,
5513 ppnum_t *physpage_p)
5514 {
5515 kern_return_t kret;
5516
5517 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5518 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5519 kret = vm_map_wire_nested(map,
5520 start,
5521 start+VM_MAP_PAGE_SIZE(map),
5522 caller_prot,
5523 user_wire,
5524 (pmap_t)NULL,
5525 0,
5526 physpage_p);
5527 if (kret != KERN_SUCCESS &&
5528 physpage_p != NULL) {
5529 *physpage_p = 0;
5530 }
5531 return kret;
5532 }
5533
5534 kern_return_t
5535 vm_map_wire_and_extract(
5536 vm_map_t map,
5537 vm_map_offset_t start,
5538 vm_prot_t caller_prot,
5539 boolean_t user_wire,
5540 ppnum_t *physpage_p)
5541 {
5542 kern_return_t kret;
5543
5544 kret = vm_map_wire_nested(map,
5545 start,
5546 start+VM_MAP_PAGE_SIZE(map),
5547 caller_prot,
5548 user_wire,
5549 (pmap_t)NULL,
5550 0,
5551 physpage_p);
5552 if (kret != KERN_SUCCESS &&
5553 physpage_p != NULL) {
5554 *physpage_p = 0;
5555 }
5556 return kret;
5557 }
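/*
 * Illustrative sketch (annotation, not part of the original source): wiring
 * a single page and retrieving its physical page number.  The map and "addr"
 * are assumptions for the example; the wrapper above always wires exactly
 * one map page (start to start + VM_MAP_PAGE_SIZE(map)) and clears
 * *physpage_p on failure.
 */
#if 0
	ppnum_t		pn = 0;
	kern_return_t	kr;

	kr = vm_map_wire_and_extract(map,
				     vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
				     VM_PROT_READ | VM_PROT_WRITE,
				     FALSE,	/* user_wire: kernel wiring */
				     &pn);
	if (kr == KERN_SUCCESS) {
		/* "pn" is the physical page number backing that one page */
	}
#endif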
5558
5559 /*
5560 * vm_map_unwire:
5561 *
5562 * Sets the pageability of the specified address range in the target
5563 * as pageable. Regions specified must have been wired previously.
5564 *
5565 * The map must not be locked, but a reference must remain to the map
5566 * throughout the call.
5567 *
5568 * Kernel will panic on failures. User unwire ignores holes and
5569 * unwired and intransition entries to avoid losing memory by leaving
5570 * it unwired.
5571 */
5572 static kern_return_t
5573 vm_map_unwire_nested(
5574 vm_map_t map,
5575 vm_map_offset_t start,
5576 vm_map_offset_t end,
5577 boolean_t user_wire,
5578 pmap_t map_pmap,
5579 vm_map_offset_t pmap_addr)
5580 {
5581 vm_map_entry_t entry;
5582 struct vm_map_entry *first_entry, tmp_entry;
5583 boolean_t need_wakeup;
5584 boolean_t main_map = FALSE;
5585 unsigned int last_timestamp;
5586
5587 vm_map_lock(map);
5588 if(map_pmap == NULL)
5589 main_map = TRUE;
5590 last_timestamp = map->timestamp;
5591
5592 VM_MAP_RANGE_CHECK(map, start, end);
5593 assert(page_aligned(start));
5594 assert(page_aligned(end));
5595 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5596 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5597
5598 if (start == end) {
5599 /* We unwired what the caller asked for: zero pages */
5600 vm_map_unlock(map);
5601 return KERN_SUCCESS;
5602 }
5603
5604 if (vm_map_lookup_entry(map, start, &first_entry)) {
5605 entry = first_entry;
5606 /*
5607 * vm_map_clip_start will be done later.
5608 * We don't want to unnest any nested sub maps here !
5609 */
5610 }
5611 else {
5612 if (!user_wire) {
5613 panic("vm_map_unwire: start not found");
5614 }
5615 /* Start address is not in map. */
5616 vm_map_unlock(map);
5617 return(KERN_INVALID_ADDRESS);
5618 }
5619
5620 if (entry->superpage_size) {
5621 /* superpages are always wired */
5622 vm_map_unlock(map);
5623 return KERN_INVALID_ADDRESS;
5624 }
5625
5626 need_wakeup = FALSE;
5627 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5628 if (entry->in_transition) {
5629 /*
5630 * 1)
5631 * Another thread is wiring down this entry. Note
5632 * that if it is not for the other thread we would
5633 * be unwiring an unwired entry. This is not
5634 * permitted. If we wait, we will be unwiring memory
5635 * we did not wire.
5636 *
5637 * 2)
5638 * Another thread is unwiring this entry. We did not
5639 * have a reference to it, because if we did, this
5640 * entry will not be getting unwired now.
5641 */
5642 if (!user_wire) {
5643 /*
5644 * XXX FBDP
5645 * This could happen: there could be some
5646 * overlapping vslock/vsunlock operations
5647 * going on.
5648 * We should probably just wait and retry,
5649 * but then we have to be careful that this
5650 * entry could get "simplified" after
5651 * "in_transition" gets unset and before
5652 * we re-lookup the entry, so we would
5653 * have to re-clip the entry to avoid
5654 * re-unwiring what we have already unwired...
5655 * See vm_map_wire_nested().
5656 *
5657 * Or we could just ignore "in_transition"
5658 * here and proceed to decrement the wired
5659 * count(s) on this entry. That should be fine
5660 * as long as "wired_count" doesn't drop all
5661 * the way to 0 (and we should panic if THAT
5662 * happens).
5663 */
5664 panic("vm_map_unwire: in_transition entry");
5665 }
5666
5667 entry = entry->vme_next;
5668 continue;
5669 }
5670
5671 if (entry->is_sub_map) {
5672 vm_map_offset_t sub_start;
5673 vm_map_offset_t sub_end;
5674 vm_map_offset_t local_end;
5675 pmap_t pmap;
5676
5677 vm_map_clip_start(map, entry, start);
5678 vm_map_clip_end(map, entry, end);
5679
5680 sub_start = VME_OFFSET(entry);
5681 sub_end = entry->vme_end - entry->vme_start;
5682 sub_end += VME_OFFSET(entry);
5683 local_end = entry->vme_end;
5684 if(map_pmap == NULL) {
5685 if(entry->use_pmap) {
5686 pmap = VME_SUBMAP(entry)->pmap;
5687 pmap_addr = sub_start;
5688 } else {
5689 pmap = map->pmap;
5690 pmap_addr = start;
5691 }
5692 if (entry->wired_count == 0 ||
5693 (user_wire && entry->user_wired_count == 0)) {
5694 if (!user_wire)
5695 panic("vm_map_unwire: entry is unwired");
5696 entry = entry->vme_next;
5697 continue;
5698 }
5699
5700 /*
5701 * Check for holes
5702 * Holes: Next entry should be contiguous unless
5703 * this is the end of the region.
5704 */
5705 if (((entry->vme_end < end) &&
5706 ((entry->vme_next == vm_map_to_entry(map)) ||
5707 (entry->vme_next->vme_start
5708 > entry->vme_end)))) {
5709 if (!user_wire)
5710 panic("vm_map_unwire: non-contiguous region");
5711 /*
5712 entry = entry->vme_next;
5713 continue;
5714 */
5715 }
5716
5717 subtract_wire_counts(map, entry, user_wire);
5718
5719 if (entry->wired_count != 0) {
5720 entry = entry->vme_next;
5721 continue;
5722 }
5723
5724 entry->in_transition = TRUE;
5725 tmp_entry = *entry;/* see comment in vm_map_wire() */
5726
5727 /*
5728 * We can unlock the map now. The in_transition state
5729 * guarantees the existence of the entry.
5730 */
5731 vm_map_unlock(map);
5732 vm_map_unwire_nested(VME_SUBMAP(entry),
5733 sub_start, sub_end, user_wire, pmap, pmap_addr);
5734 vm_map_lock(map);
5735
5736 if (last_timestamp+1 != map->timestamp) {
5737 /*
5738 * Find the entry again. It could have been
5739 * clipped or deleted after we unlocked the map.
5740 */
5741 if (!vm_map_lookup_entry(map,
5742 tmp_entry.vme_start,
5743 &first_entry)) {
5744 if (!user_wire)
5745 panic("vm_map_unwire: re-lookup failed");
5746 entry = first_entry->vme_next;
5747 } else
5748 entry = first_entry;
5749 }
5750 last_timestamp = map->timestamp;
5751
5752 /*
5753 * clear transition bit for all constituent entries
5754 * that were in the original entry (saved in
5755 * tmp_entry). Also check for waiters.
5756 */
5757 while ((entry != vm_map_to_entry(map)) &&
5758 (entry->vme_start < tmp_entry.vme_end)) {
5759 assert(entry->in_transition);
5760 entry->in_transition = FALSE;
5761 if (entry->needs_wakeup) {
5762 entry->needs_wakeup = FALSE;
5763 need_wakeup = TRUE;
5764 }
5765 entry = entry->vme_next;
5766 }
5767 continue;
5768 } else {
5769 vm_map_unlock(map);
5770 vm_map_unwire_nested(VME_SUBMAP(entry),
5771 sub_start, sub_end, user_wire, map_pmap,
5772 pmap_addr);
5773 vm_map_lock(map);
5774
5775 if (last_timestamp+1 != map->timestamp) {
5776 /*
5777 * Find the entry again. It could have been
5778 * clipped or deleted after we unlocked the map.
5779 */
5780 if (!vm_map_lookup_entry(map,
5781 tmp_entry.vme_start,
5782 &first_entry)) {
5783 if (!user_wire)
5784 panic("vm_map_unwire: re-lookup failed");
5785 entry = first_entry->vme_next;
5786 } else
5787 entry = first_entry;
5788 }
5789 last_timestamp = map->timestamp;
5790 }
5791 }
5792
5793
5794 if ((entry->wired_count == 0) ||
5795 (user_wire && entry->user_wired_count == 0)) {
5796 if (!user_wire)
5797 panic("vm_map_unwire: entry is unwired");
5798
5799 entry = entry->vme_next;
5800 continue;
5801 }
5802
5803 assert(entry->wired_count > 0 &&
5804 (!user_wire || entry->user_wired_count > 0));
5805
5806 vm_map_clip_start(map, entry, start);
5807 vm_map_clip_end(map, entry, end);
5808
5809 /*
5810 * Check for holes
5811 * Holes: Next entry should be contiguous unless
5812 * this is the end of the region.
5813 */
5814 if (((entry->vme_end < end) &&
5815 ((entry->vme_next == vm_map_to_entry(map)) ||
5816 (entry->vme_next->vme_start > entry->vme_end)))) {
5817
5818 if (!user_wire)
5819 panic("vm_map_unwire: non-contiguous region");
5820 entry = entry->vme_next;
5821 continue;
5822 }
5823
5824 subtract_wire_counts(map, entry, user_wire);
5825
5826 if (entry->wired_count != 0) {
5827 entry = entry->vme_next;
5828 continue;
5829 }
5830
5831 if(entry->zero_wired_pages) {
5832 entry->zero_wired_pages = FALSE;
5833 }
5834
5835 entry->in_transition = TRUE;
5836 tmp_entry = *entry; /* see comment in vm_map_wire() */
5837
5838 /*
5839 * We can unlock the map now. The in_transition state
5840 * guarantees the existence of the entry.
5841 */
5842 vm_map_unlock(map);
5843 if(map_pmap) {
5844 vm_fault_unwire(map,
5845 &tmp_entry, FALSE, map_pmap, pmap_addr);
5846 } else {
5847 vm_fault_unwire(map,
5848 &tmp_entry, FALSE, map->pmap,
5849 tmp_entry.vme_start);
5850 }
5851 vm_map_lock(map);
5852
5853 if (last_timestamp+1 != map->timestamp) {
5854 /*
5855 * Find the entry again. It could have been clipped
5856 * or deleted after we unlocked the map.
5857 */
5858 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5859 &first_entry)) {
5860 if (!user_wire)
5861 panic("vm_map_unwire: re-lookup failed");
5862 entry = first_entry->vme_next;
5863 } else
5864 entry = first_entry;
5865 }
5866 last_timestamp = map->timestamp;
5867
5868 /*
5869 * clear transition bit for all constituent entries that
5870 * were in the original entry (saved in tmp_entry). Also
5871 * check for waiters.
5872 */
5873 while ((entry != vm_map_to_entry(map)) &&
5874 (entry->vme_start < tmp_entry.vme_end)) {
5875 assert(entry->in_transition);
5876 entry->in_transition = FALSE;
5877 if (entry->needs_wakeup) {
5878 entry->needs_wakeup = FALSE;
5879 need_wakeup = TRUE;
5880 }
5881 entry = entry->vme_next;
5882 }
5883 }
5884
5885 /*
5886 * We might have fragmented the address space when we wired this
5887 * range of addresses. Attempt to re-coalesce these VM map entries
5888 * with their neighbors now that they're no longer wired.
5889 * Under some circumstances, address space fragmentation can
5890 * prevent VM object shadow chain collapsing, which can cause
5891 * swap space leaks.
5892 */
5893 vm_map_simplify_range(map, start, end);
5894
5895 vm_map_unlock(map);
5896 /*
5897 * wake up anybody waiting on entries that we have unwired.
5898 */
5899 if (need_wakeup)
5900 vm_map_entry_wakeup(map);
5901 return(KERN_SUCCESS);
5902
5903 }
5904
5905 kern_return_t
5906 vm_map_unwire(
5907 vm_map_t map,
5908 vm_map_offset_t start,
5909 vm_map_offset_t end,
5910 boolean_t user_wire)
5911 {
5912 return vm_map_unwire_nested(map, start, end,
5913 user_wire, (pmap_t)NULL, 0);
5914 }
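/*
 * Illustrative sketch (annotation, not part of the original source): a
 * matched kernel wire/unwire of a range, tagging the wired memory the same
 * way vm_map_wire_external() does above.  The map and bounds are assumptions
 * for the example.
 */
#if 0
	kern_return_t	kr;

	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt()),
			 FALSE);	/* user_wire: this is a kernel wiring */
	if (kr == KERN_SUCCESS) {
		/* pages in [start, end) cannot fault or be paged out here */
		(void) vm_map_unwire(map, start, end, FALSE);
	}
#endif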
5915
5916
5917 /*
5918 * vm_map_entry_delete: [ internal use only ]
5919 *
5920 * Deallocate the given entry from the target map.
5921 */
5922 static void
5923 vm_map_entry_delete(
5924 vm_map_t map,
5925 vm_map_entry_t entry)
5926 {
5927 vm_map_offset_t s, e;
5928 vm_object_t object;
5929 vm_map_t submap;
5930
5931 s = entry->vme_start;
5932 e = entry->vme_end;
5933 assert(page_aligned(s));
5934 assert(page_aligned(e));
5935 if (entry->map_aligned == TRUE) {
5936 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5937 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5938 }
5939 assert(entry->wired_count == 0);
5940 assert(entry->user_wired_count == 0);
5941 assert(!entry->permanent);
5942
5943 if (entry->is_sub_map) {
5944 object = NULL;
5945 submap = VME_SUBMAP(entry);
5946 } else {
5947 submap = NULL;
5948 object = VME_OBJECT(entry);
5949 }
5950
5951 vm_map_store_entry_unlink(map, entry);
5952 map->size -= e - s;
5953
5954 vm_map_entry_dispose(map, entry);
5955
5956 vm_map_unlock(map);
5957 /*
5958 * Deallocate the object only after removing all
5959 * pmap entries pointing to its pages.
5960 */
5961 if (submap)
5962 vm_map_deallocate(submap);
5963 else
5964 vm_object_deallocate(object);
5965
5966 }
5967
5968 void
5969 vm_map_submap_pmap_clean(
5970 vm_map_t map,
5971 vm_map_offset_t start,
5972 vm_map_offset_t end,
5973 vm_map_t sub_map,
5974 vm_map_offset_t offset)
5975 {
5976 vm_map_offset_t submap_start;
5977 vm_map_offset_t submap_end;
5978 vm_map_size_t remove_size;
5979 vm_map_entry_t entry;
5980
5981 submap_end = offset + (end - start);
5982 submap_start = offset;
5983
5984 vm_map_lock_read(sub_map);
5985 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5986
5987 remove_size = (entry->vme_end - entry->vme_start);
5988 if(offset > entry->vme_start)
5989 remove_size -= offset - entry->vme_start;
5990
5991
5992 if(submap_end < entry->vme_end) {
5993 remove_size -=
5994 entry->vme_end - submap_end;
5995 }
5996 if(entry->is_sub_map) {
5997 vm_map_submap_pmap_clean(
5998 sub_map,
5999 start,
6000 start + remove_size,
6001 VME_SUBMAP(entry),
6002 VME_OFFSET(entry));
6003 } else {
6004
6005 if((map->mapped_in_other_pmaps) && (map->ref_count)
6006 && (VME_OBJECT(entry) != NULL)) {
6007 vm_object_pmap_protect_options(
6008 VME_OBJECT(entry),
6009 (VME_OFFSET(entry) +
6010 offset -
6011 entry->vme_start),
6012 remove_size,
6013 PMAP_NULL,
6014 entry->vme_start,
6015 VM_PROT_NONE,
6016 PMAP_OPTIONS_REMOVE);
6017 } else {
6018 pmap_remove(map->pmap,
6019 (addr64_t)start,
6020 (addr64_t)(start + remove_size));
6021 }
6022 }
6023 }
6024
6025 entry = entry->vme_next;
6026
6027 while((entry != vm_map_to_entry(sub_map))
6028 && (entry->vme_start < submap_end)) {
6029 remove_size = (entry->vme_end - entry->vme_start);
6030 if(submap_end < entry->vme_end) {
6031 remove_size -= entry->vme_end - submap_end;
6032 }
6033 if(entry->is_sub_map) {
6034 vm_map_submap_pmap_clean(
6035 sub_map,
6036 (start + entry->vme_start) - offset,
6037 ((start + entry->vme_start) - offset) + remove_size,
6038 VME_SUBMAP(entry),
6039 VME_OFFSET(entry));
6040 } else {
6041 if((map->mapped_in_other_pmaps) && (map->ref_count)
6042 && (VME_OBJECT(entry) != NULL)) {
6043 vm_object_pmap_protect_options(
6044 VME_OBJECT(entry),
6045 VME_OFFSET(entry),
6046 remove_size,
6047 PMAP_NULL,
6048 entry->vme_start,
6049 VM_PROT_NONE,
6050 PMAP_OPTIONS_REMOVE);
6051 } else {
6052 pmap_remove(map->pmap,
6053 (addr64_t)((start + entry->vme_start)
6054 - offset),
6055 (addr64_t)(((start + entry->vme_start)
6056 - offset) + remove_size));
6057 }
6058 }
6059 entry = entry->vme_next;
6060 }
6061 vm_map_unlock_read(sub_map);
6062 return;
6063 }
6064
6065 /*
6066 * vm_map_delete: [ internal use only ]
6067 *
6068 * Deallocates the given address range from the target map.
6069 * Removes all user wirings. Unwires one kernel wiring if
6070 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
6071 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
6072 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
6073 *
6074 * This routine is called with map locked and leaves map locked.
6075 */
6076 static kern_return_t
6077 vm_map_delete(
6078 vm_map_t map,
6079 vm_map_offset_t start,
6080 vm_map_offset_t end,
6081 int flags,
6082 vm_map_t zap_map)
6083 {
6084 vm_map_entry_t entry, next;
6085 struct vm_map_entry *first_entry, tmp_entry;
6086 vm_map_offset_t s;
6087 vm_object_t object;
6088 boolean_t need_wakeup;
6089 unsigned int last_timestamp = ~0; /* unlikely value */
6090 int interruptible;
6091
6092 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
6093 THREAD_ABORTSAFE : THREAD_UNINT;
6094
6095 /*
6096 * All our DMA I/O operations in IOKit are currently done by
6097 * wiring through the map entries of the task requesting the I/O.
6098 * Because of this, we must always wait for kernel wirings
6099 * to go away on the entries before deleting them.
6100 *
6101 * Any caller who wants to actually remove a kernel wiring
6102 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
6103 * properly remove one wiring instead of blasting through
6104 * them all.
6105 */
6106 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
6107
6108 while(1) {
6109 /*
6110 * Find the start of the region, and clip it
6111 */
6112 if (vm_map_lookup_entry(map, start, &first_entry)) {
6113 entry = first_entry;
6114 if (map == kalloc_map &&
6115 (entry->vme_start != start ||
6116 entry->vme_end != end)) {
6117 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6118 "mismatched entry %p [0x%llx:0x%llx]\n",
6119 map,
6120 (uint64_t)start,
6121 (uint64_t)end,
6122 entry,
6123 (uint64_t)entry->vme_start,
6124 (uint64_t)entry->vme_end);
6125 }
6126 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
6127 start = SUPERPAGE_ROUND_DOWN(start);
6128 continue;
6129 }
6130 if (start == entry->vme_start) {
6131 /*
6132 * No need to clip. We don't want to cause
6133 * any unnecessary unnesting in this case...
6134 */
6135 } else {
6136 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6137 entry->map_aligned &&
6138 !VM_MAP_PAGE_ALIGNED(
6139 start,
6140 VM_MAP_PAGE_MASK(map))) {
6141 /*
6142 * The entry will no longer be
6143 * map-aligned after clipping
6144 * and the caller said it's OK.
6145 */
6146 entry->map_aligned = FALSE;
6147 }
6148 if (map == kalloc_map) {
6149 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6150 " clipping %p at 0x%llx\n",
6151 map,
6152 (uint64_t)start,
6153 (uint64_t)end,
6154 entry,
6155 (uint64_t)start);
6156 }
6157 vm_map_clip_start(map, entry, start);
6158 }
6159
6160 /*
6161 * Fix the lookup hint now, rather than each
6162 * time through the loop.
6163 */
6164 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6165 } else {
6166 if (map->pmap == kernel_pmap &&
6167 map->ref_count != 0) {
6168 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6169 "no map entry at 0x%llx\n",
6170 map,
6171 (uint64_t)start,
6172 (uint64_t)end,
6173 (uint64_t)start);
6174 }
6175 entry = first_entry->vme_next;
6176 }
6177 break;
6178 }
6179 if (entry->superpage_size)
6180 end = SUPERPAGE_ROUND_UP(end);
6181
6182 need_wakeup = FALSE;
6183 /*
6184 * Step through all entries in this region
6185 */
6186 s = entry->vme_start;
6187 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6188 /*
6189 * At this point, we have deleted all the memory entries
6190 * between "start" and "s". We still need to delete
6191 * all memory entries between "s" and "end".
6192 * While we were blocked and the map was unlocked, some
6193 * new memory entries could have been re-allocated between
6194 * "start" and "s" and we don't want to mess with those.
6195 * Some of those entries could even have been re-assembled
6196 * with an entry after "s" (in vm_map_simplify_entry()), so
6197 * we may have to vm_map_clip_start() again.
6198 */
6199
6200 if (entry->vme_start >= s) {
6201 /*
6202 * This entry starts on or after "s"
6203 * so no need to clip its start.
6204 */
6205 } else {
6206 /*
6207 * This entry has been re-assembled by a
6208 * vm_map_simplify_entry(). We need to
6209 * re-clip its start.
6210 */
6211 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6212 entry->map_aligned &&
6213 !VM_MAP_PAGE_ALIGNED(s,
6214 VM_MAP_PAGE_MASK(map))) {
6215 /*
6216 * The entry will no longer be map-aligned
6217 * after clipping and the caller said it's OK.
6218 */
6219 entry->map_aligned = FALSE;
6220 }
6221 if (map == kalloc_map) {
6222 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6223 "clipping %p at 0x%llx\n",
6224 map,
6225 (uint64_t)start,
6226 (uint64_t)end,
6227 entry,
6228 (uint64_t)s);
6229 }
6230 vm_map_clip_start(map, entry, s);
6231 }
6232 if (entry->vme_end <= end) {
6233 /*
6234 * This entry is going away completely, so no need
6235 * to clip and possibly cause an unnecessary unnesting.
6236 */
6237 } else {
6238 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6239 entry->map_aligned &&
6240 !VM_MAP_PAGE_ALIGNED(end,
6241 VM_MAP_PAGE_MASK(map))) {
6242 /*
6243 * The entry will no longer be map-aligned
6244 * after clipping and the caller said it's OK.
6245 */
6246 entry->map_aligned = FALSE;
6247 }
6248 if (map == kalloc_map) {
6249 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6250 "clipping %p at 0x%llx\n",
6251 map,
6252 (uint64_t)start,
6253 (uint64_t)end,
6254 entry,
6255 (uint64_t)end);
6256 }
6257 vm_map_clip_end(map, entry, end);
6258 }
6259
6260 if (entry->permanent) {
6261 panic("attempt to remove permanent VM map entry "
6262 "%p [0x%llx:0x%llx]\n",
6263 entry, (uint64_t) s, (uint64_t) end);
6264 }
6265
6266
6267 if (entry->in_transition) {
6268 wait_result_t wait_result;
6269
6270 /*
6271 * Another thread is wiring/unwiring this entry.
6272 * Let the other thread know we are waiting.
6273 */
6274 assert(s == entry->vme_start);
6275 entry->needs_wakeup = TRUE;
6276
6277 /*
6278 * wake up anybody waiting on entries that we have
6279 * already unwired/deleted.
6280 */
6281 if (need_wakeup) {
6282 vm_map_entry_wakeup(map);
6283 need_wakeup = FALSE;
6284 }
6285
6286 wait_result = vm_map_entry_wait(map, interruptible);
6287
6288 if (interruptible &&
6289 wait_result == THREAD_INTERRUPTED) {
6290 /*
6291 * We do not clear the needs_wakeup flag,
6292 * since we cannot tell if we were the only one.
6293 */
6294 return KERN_ABORTED;
6295 }
6296
6297 /*
6298 * The entry could have been clipped or it
6299 * may not exist anymore. Look it up again.
6300 */
6301 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6302 /*
6303 * User: use the next entry
6304 */
6305 entry = first_entry->vme_next;
6306 s = entry->vme_start;
6307 } else {
6308 entry = first_entry;
6309 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6310 }
6311 last_timestamp = map->timestamp;
6312 continue;
6313 } /* end in_transition */
6314
6315 if (entry->wired_count) {
6316 boolean_t user_wire;
6317
6318 user_wire = entry->user_wired_count > 0;
6319
6320 /*
6321 * Remove a kernel wiring if requested
6322 */
6323 if (flags & VM_MAP_REMOVE_KUNWIRE) {
6324 entry->wired_count--;
6325 }
6326
6327 /*
6328 * Remove all user wirings for proper accounting
6329 */
6330 if (entry->user_wired_count > 0) {
6331 while (entry->user_wired_count)
6332 subtract_wire_counts(map, entry, user_wire);
6333 }
6334
6335 if (entry->wired_count != 0) {
6336 assert(map != kernel_map);
6337 /*
6338 * Cannot continue. Typical case is when
6339 * a user thread has physical I/O pending
6340 * on this page. Either wait for the
6341 * kernel wiring to go away or return an
6342 * error.
6343 */
6344 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
6345 wait_result_t wait_result;
6346
6347 assert(s == entry->vme_start);
6348 entry->needs_wakeup = TRUE;
6349 wait_result = vm_map_entry_wait(map,
6350 interruptible);
6351
6352 if (interruptible &&
6353 wait_result == THREAD_INTERRUPTED) {
6354 /*
6355 * We do not clear the
6356 * needs_wakeup flag, since we
6357 * cannot tell if we were the
6358 * only one.
6359 */
6360 return KERN_ABORTED;
6361 }
6362
6363 /*
6364 * The entry could have been clipped or
6365 * it may not exist anymore. Look it
6366 * up again.
6367 */
6368 if (!vm_map_lookup_entry(map, s,
6369 &first_entry)) {
6370 assert(map != kernel_map);
6371 /*
6372 * User: use the next entry
6373 */
6374 entry = first_entry->vme_next;
6375 s = entry->vme_start;
6376 } else {
6377 entry = first_entry;
6378 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6379 }
6380 last_timestamp = map->timestamp;
6381 continue;
6382 }
6383 else {
6384 return KERN_FAILURE;
6385 }
6386 }
6387
6388 entry->in_transition = TRUE;
6389 /*
6390 * copy current entry. see comment in vm_map_wire()
6391 */
6392 tmp_entry = *entry;
6393 assert(s == entry->vme_start);
6394
6395 /*
6396 * We can unlock the map now. The in_transition
6397 * state guarantees existence of the entry.
6398 */
6399 vm_map_unlock(map);
6400
6401 if (tmp_entry.is_sub_map) {
6402 vm_map_t sub_map;
6403 vm_map_offset_t sub_start, sub_end;
6404 pmap_t pmap;
6405 vm_map_offset_t pmap_addr;
6406
6407
6408 sub_map = VME_SUBMAP(&tmp_entry);
6409 sub_start = VME_OFFSET(&tmp_entry);
6410 sub_end = sub_start + (tmp_entry.vme_end -
6411 tmp_entry.vme_start);
6412 if (tmp_entry.use_pmap) {
6413 pmap = sub_map->pmap;
6414 pmap_addr = tmp_entry.vme_start;
6415 } else {
6416 pmap = map->pmap;
6417 pmap_addr = tmp_entry.vme_start;
6418 }
6419 (void) vm_map_unwire_nested(sub_map,
6420 sub_start, sub_end,
6421 user_wire,
6422 pmap, pmap_addr);
6423 } else {
6424
6425 if (VME_OBJECT(&tmp_entry) == kernel_object) {
6426 pmap_protect_options(
6427 map->pmap,
6428 tmp_entry.vme_start,
6429 tmp_entry.vme_end,
6430 VM_PROT_NONE,
6431 PMAP_OPTIONS_REMOVE,
6432 NULL);
6433 }
6434 vm_fault_unwire(map, &tmp_entry,
6435 VME_OBJECT(&tmp_entry) == kernel_object,
6436 map->pmap, tmp_entry.vme_start);
6437 }
6438
6439 vm_map_lock(map);
6440
6441 if (last_timestamp+1 != map->timestamp) {
6442 /*
6443 * Find the entry again. It could have
6444 * been clipped after we unlocked the map.
6445 */
6446 if (!vm_map_lookup_entry(map, s, &first_entry)){
6447 assert((map != kernel_map) &&
6448 (!entry->is_sub_map));
6449 first_entry = first_entry->vme_next;
6450 s = first_entry->vme_start;
6451 } else {
6452 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6453 }
6454 } else {
6455 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6456 first_entry = entry;
6457 }
6458
6459 last_timestamp = map->timestamp;
6460
6461 entry = first_entry;
6462 while ((entry != vm_map_to_entry(map)) &&
6463 (entry->vme_start < tmp_entry.vme_end)) {
6464 assert(entry->in_transition);
6465 entry->in_transition = FALSE;
6466 if (entry->needs_wakeup) {
6467 entry->needs_wakeup = FALSE;
6468 need_wakeup = TRUE;
6469 }
6470 entry = entry->vme_next;
6471 }
6472 /*
6473 * We have unwired the entry(s). Go back and
6474 * delete them.
6475 */
6476 entry = first_entry;
6477 continue;
6478 }
6479
6480 /* entry is unwired */
6481 assert(entry->wired_count == 0);
6482 assert(entry->user_wired_count == 0);
6483
6484 assert(s == entry->vme_start);
6485
6486 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6487 /*
6488 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6489 * vm_map_delete(), some map entries might have been
6490 * transferred to a "zap_map", which doesn't have a
6491 * pmap. The original pmap has already been flushed
6492 * in the vm_map_delete() call targeting the original
6493 * map, but when we get to destroying the "zap_map",
6494 * we don't have any pmap to flush, so let's just skip
6495 * all this.
6496 */
6497 } else if (entry->is_sub_map) {
6498 if (entry->use_pmap) {
6499 #ifndef NO_NESTED_PMAP
6500 int pmap_flags;
6501
6502 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6503 /*
6504 * This is the final cleanup of the
6505 * address space being terminated.
6506 * No new mappings are expected and
6507 * we don't really need to unnest the
6508 * shared region (and lose the "global"
6509 * pmap mappings, if applicable).
6510 *
6511 * Tell the pmap layer that we're
6512 * "clean" wrt nesting.
6513 */
6514 pmap_flags = PMAP_UNNEST_CLEAN;
6515 } else {
6516 /*
6517 * We're unmapping part of the nested
6518 * shared region, so we can't keep the
6519 * nested pmap.
6520 */
6521 pmap_flags = 0;
6522 }
6523 pmap_unnest_options(
6524 map->pmap,
6525 (addr64_t)entry->vme_start,
6526 entry->vme_end - entry->vme_start,
6527 pmap_flags);
6528 #endif /* NO_NESTED_PMAP */
6529 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6530 /* clean up parent map/maps */
6531 vm_map_submap_pmap_clean(
6532 map, entry->vme_start,
6533 entry->vme_end,
6534 VME_SUBMAP(entry),
6535 VME_OFFSET(entry));
6536 }
6537 } else {
6538 vm_map_submap_pmap_clean(
6539 map, entry->vme_start, entry->vme_end,
6540 VME_SUBMAP(entry),
6541 VME_OFFSET(entry));
6542 }
6543 } else if (VME_OBJECT(entry) != kernel_object &&
6544 VME_OBJECT(entry) != compressor_object) {
6545 object = VME_OBJECT(entry);
6546 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6547 vm_object_pmap_protect_options(
6548 object, VME_OFFSET(entry),
6549 entry->vme_end - entry->vme_start,
6550 PMAP_NULL,
6551 entry->vme_start,
6552 VM_PROT_NONE,
6553 PMAP_OPTIONS_REMOVE);
6554 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
6555 (map->pmap == kernel_pmap)) {
6556 /* Remove translations associated
6557 * with this range unless the entry
6558 * does not have an object, or
6559 * it's the kernel map or a descendant
6560 * since the platform could potentially
6561 * create "backdoor" mappings invisible
6562 * to the VM. It is expected that
6563 * objectless, non-kernel ranges
6564 * do not have such VM invisible
6565 * translations.
6566 */
6567 pmap_remove_options(map->pmap,
6568 (addr64_t)entry->vme_start,
6569 (addr64_t)entry->vme_end,
6570 PMAP_OPTIONS_REMOVE);
6571 }
6572 }
6573
6574 if (entry->iokit_acct) {
6575 /* alternate accounting */
6576 DTRACE_VM4(vm_map_iokit_unmapped_region,
6577 vm_map_t, map,
6578 vm_map_offset_t, entry->vme_start,
6579 vm_map_offset_t, entry->vme_end,
6580 int, VME_ALIAS(entry));
6581 vm_map_iokit_unmapped_region(map,
6582 (entry->vme_end -
6583 entry->vme_start));
6584 entry->iokit_acct = FALSE;
6585 }
6586
6587 /*
6588 * All pmap mappings for this map entry must have been
6589 * cleared by now.
6590 */
6591 #if DEBUG
6592 assert(vm_map_pmap_is_empty(map,
6593 entry->vme_start,
6594 entry->vme_end));
6595 #endif /* DEBUG */
6596
6597 next = entry->vme_next;
6598
6599 if (map->pmap == kernel_pmap &&
6600 map->ref_count != 0 &&
6601 entry->vme_end < end &&
6602 (next == vm_map_to_entry(map) ||
6603 next->vme_start != entry->vme_end)) {
6604 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6605 "hole after %p at 0x%llx\n",
6606 map,
6607 (uint64_t)start,
6608 (uint64_t)end,
6609 entry,
6610 (uint64_t)entry->vme_end);
6611 }
6612
6613 s = next->vme_start;
6614 last_timestamp = map->timestamp;
6615
6616 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6617 zap_map != VM_MAP_NULL) {
6618 vm_map_size_t entry_size;
6619 /*
6620 * The caller wants to save the affected VM map entries
6621 * into the "zap_map". The caller will take care of
6622 * these entries.
6623 */
6624 /* unlink the entry from "map" ... */
6625 vm_map_store_entry_unlink(map, entry);
6626 /* ... and add it to the end of the "zap_map" */
6627 vm_map_store_entry_link(zap_map,
6628 vm_map_last_entry(zap_map),
6629 entry);
6630 entry_size = entry->vme_end - entry->vme_start;
6631 map->size -= entry_size;
6632 zap_map->size += entry_size;
6633 /* we didn't unlock the map, so no timestamp increase */
6634 last_timestamp--;
6635 } else {
6636 vm_map_entry_delete(map, entry);
6637 /* vm_map_entry_delete unlocks the map */
6638 vm_map_lock(map);
6639 }
6640
6641 entry = next;
6642
6643 if(entry == vm_map_to_entry(map)) {
6644 break;
6645 }
6646 if (last_timestamp+1 != map->timestamp) {
6647 /*
6648 * We are responsible for deleting everything
6649 * from the given space. If someone has interfered,
6650 * we pick up where we left off; back fills should
6651 * be all right for anyone except map_delete, and
6652 * we have to assume that the task has been fully
6653 * disabled before we get here.
6654 */
6655 if (!vm_map_lookup_entry(map, s, &entry)){
6656 entry = entry->vme_next;
6657 s = entry->vme_start;
6658 } else {
6659 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6660 }
6661 /*
6662 * others can not only allocate behind us; we can
6663 * also see entries coalesce while we don't hold the map lock
6664 */
6665 if(entry == vm_map_to_entry(map)) {
6666 break;
6667 }
6668 }
6669 last_timestamp = map->timestamp;
6670 }
6671
6672 if (map->wait_for_space)
6673 thread_wakeup((event_t) map);
6674 /*
6675 * wake up anybody waiting on entries that we have already deleted.
6676 */
6677 if (need_wakeup)
6678 vm_map_entry_wakeup(map);
6679
6680 return KERN_SUCCESS;
6681 }
6682
6683 /*
6684 * vm_map_remove:
6685 *
6686 * Remove the given address range from the target map.
6687 * This is the exported form of vm_map_delete.
6688 */
6689 kern_return_t
6690 vm_map_remove(
6691 vm_map_t map,
6692 vm_map_offset_t start,
6693 vm_map_offset_t end,
6694 boolean_t flags)
6695 {
6696 kern_return_t result;
6697
6698 vm_map_lock(map);
6699 VM_MAP_RANGE_CHECK(map, start, end);
6700 /*
6701 * For the zone_map, the kernel controls the allocation/freeing of memory.
6702 * Any free to the zone_map should be within the bounds of the map and
6703 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6704 * free to the zone_map into a no-op, there is a problem and we should
6705 * panic.
6706 */
6707 if ((map == zone_map) && (start == end))
6708 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6709 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6710 vm_map_unlock(map);
6711
6712 return(result);
6713 }
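/*
 * Illustrative sketch (editor's addition, not part of xnu): a
 * hypothetical kernel caller removing a range that carries one
 * kernel wiring.  Per the vm_map_delete() description above,
 * VM_MAP_REMOVE_KUNWIRE drops exactly one kernel wiring per entry;
 * without it, the deletion would wait for the kernel wiring to go
 * away, since vm_map_delete() always adds VM_MAP_REMOVE_WAIT_FOR_KWIRE.
 * "example_remove_kernel_wired_range" is a made-up name.
 */
static kern_return_t
example_remove_kernel_wired_range(
	vm_map_t	map,		/* caller-supplied map (assumption) */
	vm_map_offset_t	start,		/* page-aligned start */
	vm_map_offset_t	end)		/* page-aligned end */
{
	return vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
}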
6714
6715 /*
6716 * vm_map_remove_locked:
6717 *
6718 * Remove the given address range from the target locked map.
6719 * This is the exported form of vm_map_delete.
6720 */
6721 kern_return_t
6722 vm_map_remove_locked(
6723 vm_map_t map,
6724 vm_map_offset_t start,
6725 vm_map_offset_t end,
6726 boolean_t flags)
6727 {
6728 kern_return_t result;
6729
6730 VM_MAP_RANGE_CHECK(map, start, end);
6731 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6732 return(result);
6733 }
6734
6735
6736 /*
6737 * Routine: vm_map_copy_discard
6738 *
6739 * Description:
6740 * Dispose of a map copy object (returned by
6741 * vm_map_copyin).
6742 */
6743 void
6744 vm_map_copy_discard(
6745 vm_map_copy_t copy)
6746 {
6747 if (copy == VM_MAP_COPY_NULL)
6748 return;
6749
6750 switch (copy->type) {
6751 case VM_MAP_COPY_ENTRY_LIST:
6752 while (vm_map_copy_first_entry(copy) !=
6753 vm_map_copy_to_entry(copy)) {
6754 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6755
6756 vm_map_copy_entry_unlink(copy, entry);
6757 if (entry->is_sub_map) {
6758 vm_map_deallocate(VME_SUBMAP(entry));
6759 } else {
6760 vm_object_deallocate(VME_OBJECT(entry));
6761 }
6762 vm_map_copy_entry_dispose(copy, entry);
6763 }
6764 break;
6765 case VM_MAP_COPY_OBJECT:
6766 vm_object_deallocate(copy->cpy_object);
6767 break;
6768 case VM_MAP_COPY_KERNEL_BUFFER:
6769
6770 /*
6771 * The vm_map_copy_t and possibly the data buffer were
6772 * allocated by a single call to kalloc(), i.e. the
6773 * vm_map_copy_t was not allocated out of the zone.
6774 */
6775 if (copy->size > msg_ool_size_small || copy->offset)
6776 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6777 (long long)copy->size, (long long)copy->offset);
6778 kfree(copy, copy->size + cpy_kdata_hdr_sz);
6779 return;
6780 }
6781 zfree(vm_map_copy_zone, copy);
6782 }
6783
6784 /*
6785 * Routine: vm_map_copy_copy
6786 *
6787 * Description:
6788 * Move the information in a map copy object to
6789 * a new map copy object, leaving the old one
6790 * empty.
6791 *
6792 * This is used by kernel routines that need
6793 * to look at out-of-line data (in copyin form)
6794 * before deciding whether to return SUCCESS.
6795 * If the routine returns FAILURE, the original
6796 * copy object will be deallocated; therefore,
6797 * these routines must make a copy of the copy
6798 * object and leave the original empty so that
6799 * deallocation will not fail.
6800 */
6801 vm_map_copy_t
6802 vm_map_copy_copy(
6803 vm_map_copy_t copy)
6804 {
6805 vm_map_copy_t new_copy;
6806
6807 if (copy == VM_MAP_COPY_NULL)
6808 return VM_MAP_COPY_NULL;
6809
6810 /*
6811 * Allocate a new copy object, and copy the information
6812 * from the old one into it.
6813 */
6814
6815 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6816 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6817 *new_copy = *copy;
6818
6819 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6820 /*
6821 * The links in the entry chain must be
6822 * changed to point to the new copy object.
6823 */
6824 vm_map_copy_first_entry(copy)->vme_prev
6825 = vm_map_copy_to_entry(new_copy);
6826 vm_map_copy_last_entry(copy)->vme_next
6827 = vm_map_copy_to_entry(new_copy);
6828 }
6829
6830 /*
6831 * Change the old copy object into one that contains
6832 * nothing to be deallocated.
6833 */
6834 copy->type = VM_MAP_COPY_OBJECT;
6835 copy->cpy_object = VM_OBJECT_NULL;
6836
6837 /*
6838 * Return the new object.
6839 */
6840 return new_copy;
6841 }
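/*
 * Illustrative sketch (editor's addition, not part of xnu): the
 * pattern described above.  A hypothetical kernel routine inspects
 * out-of-line data before deciding whether to succeed.  It first
 * moves the data into a private copy object, leaving the original
 * empty, so that the caller's eventual vm_map_copy_discard() of the
 * original cannot free data the routine still needs.  The names
 * "example_consume_copy" and "example_validate" are made up.
 */
static kern_return_t
example_consume_copy(
	vm_map_copy_t	copy,			/* e.g. returned by vm_map_copyin */
	boolean_t	(*example_validate)(vm_map_copy_t), /* hypothetical check */
	vm_map_copy_t	*kept_copy)		/* out: data we keep on success */
{
	vm_map_copy_t	private_copy;

	/* move the contents out; "copy" is left with nothing to deallocate */
	private_copy = vm_map_copy_copy(copy);

	if (!example_validate(private_copy)) {
		/* we own the data now, so we must discard it ourselves */
		vm_map_copy_discard(private_copy);
		*kept_copy = VM_MAP_COPY_NULL;
		return KERN_FAILURE;
	}

	*kept_copy = private_copy;
	return KERN_SUCCESS;
}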
6842
6843 static kern_return_t
6844 vm_map_overwrite_submap_recurse(
6845 vm_map_t dst_map,
6846 vm_map_offset_t dst_addr,
6847 vm_map_size_t dst_size)
6848 {
6849 vm_map_offset_t dst_end;
6850 vm_map_entry_t tmp_entry;
6851 vm_map_entry_t entry;
6852 kern_return_t result;
6853 boolean_t encountered_sub_map = FALSE;
6854
6855
6856
6857 /*
6858 * Verify that the destination is all writeable
6859 * initially. We have to trunc the destination
6860 * address and round the copy size or we'll end up
6861 * splitting entries in strange ways.
6862 */
6863
6864 dst_end = vm_map_round_page(dst_addr + dst_size,
6865 VM_MAP_PAGE_MASK(dst_map));
6866 vm_map_lock(dst_map);
6867
6868 start_pass_1:
6869 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6870 vm_map_unlock(dst_map);
6871 return(KERN_INVALID_ADDRESS);
6872 }
6873
6874 vm_map_clip_start(dst_map,
6875 tmp_entry,
6876 vm_map_trunc_page(dst_addr,
6877 VM_MAP_PAGE_MASK(dst_map)));
6878 if (tmp_entry->is_sub_map) {
6879 /* clipping did unnest if needed */
6880 assert(!tmp_entry->use_pmap);
6881 }
6882
6883 for (entry = tmp_entry;;) {
6884 vm_map_entry_t next;
6885
6886 next = entry->vme_next;
6887 while(entry->is_sub_map) {
6888 vm_map_offset_t sub_start;
6889 vm_map_offset_t sub_end;
6890 vm_map_offset_t local_end;
6891
6892 if (entry->in_transition) {
6893 /*
6894 * Say that we are waiting, and wait for entry.
6895 */
6896 entry->needs_wakeup = TRUE;
6897 vm_map_entry_wait(dst_map, THREAD_UNINT);
6898
6899 goto start_pass_1;
6900 }
6901
6902 encountered_sub_map = TRUE;
6903 sub_start = VME_OFFSET(entry);
6904
6905 if(entry->vme_end < dst_end)
6906 sub_end = entry->vme_end;
6907 else
6908 sub_end = dst_end;
6909 sub_end -= entry->vme_start;
6910 sub_end += VME_OFFSET(entry);
6911 local_end = entry->vme_end;
6912 vm_map_unlock(dst_map);
6913
6914 result = vm_map_overwrite_submap_recurse(
6915 VME_SUBMAP(entry),
6916 sub_start,
6917 sub_end - sub_start);
6918
6919 if(result != KERN_SUCCESS)
6920 return result;
6921 if (dst_end <= entry->vme_end)
6922 return KERN_SUCCESS;
6923 vm_map_lock(dst_map);
6924 if(!vm_map_lookup_entry(dst_map, local_end,
6925 &tmp_entry)) {
6926 vm_map_unlock(dst_map);
6927 return(KERN_INVALID_ADDRESS);
6928 }
6929 entry = tmp_entry;
6930 next = entry->vme_next;
6931 }
6932
6933 if ( ! (entry->protection & VM_PROT_WRITE)) {
6934 vm_map_unlock(dst_map);
6935 return(KERN_PROTECTION_FAILURE);
6936 }
6937
6938 /*
6939 * If the entry is in transition, we must wait
6940 * for it to exit that state. Anything could happen
6941 * when we unlock the map, so start over.
6942 */
6943 if (entry->in_transition) {
6944
6945 /*
6946 * Say that we are waiting, and wait for entry.
6947 */
6948 entry->needs_wakeup = TRUE;
6949 vm_map_entry_wait(dst_map, THREAD_UNINT);
6950
6951 goto start_pass_1;
6952 }
6953
6954 /*
6955 * our range is contained completely within this map entry
6956 */
6957 if (dst_end <= entry->vme_end) {
6958 vm_map_unlock(dst_map);
6959 return KERN_SUCCESS;
6960 }
6961 /*
6962 * check that range specified is contiguous region
6963 */
6964 if ((next == vm_map_to_entry(dst_map)) ||
6965 (next->vme_start != entry->vme_end)) {
6966 vm_map_unlock(dst_map);
6967 return(KERN_INVALID_ADDRESS);
6968 }
6969
6970 /*
6971 * Check for permanent objects in the destination.
6972 */
6973 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6974 ((!VME_OBJECT(entry)->internal) ||
6975 (VME_OBJECT(entry)->true_share))) {
6976 if(encountered_sub_map) {
6977 vm_map_unlock(dst_map);
6978 return(KERN_FAILURE);
6979 }
6980 }
6981
6982
6983 entry = next;
6984 }/* for */
6985 vm_map_unlock(dst_map);
6986 return(KERN_SUCCESS);
6987 }
6988
6989 /*
6990 * Routine: vm_map_copy_overwrite
6991 *
6992 * Description:
6993 * Copy the memory described by the map copy
6994 * object (copy; returned by vm_map_copyin) onto
6995 * the specified destination region (dst_map, dst_addr).
6996 * The destination must be writeable.
6997 *
6998 * Unlike vm_map_copyout, this routine actually
6999 * writes over previously-mapped memory. If the
7000 * previous mapping was to a permanent (user-supplied)
7001 * memory object, it is preserved.
7002 *
7003 * The attributes (protection and inheritance) of the
7004 * destination region are preserved.
7005 *
7006 * If successful, consumes the copy object.
7007 * Otherwise, the caller is responsible for it.
7008 *
7009 * Implementation notes:
7010 * To overwrite aligned temporary virtual memory, it is
7011 * sufficient to remove the previous mapping and insert
7012 * the new copy. This replacement is done either on
7013 * the whole region (if no permanent virtual memory
7014 * objects are embedded in the destination region) or
7015 * in individual map entries.
7016 *
7017 * To overwrite permanent virtual memory, it is necessary
7018 * to copy each page, as the external memory management
7019 * interface currently does not provide any optimizations.
7020 *
7021 * Unaligned memory also has to be copied. It is possible
7022 * to use 'vm_trickery' to copy the aligned data. This is
7023 * not done but not hard to implement.
7024 *
7025 * Once a page of permanent memory has been overwritten,
7026 * it is impossible to interrupt this function; otherwise,
7027 * the call would be neither atomic nor location-independent.
7028 * The kernel-state portion of a user thread must be
7029 * interruptible.
7030 *
7031 * It may be expensive to forward all requests that might
7032 * overwrite permanent memory (vm_write, vm_copy) to
7033 * uninterruptible kernel threads. This routine may be
7034 * called by interruptible threads; however, success is
7035 * not guaranteed -- if the request cannot be performed
7036 * atomically and interruptibly, an error indication is
7037 * returned.
7038 */
7039
7040 static kern_return_t
7041 vm_map_copy_overwrite_nested(
7042 vm_map_t dst_map,
7043 vm_map_address_t dst_addr,
7044 vm_map_copy_t copy,
7045 boolean_t interruptible,
7046 pmap_t pmap,
7047 boolean_t discard_on_success)
7048 {
7049 vm_map_offset_t dst_end;
7050 vm_map_entry_t tmp_entry;
7051 vm_map_entry_t entry;
7052 kern_return_t kr;
7053 boolean_t aligned = TRUE;
7054 boolean_t contains_permanent_objects = FALSE;
7055 boolean_t encountered_sub_map = FALSE;
7056 vm_map_offset_t base_addr;
7057 vm_map_size_t copy_size;
7058 vm_map_size_t total_size;
7059
7060
7061 /*
7062 * Check for null copy object.
7063 */
7064
7065 if (copy == VM_MAP_COPY_NULL)
7066 return(KERN_SUCCESS);
7067
7068 /*
7069 * Check for special kernel buffer allocated
7070 * by new_ipc_kmsg_copyin.
7071 */
7072
7073 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7074 return(vm_map_copyout_kernel_buffer(
7075 dst_map, &dst_addr,
7076 copy, copy->size, TRUE, discard_on_success));
7077 }
7078
7079 /*
7080 * Only works for entry lists at the moment. Will
7081 * support page lists later.
7082 */
7083
7084 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7085
7086 if (copy->size == 0) {
7087 if (discard_on_success)
7088 vm_map_copy_discard(copy);
7089 return(KERN_SUCCESS);
7090 }
7091
7092 /*
7093 * Verify that the destination is all writeable
7094 * initially. We have to trunc the destination
7095 * address and round the copy size or we'll end up
7096 * splitting entries in strange ways.
7097 */
7098
7099 if (!VM_MAP_PAGE_ALIGNED(copy->size,
7100 VM_MAP_PAGE_MASK(dst_map)) ||
7101 !VM_MAP_PAGE_ALIGNED(copy->offset,
7102 VM_MAP_PAGE_MASK(dst_map)) ||
7103 !VM_MAP_PAGE_ALIGNED(dst_addr,
7104 VM_MAP_PAGE_MASK(dst_map)))
7105 {
7106 aligned = FALSE;
7107 dst_end = vm_map_round_page(dst_addr + copy->size,
7108 VM_MAP_PAGE_MASK(dst_map));
7109 } else {
7110 dst_end = dst_addr + copy->size;
7111 }
7112
7113 vm_map_lock(dst_map);
7114
7115 /* LP64todo - remove this check when vm_map_commpage64()
7116 * no longer has to stuff in a map_entry for the commpage
7117 * above the map's max_offset.
7118 */
7119 if (dst_addr >= dst_map->max_offset) {
7120 vm_map_unlock(dst_map);
7121 return(KERN_INVALID_ADDRESS);
7122 }
7123
7124 start_pass_1:
7125 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7126 vm_map_unlock(dst_map);
7127 return(KERN_INVALID_ADDRESS);
7128 }
7129 vm_map_clip_start(dst_map,
7130 tmp_entry,
7131 vm_map_trunc_page(dst_addr,
7132 VM_MAP_PAGE_MASK(dst_map)));
7133 for (entry = tmp_entry;;) {
7134 vm_map_entry_t next = entry->vme_next;
7135
7136 while(entry->is_sub_map) {
7137 vm_map_offset_t sub_start;
7138 vm_map_offset_t sub_end;
7139 vm_map_offset_t local_end;
7140
7141 if (entry->in_transition) {
7142
7143 /*
7144 * Say that we are waiting, and wait for entry.
7145 */
7146 entry->needs_wakeup = TRUE;
7147 vm_map_entry_wait(dst_map, THREAD_UNINT);
7148
7149 goto start_pass_1;
7150 }
7151
7152 local_end = entry->vme_end;
7153 if (!(entry->needs_copy)) {
7154 /* if needs_copy we are a COW submap */
7155 /* in such a case we just replace so */
7156 /* there is no need for the follow- */
7157 /* ing check. */
7158 encountered_sub_map = TRUE;
7159 sub_start = VME_OFFSET(entry);
7160
7161 if(entry->vme_end < dst_end)
7162 sub_end = entry->vme_end;
7163 else
7164 sub_end = dst_end;
7165 sub_end -= entry->vme_start;
7166 sub_end += VME_OFFSET(entry);
7167 vm_map_unlock(dst_map);
7168
7169 kr = vm_map_overwrite_submap_recurse(
7170 VME_SUBMAP(entry),
7171 sub_start,
7172 sub_end - sub_start);
7173 if(kr != KERN_SUCCESS)
7174 return kr;
7175 vm_map_lock(dst_map);
7176 }
7177
7178 if (dst_end <= entry->vme_end)
7179 goto start_overwrite;
7180 if(!vm_map_lookup_entry(dst_map, local_end,
7181 &entry)) {
7182 vm_map_unlock(dst_map);
7183 return(KERN_INVALID_ADDRESS);
7184 }
7185 next = entry->vme_next;
7186 }
7187
7188 if ( ! (entry->protection & VM_PROT_WRITE)) {
7189 vm_map_unlock(dst_map);
7190 return(KERN_PROTECTION_FAILURE);
7191 }
7192
7193 /*
7194 * If the entry is in transition, we must wait
7195 * for it to exit that state. Anything could happen
7196 * when we unlock the map, so start over.
7197 */
7198 if (entry->in_transition) {
7199
7200 /*
7201 * Say that we are waiting, and wait for entry.
7202 */
7203 entry->needs_wakeup = TRUE;
7204 vm_map_entry_wait(dst_map, THREAD_UNINT);
7205
7206 goto start_pass_1;
7207 }
7208
7209 /*
7210 * our range is contained completely within this map entry
7211 */
7212 if (dst_end <= entry->vme_end)
7213 break;
7214 /*
7215 * check that range specified is contiguous region
7216 */
7217 if ((next == vm_map_to_entry(dst_map)) ||
7218 (next->vme_start != entry->vme_end)) {
7219 vm_map_unlock(dst_map);
7220 return(KERN_INVALID_ADDRESS);
7221 }
7222
7223
7224 /*
7225 * Check for permanent objects in the destination.
7226 */
7227 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7228 ((!VME_OBJECT(entry)->internal) ||
7229 (VME_OBJECT(entry)->true_share))) {
7230 contains_permanent_objects = TRUE;
7231 }
7232
7233 entry = next;
7234 }/* for */
7235
7236 start_overwrite:
7237 /*
7238 * If there are permanent objects in the destination, then
7239 * the copy cannot be interrupted.
7240 */
7241
7242 if (interruptible && contains_permanent_objects) {
7243 vm_map_unlock(dst_map);
7244 return(KERN_FAILURE); /* XXX */
7245 }
7246
7247 /*
7248 *
7249 * Make a second pass, overwriting the data
7250 * At the beginning of each loop iteration,
7251 * the next entry to be overwritten is "tmp_entry"
7252 * (initially, the value returned from the lookup above),
7253 * and the starting address expected in that entry
7254 * is "start".
7255 */
7256
7257 total_size = copy->size;
7258 if(encountered_sub_map) {
7259 copy_size = 0;
7260 /* re-calculate tmp_entry since we've had the map */
7261 /* unlocked */
7262 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7263 vm_map_unlock(dst_map);
7264 return(KERN_INVALID_ADDRESS);
7265 }
7266 } else {
7267 copy_size = copy->size;
7268 }
7269
7270 base_addr = dst_addr;
7271 while(TRUE) {
7272 /* deconstruct the copy object and process it in parts */
7273 /* only in the sub_map, interruptible case */
7274 vm_map_entry_t copy_entry;
7275 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7276 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
7277 int nentries;
7278 int remaining_entries = 0;
7279 vm_map_offset_t new_offset = 0;
7280
7281 for (entry = tmp_entry; copy_size == 0;) {
7282 vm_map_entry_t next;
7283
7284 next = entry->vme_next;
7285
7286 /* tmp_entry and base address are moved along */
7287 /* each time we encounter a sub-map. Otherwise */
7288 /* entry can outpace tmp_entry, and the copy_size */
7289 /* may reflect the distance between them */
7290 /* if the current entry is found to be in transition */
7291 /* we will start over at the beginning or the last */
7292 /* encounter of a submap as dictated by base_addr */
7293 /* we will zero copy_size accordingly. */
7294 if (entry->in_transition) {
7295 /*
7296 * Say that we are waiting, and wait for entry.
7297 */
7298 entry->needs_wakeup = TRUE;
7299 vm_map_entry_wait(dst_map, THREAD_UNINT);
7300
7301 if(!vm_map_lookup_entry(dst_map, base_addr,
7302 &tmp_entry)) {
7303 vm_map_unlock(dst_map);
7304 return(KERN_INVALID_ADDRESS);
7305 }
7306 copy_size = 0;
7307 entry = tmp_entry;
7308 continue;
7309 }
7310 if(entry->is_sub_map) {
7311 vm_map_offset_t sub_start;
7312 vm_map_offset_t sub_end;
7313 vm_map_offset_t local_end;
7314
7315 if (entry->needs_copy) {
7316 /* if this is a COW submap */
7317 /* just back the range with an */
7318 /* anonymous entry */
7319 if(entry->vme_end < dst_end)
7320 sub_end = entry->vme_end;
7321 else
7322 sub_end = dst_end;
7323 if(entry->vme_start < base_addr)
7324 sub_start = base_addr;
7325 else
7326 sub_start = entry->vme_start;
7327 vm_map_clip_end(
7328 dst_map, entry, sub_end);
7329 vm_map_clip_start(
7330 dst_map, entry, sub_start);
7331 assert(!entry->use_pmap);
7332 entry->is_sub_map = FALSE;
7333 vm_map_deallocate(
7334 VME_SUBMAP(entry));
7335 VME_SUBMAP_SET(entry, NULL);
7336 entry->is_shared = FALSE;
7337 entry->needs_copy = FALSE;
7338 VME_OFFSET_SET(entry, 0);
7339 /*
7340 * XXX FBDP
7341 * We should propagate the protections
7342 * of the submap entry here instead
7343 * of forcing them to VM_PROT_ALL...
7344 * Or better yet, we should inherit
7345 * the protection of the copy_entry.
7346 */
7347 entry->protection = VM_PROT_ALL;
7348 entry->max_protection = VM_PROT_ALL;
7349 entry->wired_count = 0;
7350 entry->user_wired_count = 0;
7351 if(entry->inheritance
7352 == VM_INHERIT_SHARE)
7353 entry->inheritance = VM_INHERIT_COPY;
7354 continue;
7355 }
7356 /* first take care of any non-sub_map */
7357 /* entries to send */
7358 if(base_addr < entry->vme_start) {
7359 /* stuff to send */
7360 copy_size =
7361 entry->vme_start - base_addr;
7362 break;
7363 }
7364 sub_start = VME_OFFSET(entry);
7365
7366 if(entry->vme_end < dst_end)
7367 sub_end = entry->vme_end;
7368 else
7369 sub_end = dst_end;
7370 sub_end -= entry->vme_start;
7371 sub_end += VME_OFFSET(entry);
7372 local_end = entry->vme_end;
7373 vm_map_unlock(dst_map);
7374 copy_size = sub_end - sub_start;
7375
7376 /* adjust the copy object */
7377 if (total_size > copy_size) {
7378 vm_map_size_t local_size = 0;
7379 vm_map_size_t entry_size;
7380
7381 nentries = 1;
7382 new_offset = copy->offset;
7383 copy_entry = vm_map_copy_first_entry(copy);
7384 while(copy_entry !=
7385 vm_map_copy_to_entry(copy)){
7386 entry_size = copy_entry->vme_end -
7387 copy_entry->vme_start;
7388 if((local_size < copy_size) &&
7389 ((local_size + entry_size)
7390 >= copy_size)) {
7391 vm_map_copy_clip_end(copy,
7392 copy_entry,
7393 copy_entry->vme_start +
7394 (copy_size - local_size));
7395 entry_size = copy_entry->vme_end -
7396 copy_entry->vme_start;
7397 local_size += entry_size;
7398 new_offset += entry_size;
7399 }
7400 if(local_size >= copy_size) {
7401 next_copy = copy_entry->vme_next;
7402 copy_entry->vme_next =
7403 vm_map_copy_to_entry(copy);
7404 previous_prev =
7405 copy->cpy_hdr.links.prev;
7406 copy->cpy_hdr.links.prev = copy_entry;
7407 copy->size = copy_size;
7408 remaining_entries =
7409 copy->cpy_hdr.nentries;
7410 remaining_entries -= nentries;
7411 copy->cpy_hdr.nentries = nentries;
7412 break;
7413 } else {
7414 local_size += entry_size;
7415 new_offset += entry_size;
7416 nentries++;
7417 }
7418 copy_entry = copy_entry->vme_next;
7419 }
7420 }
7421
7422 if((entry->use_pmap) && (pmap == NULL)) {
7423 kr = vm_map_copy_overwrite_nested(
7424 VME_SUBMAP(entry),
7425 sub_start,
7426 copy,
7427 interruptible,
7428 VME_SUBMAP(entry)->pmap,
7429 TRUE);
7430 } else if (pmap != NULL) {
7431 kr = vm_map_copy_overwrite_nested(
7432 VME_SUBMAP(entry),
7433 sub_start,
7434 copy,
7435 interruptible, pmap,
7436 TRUE);
7437 } else {
7438 kr = vm_map_copy_overwrite_nested(
7439 VME_SUBMAP(entry),
7440 sub_start,
7441 copy,
7442 interruptible,
7443 dst_map->pmap,
7444 TRUE);
7445 }
7446 if(kr != KERN_SUCCESS) {
7447 if(next_copy != NULL) {
7448 copy->cpy_hdr.nentries +=
7449 remaining_entries;
7450 copy->cpy_hdr.links.prev->vme_next =
7451 next_copy;
7452 copy->cpy_hdr.links.prev
7453 = previous_prev;
7454 copy->size = total_size;
7455 }
7456 return kr;
7457 }
7458 if (dst_end <= local_end) {
7459 return(KERN_SUCCESS);
7460 }
7461 /* otherwise copy no longer exists, it was */
7462 /* destroyed after successful copy_overwrite */
7463 copy = (vm_map_copy_t)
7464 zalloc(vm_map_copy_zone);
7465 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7466 vm_map_copy_first_entry(copy) =
7467 vm_map_copy_last_entry(copy) =
7468 vm_map_copy_to_entry(copy);
7469 copy->type = VM_MAP_COPY_ENTRY_LIST;
7470 copy->offset = new_offset;
7471
7472 /*
7473 * XXX FBDP
7474 * this does not seem to deal with
7475 * the VM map store (R&B tree)
7476 */
7477
7478 total_size -= copy_size;
7479 copy_size = 0;
7480 /* put back remainder of copy in container */
7481 if(next_copy != NULL) {
7482 copy->cpy_hdr.nentries = remaining_entries;
7483 copy->cpy_hdr.links.next = next_copy;
7484 copy->cpy_hdr.links.prev = previous_prev;
7485 copy->size = total_size;
7486 next_copy->vme_prev =
7487 vm_map_copy_to_entry(copy);
7488 next_copy = NULL;
7489 }
7490 base_addr = local_end;
7491 vm_map_lock(dst_map);
7492 if(!vm_map_lookup_entry(dst_map,
7493 local_end, &tmp_entry)) {
7494 vm_map_unlock(dst_map);
7495 return(KERN_INVALID_ADDRESS);
7496 }
7497 entry = tmp_entry;
7498 continue;
7499 }
7500 if (dst_end <= entry->vme_end) {
7501 copy_size = dst_end - base_addr;
7502 break;
7503 }
7504
7505 if ((next == vm_map_to_entry(dst_map)) ||
7506 (next->vme_start != entry->vme_end)) {
7507 vm_map_unlock(dst_map);
7508 return(KERN_INVALID_ADDRESS);
7509 }
7510
7511 entry = next;
7512 }/* for */
7513
7514 next_copy = NULL;
7515 nentries = 1;
7516
7517 /* adjust the copy object */
7518 if (total_size > copy_size) {
7519 vm_map_size_t local_size = 0;
7520 vm_map_size_t entry_size;
7521
7522 new_offset = copy->offset;
7523 copy_entry = vm_map_copy_first_entry(copy);
7524 while(copy_entry != vm_map_copy_to_entry(copy)) {
7525 entry_size = copy_entry->vme_end -
7526 copy_entry->vme_start;
7527 if((local_size < copy_size) &&
7528 ((local_size + entry_size)
7529 >= copy_size)) {
7530 vm_map_copy_clip_end(copy, copy_entry,
7531 copy_entry->vme_start +
7532 (copy_size - local_size));
7533 entry_size = copy_entry->vme_end -
7534 copy_entry->vme_start;
7535 local_size += entry_size;
7536 new_offset += entry_size;
7537 }
7538 if(local_size >= copy_size) {
7539 next_copy = copy_entry->vme_next;
7540 copy_entry->vme_next =
7541 vm_map_copy_to_entry(copy);
7542 previous_prev =
7543 copy->cpy_hdr.links.prev;
7544 copy->cpy_hdr.links.prev = copy_entry;
7545 copy->size = copy_size;
7546 remaining_entries =
7547 copy->cpy_hdr.nentries;
7548 remaining_entries -= nentries;
7549 copy->cpy_hdr.nentries = nentries;
7550 break;
7551 } else {
7552 local_size += entry_size;
7553 new_offset += entry_size;
7554 nentries++;
7555 }
7556 copy_entry = copy_entry->vme_next;
7557 }
7558 }
7559
7560 if (aligned) {
7561 pmap_t local_pmap;
7562
7563 if(pmap)
7564 local_pmap = pmap;
7565 else
7566 local_pmap = dst_map->pmap;
7567
7568 if ((kr = vm_map_copy_overwrite_aligned(
7569 dst_map, tmp_entry, copy,
7570 base_addr, local_pmap)) != KERN_SUCCESS) {
7571 if(next_copy != NULL) {
7572 copy->cpy_hdr.nentries +=
7573 remaining_entries;
7574 copy->cpy_hdr.links.prev->vme_next =
7575 next_copy;
7576 copy->cpy_hdr.links.prev =
7577 previous_prev;
7578 copy->size += copy_size;
7579 }
7580 return kr;
7581 }
7582 vm_map_unlock(dst_map);
7583 } else {
7584 /*
7585 * Performance gain:
7586 *
7587 * if the copy and dst addresses are misaligned but have the same
7588 * offset within the page, we can copy_not_aligned the
7589 * misaligned parts and copy aligned the rest. If they are
7590 * aligned but the length is unaligned, we simply need to copy
7591 * the end bit unaligned. We'll need to split the misaligned
7592 * bits of the region in this case!
7593 */
7594 /* ALWAYS UNLOCKS THE dst_map MAP */
7595 kr = vm_map_copy_overwrite_unaligned(
7596 dst_map,
7597 tmp_entry,
7598 copy,
7599 base_addr,
7600 discard_on_success);
7601 if (kr != KERN_SUCCESS) {
7602 if(next_copy != NULL) {
7603 copy->cpy_hdr.nentries +=
7604 remaining_entries;
7605 copy->cpy_hdr.links.prev->vme_next =
7606 next_copy;
7607 copy->cpy_hdr.links.prev =
7608 previous_prev;
7609 copy->size += copy_size;
7610 }
7611 return kr;
7612 }
7613 }
7614 total_size -= copy_size;
7615 if(total_size == 0)
7616 break;
7617 base_addr += copy_size;
7618 copy_size = 0;
7619 copy->offset = new_offset;
7620 if(next_copy != NULL) {
7621 copy->cpy_hdr.nentries = remaining_entries;
7622 copy->cpy_hdr.links.next = next_copy;
7623 copy->cpy_hdr.links.prev = previous_prev;
7624 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7625 copy->size = total_size;
7626 }
7627 vm_map_lock(dst_map);
7628 while(TRUE) {
7629 if (!vm_map_lookup_entry(dst_map,
7630 base_addr, &tmp_entry)) {
7631 vm_map_unlock(dst_map);
7632 return(KERN_INVALID_ADDRESS);
7633 }
7634 if (tmp_entry->in_transition) {
7635 entry->needs_wakeup = TRUE;
7636 vm_map_entry_wait(dst_map, THREAD_UNINT);
7637 } else {
7638 break;
7639 }
7640 }
7641 vm_map_clip_start(dst_map,
7642 tmp_entry,
7643 vm_map_trunc_page(base_addr,
7644 VM_MAP_PAGE_MASK(dst_map)));
7645
7646 entry = tmp_entry;
7647 } /* while */
7648
7649 /*
7650 * Throw away the vm_map_copy object
7651 */
7652 if (discard_on_success)
7653 vm_map_copy_discard(copy);
7654
7655 return(KERN_SUCCESS);
7656 }/* vm_map_copy_overwrite */
7657
7658 kern_return_t
7659 vm_map_copy_overwrite(
7660 vm_map_t dst_map,
7661 vm_map_offset_t dst_addr,
7662 vm_map_copy_t copy,
7663 boolean_t interruptible)
7664 {
7665 vm_map_size_t head_size, tail_size;
7666 vm_map_copy_t head_copy, tail_copy;
7667 vm_map_offset_t head_addr, tail_addr;
7668 vm_map_entry_t entry;
7669 kern_return_t kr;
7670
7671 head_size = 0;
7672 tail_size = 0;
7673 head_copy = NULL;
7674 tail_copy = NULL;
7675 head_addr = 0;
7676 tail_addr = 0;
7677
7678 if (interruptible ||
7679 copy == VM_MAP_COPY_NULL ||
7680 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7681 /*
7682 * We can't split the "copy" map if we're interruptible
7683 * or if we don't have a "copy" map...
7684 */
7685 blunt_copy:
7686 return vm_map_copy_overwrite_nested(dst_map,
7687 dst_addr,
7688 copy,
7689 interruptible,
7690 (pmap_t) NULL,
7691 TRUE);
7692 }
7693
7694 if (copy->size < 3 * PAGE_SIZE) {
7695 /*
7696 * Too small to bother with optimizing...
7697 */
7698 goto blunt_copy;
7699 }
7700
7701 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7702 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7703 /*
7704 * Incompatible mis-alignment of source and destination...
7705 */
7706 goto blunt_copy;
7707 }
7708
7709 /*
7710 * Proper alignment or identical mis-alignment at the beginning.
7711 * Let's try to do a small unaligned copy first (if needed)
7712 * and then an aligned copy for the rest.
7713 */
7714 if (!page_aligned(dst_addr)) {
7715 head_addr = dst_addr;
7716 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7717 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7718 }
7719 if (!page_aligned(copy->offset + copy->size)) {
7720 /*
7721 * Mis-alignment at the end.
7722 * Do an aligned copy up to the last page and
7723 * then an unaligned copy for the remaining bytes.
7724 */
7725 tail_size = ((copy->offset + copy->size) &
7726 VM_MAP_PAGE_MASK(dst_map));
7727 tail_addr = dst_addr + copy->size - tail_size;
7728 }
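	/*
	 * Worked example (editor's addition, illustrative only, assuming
	 * 4KB map pages): dst_addr = 0x1000a00, copy->offset = 0x200a00,
	 * copy->size = 0x3200.  Both are misaligned by 0xa00, so
	 * head_addr = 0x1000a00 and head_size = 0x1000 - 0xa00 = 0x600;
	 * (offset + size) = 0x203c00 gives tail_size = 0xc00 and
	 * tail_addr = 0x1000a00 + 0x3200 - 0xc00 = 0x1003000.  The
	 * remaining 0x3200 - 0x600 - 0xc00 = 0x2000 bytes in the middle
	 * start at the page boundary 0x1001000 and are copied aligned.
	 */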
7729
7730 if (head_size + tail_size == copy->size) {
7731 /*
7732 * It's all unaligned, no optimization possible...
7733 */
7734 goto blunt_copy;
7735 }
7736
7737 /*
7738 * Can't optimize if there are any submaps in the
7739 * destination due to the way we free the "copy" map
7740 * progressively in vm_map_copy_overwrite_nested()
7741 * in that case.
7742 */
7743 vm_map_lock_read(dst_map);
7744 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7745 vm_map_unlock_read(dst_map);
7746 goto blunt_copy;
7747 }
7748 for (;
7749 (entry != vm_map_copy_to_entry(copy) &&
7750 entry->vme_start < dst_addr + copy->size);
7751 entry = entry->vme_next) {
7752 if (entry->is_sub_map) {
7753 vm_map_unlock_read(dst_map);
7754 goto blunt_copy;
7755 }
7756 }
7757 vm_map_unlock_read(dst_map);
7758
7759 if (head_size) {
7760 /*
7761 * Unaligned copy of the first "head_size" bytes, to reach
7762 * a page boundary.
7763 */
7764
7765 /*
7766 * Extract "head_copy" out of "copy".
7767 */
7768 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7769 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7770 vm_map_copy_first_entry(head_copy) =
7771 vm_map_copy_to_entry(head_copy);
7772 vm_map_copy_last_entry(head_copy) =
7773 vm_map_copy_to_entry(head_copy);
7774 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7775 head_copy->cpy_hdr.nentries = 0;
7776 head_copy->cpy_hdr.entries_pageable =
7777 copy->cpy_hdr.entries_pageable;
7778 vm_map_store_init(&head_copy->cpy_hdr);
7779
7780 head_copy->offset = copy->offset;
7781 head_copy->size = head_size;
7782
7783 copy->offset += head_size;
7784 copy->size -= head_size;
7785
7786 entry = vm_map_copy_first_entry(copy);
7787 vm_map_copy_clip_end(copy, entry, copy->offset);
7788 vm_map_copy_entry_unlink(copy, entry);
7789 vm_map_copy_entry_link(head_copy,
7790 vm_map_copy_to_entry(head_copy),
7791 entry);
7792
7793 /*
7794 * Do the unaligned copy.
7795 */
7796 kr = vm_map_copy_overwrite_nested(dst_map,
7797 head_addr,
7798 head_copy,
7799 interruptible,
7800 (pmap_t) NULL,
7801 FALSE);
7802 if (kr != KERN_SUCCESS)
7803 goto done;
7804 }
7805
7806 if (tail_size) {
7807 /*
7808 * Extract "tail_copy" out of "copy".
7809 */
7810 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7811 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7812 vm_map_copy_first_entry(tail_copy) =
7813 vm_map_copy_to_entry(tail_copy);
7814 vm_map_copy_last_entry(tail_copy) =
7815 vm_map_copy_to_entry(tail_copy);
7816 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7817 tail_copy->cpy_hdr.nentries = 0;
7818 tail_copy->cpy_hdr.entries_pageable =
7819 copy->cpy_hdr.entries_pageable;
7820 vm_map_store_init(&tail_copy->cpy_hdr);
7821
7822 tail_copy->offset = copy->offset + copy->size - tail_size;
7823 tail_copy->size = tail_size;
7824
7825 copy->size -= tail_size;
7826
7827 entry = vm_map_copy_last_entry(copy);
7828 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7829 entry = vm_map_copy_last_entry(copy);
7830 vm_map_copy_entry_unlink(copy, entry);
7831 vm_map_copy_entry_link(tail_copy,
7832 vm_map_copy_last_entry(tail_copy),
7833 entry);
7834 }
7835
7836 /*
7837 * Copy most (or possibly all) of the data.
7838 */
7839 kr = vm_map_copy_overwrite_nested(dst_map,
7840 dst_addr + head_size,
7841 copy,
7842 interruptible,
7843 (pmap_t) NULL,
7844 FALSE);
7845 if (kr != KERN_SUCCESS) {
7846 goto done;
7847 }
7848
7849 if (tail_size) {
7850 kr = vm_map_copy_overwrite_nested(dst_map,
7851 tail_addr,
7852 tail_copy,
7853 interruptible,
7854 (pmap_t) NULL,
7855 FALSE);
7856 }
7857
7858 done:
7859 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7860 if (kr == KERN_SUCCESS) {
7861 /*
7862 * Discard all the copy maps.
7863 */
7864 if (head_copy) {
7865 vm_map_copy_discard(head_copy);
7866 head_copy = NULL;
7867 }
7868 vm_map_copy_discard(copy);
7869 if (tail_copy) {
7870 vm_map_copy_discard(tail_copy);
7871 tail_copy = NULL;
7872 }
7873 } else {
7874 /*
7875 * Re-assemble the original copy map.
7876 */
7877 if (head_copy) {
7878 entry = vm_map_copy_first_entry(head_copy);
7879 vm_map_copy_entry_unlink(head_copy, entry);
7880 vm_map_copy_entry_link(copy,
7881 vm_map_copy_to_entry(copy),
7882 entry);
7883 copy->offset -= head_size;
7884 copy->size += head_size;
7885 vm_map_copy_discard(head_copy);
7886 head_copy = NULL;
7887 }
7888 if (tail_copy) {
7889 entry = vm_map_copy_last_entry(tail_copy);
7890 vm_map_copy_entry_unlink(tail_copy, entry);
7891 vm_map_copy_entry_link(copy,
7892 vm_map_copy_last_entry(copy),
7893 entry);
7894 copy->size += tail_size;
7895 vm_map_copy_discard(tail_copy);
7896 tail_copy = NULL;
7897 }
7898 }
7899 return kr;
7900 }
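/*
 * Illustrative sketch (editor's addition, not part of xnu): the
 * copyin/overwrite pattern implied by the description above.  A
 * hypothetical caller captures a source range with vm_map_copyin()
 * and writes it over an existing, writeable destination range in the
 * same map.  On success the copy object is consumed; on failure the
 * caller still owns it and must discard it.
 * "example_overwrite_range" is a made-up name.
 */
static kern_return_t
example_overwrite_range(
	vm_map_t		map,		/* caller-supplied map (assumption) */
	vm_map_address_t	src_addr,	/* where the data comes from */
	vm_map_address_t	dst_addr,	/* existing writeable destination */
	vm_map_size_t		size)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(map, src_addr, size,
			   FALSE,		/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS) {
		/* not consumed on failure; we must discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}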
7901
7902
7903 /*
7904 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
7905 *
7906 * Description:
7907 * Physically copy unaligned data
7908 *
7909 * Implementation:
7910 * Unaligned parts of pages have to be physically copied. We use
7911 * a modified form of vm_fault_copy (which understands non-aligned
7912 * page offsets and sizes) to do the copy. We attempt to copy as
7913 * much memory in one go as possible; however, vm_fault_copy copies
7914 * within one memory object, so we have to find the smallest of "amount
7915 * left", "source object data size" and "target object data size". With
7916 * unaligned data we don't need to split regions; therefore the source
7917 * (copy) object should be one map entry, although the target range may
7918 * be split over multiple map entries. In any event we are pessimistic
7919 * about these assumptions.
7920 *
7921 * Assumptions:
7922 * dst_map is locked on entry and is returned locked on success,
7923 * unlocked on error.
7924 */
7925
7926 static kern_return_t
7927 vm_map_copy_overwrite_unaligned(
7928 vm_map_t dst_map,
7929 vm_map_entry_t entry,
7930 vm_map_copy_t copy,
7931 vm_map_offset_t start,
7932 boolean_t discard_on_success)
7933 {
7934 vm_map_entry_t copy_entry;
7935 vm_map_entry_t copy_entry_next;
7936 vm_map_version_t version;
7937 vm_object_t dst_object;
7938 vm_object_offset_t dst_offset;
7939 vm_object_offset_t src_offset;
7940 vm_object_offset_t entry_offset;
7941 vm_map_offset_t entry_end;
7942 vm_map_size_t src_size,
7943 dst_size,
7944 copy_size,
7945 amount_left;
7946 kern_return_t kr = KERN_SUCCESS;
7947
7948
7949 copy_entry = vm_map_copy_first_entry(copy);
7950
7951 vm_map_lock_write_to_read(dst_map);
7952
7953 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7954 amount_left = copy->size;
7955 /*
7956 * unaligned, so we never clipped this entry; we need the offset into
7957 * the vm_object, not just the data.
7958 */
7959 while (amount_left > 0) {
7960
7961 if (entry == vm_map_to_entry(dst_map)) {
7962 vm_map_unlock_read(dst_map);
7963 return KERN_INVALID_ADDRESS;
7964 }
7965
7966 /* "start" must be within the current map entry */
7967 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7968
7969 dst_offset = start - entry->vme_start;
7970
7971 dst_size = entry->vme_end - start;
7972
7973 src_size = copy_entry->vme_end -
7974 (copy_entry->vme_start + src_offset);
7975
7976 if (dst_size < src_size) {
7977 /*
7978 * we can only copy dst_size bytes before
7979 * we have to get the next destination entry
7980 */
7981 copy_size = dst_size;
7982 } else {
7983 /*
7984 * we can only copy src_size bytes before
7985 * we have to get the next source copy entry
7986 */
7987 copy_size = src_size;
7988 }
7989
7990 if (copy_size > amount_left) {
7991 copy_size = amount_left;
7992 }
7993 /*
7994 * Entry needs copy; create a shadow object for the
7995 * copy-on-write region.
7996 */
7997 if (entry->needs_copy &&
7998 ((entry->protection & VM_PROT_WRITE) != 0))
7999 {
8000 if (vm_map_lock_read_to_write(dst_map)) {
8001 vm_map_lock_read(dst_map);
8002 goto RetryLookup;
8003 }
8004 VME_OBJECT_SHADOW(entry,
8005 (vm_map_size_t)(entry->vme_end
8006 - entry->vme_start));
8007 entry->needs_copy = FALSE;
8008 vm_map_lock_write_to_read(dst_map);
8009 }
8010 dst_object = VME_OBJECT(entry);
8011 /*
8012 * Unlike with the virtual (aligned) copy, we're going
8013 * to fault on it; therefore we need a target object.
8014 */
8015 if (dst_object == VM_OBJECT_NULL) {
8016 if (vm_map_lock_read_to_write(dst_map)) {
8017 vm_map_lock_read(dst_map);
8018 goto RetryLookup;
8019 }
8020 dst_object = vm_object_allocate((vm_map_size_t)
8021 entry->vme_end - entry->vme_start);
8022 VME_OBJECT(entry) = dst_object;
8023 VME_OFFSET_SET(entry, 0);
8024 assert(entry->use_pmap);
8025 vm_map_lock_write_to_read(dst_map);
8026 }
8027 /*
8028 * Take an object reference and unlock map. The "entry" may
8029 * disappear or change when the map is unlocked.
8030 */
8031 vm_object_reference(dst_object);
8032 version.main_timestamp = dst_map->timestamp;
8033 entry_offset = VME_OFFSET(entry);
8034 entry_end = entry->vme_end;
8035 vm_map_unlock_read(dst_map);
8036 /*
8037 * Copy as much as possible in one pass
8038 */
8039 kr = vm_fault_copy(
8040 VME_OBJECT(copy_entry),
8041 VME_OFFSET(copy_entry) + src_offset,
8042 &copy_size,
8043 dst_object,
8044 entry_offset + dst_offset,
8045 dst_map,
8046 &version,
8047 THREAD_UNINT );
8048
8049 start += copy_size;
8050 src_offset += copy_size;
8051 amount_left -= copy_size;
8052 /*
8053 * Release the object reference
8054 */
8055 vm_object_deallocate(dst_object);
8056 /*
8057 * If a hard error occurred, return it now
8058 */
8059 if (kr != KERN_SUCCESS)
8060 return kr;
8061
8062 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
8063 || amount_left == 0)
8064 {
8065 /*
8066 * all done with this copy entry, dispose.
8067 */
8068 copy_entry_next = copy_entry->vme_next;
8069
8070 if (discard_on_success) {
8071 vm_map_copy_entry_unlink(copy, copy_entry);
8072 assert(!copy_entry->is_sub_map);
8073 vm_object_deallocate(VME_OBJECT(copy_entry));
8074 vm_map_copy_entry_dispose(copy, copy_entry);
8075 }
8076
8077 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
8078 amount_left) {
8079 /*
8080 * not finished copying, but we ran out of source
8081 */
8082 return KERN_INVALID_ADDRESS;
8083 }
8084
8085 copy_entry = copy_entry_next;
8086
8087 src_offset = 0;
8088 }
8089
8090 if (amount_left == 0)
8091 return KERN_SUCCESS;
8092
8093 vm_map_lock_read(dst_map);
8094 if (version.main_timestamp == dst_map->timestamp) {
8095 if (start == entry_end) {
8096 /*
8097 * destination region is split. Use the version
8098 * information to avoid a lookup in the normal
8099 * case.
8100 */
8101 entry = entry->vme_next;
8102 /*
8103 * should be contiguous. Fail if we encounter
8104 * a hole in the destination.
8105 */
8106 if (start != entry->vme_start) {
8107 vm_map_unlock_read(dst_map);
8108 return KERN_INVALID_ADDRESS ;
8109 }
8110 }
8111 } else {
8112 /*
8113 * Map version check failed.
8114 * We must look up the entry because somebody
8115 * might have changed the map behind our backs.
8116 */
8117 RetryLookup:
8118 if (!vm_map_lookup_entry(dst_map, start, &entry))
8119 {
8120 vm_map_unlock_read(dst_map);
8121 return KERN_INVALID_ADDRESS ;
8122 }
8123 }
8124 }/* while */
8125
8126 return KERN_SUCCESS;
8127 }/* vm_map_copy_overwrite_unaligned */
8128
8129 /*
8130 * Routine: vm_map_copy_overwrite_aligned [internal use only]
8131 *
8132 * Description:
8133 * Does all the vm_trickery possible for whole pages.
8134 *
8135 * Implementation:
8136 *
8137 * If there are no permanent objects in the destination,
8138 * and the source and destination map entry zones match,
8139 * and the destination map entry is not shared,
8140 * then the map entries can be deleted and replaced
8141 * with those from the copy. The following code is the
8142 * basic idea of what to do, but there are lots of annoying
8143 * little details about getting protection and inheritance
8144 * right. We should add protection, inheritance, and sharing checks
8145 * to the above pass and make sure that no wiring is involved.
8146 */
8147
8148 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8149 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8150 int vm_map_copy_overwrite_aligned_src_large = 0;
8151
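/*
 * A minimal sketch (hypothetical helper, not part of the original
 * source) of the fast-path test applied below: the destination pages
 * may simply be thrown away and replaced with the source entries when
 * the destination is temporary, unshared, anonymous memory, or when
 * the destination entry still needs a copy.
 */
#if 0 /* illustrative sketch only */
static boolean_t
dst_entry_is_replaceable(vm_map_entry_t entry)	/* hypothetical name */
{
	vm_object_t object = VME_OBJECT(entry);

	return ((!entry->is_shared &&
		 (object == VM_OBJECT_NULL ||
		  (object->internal && !object->true_share))) ||
		entry->needs_copy);
}
#endif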
8152 static kern_return_t
8153 vm_map_copy_overwrite_aligned(
8154 vm_map_t dst_map,
8155 vm_map_entry_t tmp_entry,
8156 vm_map_copy_t copy,
8157 vm_map_offset_t start,
8158 __unused pmap_t pmap)
8159 {
8160 vm_object_t object;
8161 vm_map_entry_t copy_entry;
8162 vm_map_size_t copy_size;
8163 vm_map_size_t size;
8164 vm_map_entry_t entry;
8165
8166 while ((copy_entry = vm_map_copy_first_entry(copy))
8167 != vm_map_copy_to_entry(copy))
8168 {
8169 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8170
8171 entry = tmp_entry;
8172 if (entry->is_sub_map) {
8173 /* unnested when clipped earlier */
8174 assert(!entry->use_pmap);
8175 }
8176 if (entry == vm_map_to_entry(dst_map)) {
8177 vm_map_unlock(dst_map);
8178 return KERN_INVALID_ADDRESS;
8179 }
8180 size = (entry->vme_end - entry->vme_start);
8181 /*
8182 * Make sure that no holes popped up in the
8183 * address map, and that the protection is
8184 * still valid, in case the map was unlocked
8185 * earlier.
8186 */
8187
8188 if ((entry->vme_start != start) || ((entry->is_sub_map)
8189 && !entry->needs_copy)) {
8190 vm_map_unlock(dst_map);
8191 return(KERN_INVALID_ADDRESS);
8192 }
8193 assert(entry != vm_map_to_entry(dst_map));
8194
8195 /*
8196 * Check protection again
8197 */
8198
8199 if ( ! (entry->protection & VM_PROT_WRITE)) {
8200 vm_map_unlock(dst_map);
8201 return(KERN_PROTECTION_FAILURE);
8202 }
8203
8204 /*
8205 * Adjust to source size first
8206 */
8207
8208 if (copy_size < size) {
8209 if (entry->map_aligned &&
8210 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8211 VM_MAP_PAGE_MASK(dst_map))) {
8212 /* no longer map-aligned */
8213 entry->map_aligned = FALSE;
8214 }
8215 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8216 size = copy_size;
8217 }
8218
8219 /*
8220 * Adjust to destination size
8221 */
8222
8223 if (size < copy_size) {
8224 vm_map_copy_clip_end(copy, copy_entry,
8225 copy_entry->vme_start + size);
8226 copy_size = size;
8227 }
8228
8229 assert((entry->vme_end - entry->vme_start) == size);
8230 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8231 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8232
8233 /*
8234 * If the destination contains temporary unshared memory,
8235 * we can perform the copy by throwing it away and
8236 * installing the source data.
8237 */
8238
8239 object = VME_OBJECT(entry);
8240 if ((!entry->is_shared &&
8241 ((object == VM_OBJECT_NULL) ||
8242 (object->internal && !object->true_share))) ||
8243 entry->needs_copy) {
8244 vm_object_t old_object = VME_OBJECT(entry);
8245 vm_object_offset_t old_offset = VME_OFFSET(entry);
8246 vm_object_offset_t offset;
8247
8248 /*
8249 * Ensure that the source and destination aren't
8250 * identical
8251 */
8252 if (old_object == VME_OBJECT(copy_entry) &&
8253 old_offset == VME_OFFSET(copy_entry)) {
8254 vm_map_copy_entry_unlink(copy, copy_entry);
8255 vm_map_copy_entry_dispose(copy, copy_entry);
8256
8257 if (old_object != VM_OBJECT_NULL)
8258 vm_object_deallocate(old_object);
8259
8260 start = tmp_entry->vme_end;
8261 tmp_entry = tmp_entry->vme_next;
8262 continue;
8263 }
8264
8265 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8266 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
8267 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8268 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
8269 copy_size <= __TRADEOFF1_COPY_SIZE) {
8270 /*
8271 * Virtual vs. Physical copy tradeoff #1.
8272 *
8273 * Copying only a few pages out of a large
8274 * object: do a physical copy instead of
8275 * a virtual copy, to avoid possibly keeping
8276 * the entire large object alive because of
8277 * those few copy-on-write pages.
8278 */
8279 vm_map_copy_overwrite_aligned_src_large++;
8280 goto slow_copy;
8281 }
8282
8283 if ((dst_map->pmap != kernel_pmap) &&
8284 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8285 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
8286 vm_object_t new_object, new_shadow;
8287
8288 /*
8289 * We're about to map something over a mapping
8290 * established by malloc()...
8291 */
8292 new_object = VME_OBJECT(copy_entry);
8293 if (new_object != VM_OBJECT_NULL) {
8294 vm_object_lock_shared(new_object);
8295 }
8296 while (new_object != VM_OBJECT_NULL &&
8297 !new_object->true_share &&
8298 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8299 new_object->internal) {
8300 new_shadow = new_object->shadow;
8301 if (new_shadow == VM_OBJECT_NULL) {
8302 break;
8303 }
8304 vm_object_lock_shared(new_shadow);
8305 vm_object_unlock(new_object);
8306 new_object = new_shadow;
8307 }
8308 if (new_object != VM_OBJECT_NULL) {
8309 if (!new_object->internal) {
8310 /*
8311 * The new mapping is backed
8312 * by an external object. We
8313 * don't want malloc'ed memory
8314 * to be replaced with such a
8315 * non-anonymous mapping, so
8316 * let's go off the optimized
8317 * path...
8318 */
8319 vm_map_copy_overwrite_aligned_src_not_internal++;
8320 vm_object_unlock(new_object);
8321 goto slow_copy;
8322 }
8323 if (new_object->true_share ||
8324 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8325 /*
8326 * Same if there's a "true_share"
8327 * object in the shadow chain, or
8328 * an object with a non-default
8329 * (SYMMETRIC) copy strategy.
8330 */
8331 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8332 vm_object_unlock(new_object);
8333 goto slow_copy;
8334 }
8335 vm_object_unlock(new_object);
8336 }
8337 /*
8338 * The new mapping is still backed by
8339 * anonymous (internal) memory, so it's
8340 * OK to substitute it for the original
8341 * malloc() mapping.
8342 */
8343 }
8344
8345 if (old_object != VM_OBJECT_NULL) {
8346 if(entry->is_sub_map) {
8347 if(entry->use_pmap) {
8348 #ifndef NO_NESTED_PMAP
8349 pmap_unnest(dst_map->pmap,
8350 (addr64_t)entry->vme_start,
8351 entry->vme_end - entry->vme_start);
8352 #endif /* NO_NESTED_PMAP */
8353 if(dst_map->mapped_in_other_pmaps) {
8354 /* clean up parent */
8355 /* map/maps */
8356 vm_map_submap_pmap_clean(
8357 dst_map, entry->vme_start,
8358 entry->vme_end,
8359 VME_SUBMAP(entry),
8360 VME_OFFSET(entry));
8361 }
8362 } else {
8363 vm_map_submap_pmap_clean(
8364 dst_map, entry->vme_start,
8365 entry->vme_end,
8366 VME_SUBMAP(entry),
8367 VME_OFFSET(entry));
8368 }
8369 vm_map_deallocate(VME_SUBMAP(entry));
8370 } else {
8371 if(dst_map->mapped_in_other_pmaps) {
8372 vm_object_pmap_protect_options(
8373 VME_OBJECT(entry),
8374 VME_OFFSET(entry),
8375 entry->vme_end
8376 - entry->vme_start,
8377 PMAP_NULL,
8378 entry->vme_start,
8379 VM_PROT_NONE,
8380 PMAP_OPTIONS_REMOVE);
8381 } else {
8382 pmap_remove_options(
8383 dst_map->pmap,
8384 (addr64_t)(entry->vme_start),
8385 (addr64_t)(entry->vme_end),
8386 PMAP_OPTIONS_REMOVE);
8387 }
8388 vm_object_deallocate(old_object);
8389 }
8390 }
8391
8392 entry->is_sub_map = FALSE;
8393 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8394 object = VME_OBJECT(entry);
8395 entry->needs_copy = copy_entry->needs_copy;
8396 entry->wired_count = 0;
8397 entry->user_wired_count = 0;
8398 offset = VME_OFFSET(copy_entry);
8399 VME_OFFSET_SET(entry, offset);
8400
8401 vm_map_copy_entry_unlink(copy, copy_entry);
8402 vm_map_copy_entry_dispose(copy, copy_entry);
8403
8404 /*
8405 * We could try to push pages into the pmap at this point, BUT
8406 * this optimization only saved on average 2 us per page if ALL the
8407 * pages in the source were currently mapped and ALL the pages in the
8408 * dest were touched. If fewer than 2/3 of the pages were touched,
8409 * this optimization actually cost more cycles. It also puts a lot of
8410 * pressure on the pmap layer w.r.t. mapping structures.
8411 */
8412
8413 /*
8414 * Set up for the next iteration. The map
8415 * has not been unlocked, so the next
8416 * address should be at the end of this
8417 * entry, and the next map entry should be
8418 * the one following it.
8419 */
8420
8421 start = tmp_entry->vme_end;
8422 tmp_entry = tmp_entry->vme_next;
8423 } else {
8424 vm_map_version_t version;
8425 vm_object_t dst_object;
8426 vm_object_offset_t dst_offset;
8427 kern_return_t r;
8428
8429 slow_copy:
8430 if (entry->needs_copy) {
8431 VME_OBJECT_SHADOW(entry,
8432 (entry->vme_end -
8433 entry->vme_start));
8434 entry->needs_copy = FALSE;
8435 }
8436
8437 dst_object = VME_OBJECT(entry);
8438 dst_offset = VME_OFFSET(entry);
8439
8440 /*
8441 * Take an object reference, and record
8442 * the map version information so that the
8443 * map can be safely unlocked.
8444 */
8445
8446 if (dst_object == VM_OBJECT_NULL) {
8447 /*
8448 * We would usually have just taken the
8449 * optimized path above if the destination
8450 * object has not been allocated yet. But we
8451 * now disable that optimization if the copy
8452 * entry's object is not backed by anonymous
8453 * memory to avoid replacing malloc'ed
8454 * (i.e. re-usable) anonymous memory with a
8455 * not-so-anonymous mapping.
8456 * So we have to handle this case here and
8457 * allocate a new VM object for this map entry.
8458 */
8459 dst_object = vm_object_allocate(
8460 entry->vme_end - entry->vme_start);
8461 dst_offset = 0;
8462 VME_OBJECT_SET(entry, dst_object);
8463 VME_OFFSET_SET(entry, dst_offset);
8464 assert(entry->use_pmap);
8465
8466 }
8467
8468 vm_object_reference(dst_object);
8469
8470 /* account for unlock bumping up timestamp */
8471 version.main_timestamp = dst_map->timestamp + 1;
8472
8473 vm_map_unlock(dst_map);
8474
8475 /*
8476 * Copy as much as possible in one pass
8477 */
8478
8479 copy_size = size;
8480 r = vm_fault_copy(
8481 VME_OBJECT(copy_entry),
8482 VME_OFFSET(copy_entry),
8483 &copy_size,
8484 dst_object,
8485 dst_offset,
8486 dst_map,
8487 &version,
8488 THREAD_UNINT );
8489
8490 /*
8491 * Release the object reference
8492 */
8493
8494 vm_object_deallocate(dst_object);
8495
8496 /*
8497 * If a hard error occurred, return it now
8498 */
8499
8500 if (r != KERN_SUCCESS)
8501 return(r);
8502
8503 if (copy_size != 0) {
8504 /*
8505 * Dispose of the copied region
8506 */
8507
8508 vm_map_copy_clip_end(copy, copy_entry,
8509 copy_entry->vme_start + copy_size);
8510 vm_map_copy_entry_unlink(copy, copy_entry);
8511 vm_object_deallocate(VME_OBJECT(copy_entry));
8512 vm_map_copy_entry_dispose(copy, copy_entry);
8513 }
8514
8515 /*
8516 * Pick up in the destination map where we left off.
8517 *
8518 * Use the version information to avoid a lookup
8519 * in the normal case.
8520 */
8521
8522 start += copy_size;
8523 vm_map_lock(dst_map);
8524 if (version.main_timestamp == dst_map->timestamp &&
8525 copy_size != 0) {
8526 /* We can safely use saved tmp_entry value */
8527
8528 if (tmp_entry->map_aligned &&
8529 !VM_MAP_PAGE_ALIGNED(
8530 start,
8531 VM_MAP_PAGE_MASK(dst_map))) {
8532 /* no longer map-aligned */
8533 tmp_entry->map_aligned = FALSE;
8534 }
8535 vm_map_clip_end(dst_map, tmp_entry, start);
8536 tmp_entry = tmp_entry->vme_next;
8537 } else {
8538 /* Must do lookup of tmp_entry */
8539
8540 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8541 vm_map_unlock(dst_map);
8542 return(KERN_INVALID_ADDRESS);
8543 }
8544 if (tmp_entry->map_aligned &&
8545 !VM_MAP_PAGE_ALIGNED(
8546 start,
8547 VM_MAP_PAGE_MASK(dst_map))) {
8548 /* no longer map-aligned */
8549 tmp_entry->map_aligned = FALSE;
8550 }
8551 vm_map_clip_start(dst_map, tmp_entry, start);
8552 }
8553 }
8554 }/* while */
8555
8556 return(KERN_SUCCESS);
8557 }/* vm_map_copy_overwrite_aligned */
8558
8559 /*
8560 * Routine: vm_map_copyin_kernel_buffer [internal use only]
8561 *
8562 * Description:
8563 * Copy in data to a kernel buffer from space in the
8564 * source map. The original space may be optionally
8565 * deallocated.
8566 *
8567 * If successful, returns a new copy object.
8568 */
8569 static kern_return_t
8570 vm_map_copyin_kernel_buffer(
8571 vm_map_t src_map,
8572 vm_map_offset_t src_addr,
8573 vm_map_size_t len,
8574 boolean_t src_destroy,
8575 vm_map_copy_t *copy_result)
8576 {
8577 kern_return_t kr;
8578 vm_map_copy_t copy;
8579 vm_size_t kalloc_size;
8580
8581 if (len > msg_ool_size_small)
8582 return KERN_INVALID_ARGUMENT;
8583
8584 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8585
8586 copy = (vm_map_copy_t)kalloc(kalloc_size);
8587 if (copy == VM_MAP_COPY_NULL)
8588 return KERN_RESOURCE_SHORTAGE;
8589 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8590 copy->size = len;
8591 copy->offset = 0;
8592
8593 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
8594 if (kr != KERN_SUCCESS) {
8595 kfree(copy, kalloc_size);
8596 return kr;
8597 }
8598 if (src_destroy) {
8599 (void) vm_map_remove(
8600 src_map,
8601 vm_map_trunc_page(src_addr,
8602 VM_MAP_PAGE_MASK(src_map)),
8603 vm_map_round_page(src_addr + len,
8604 VM_MAP_PAGE_MASK(src_map)),
8605 (VM_MAP_REMOVE_INTERRUPTIBLE |
8606 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
8607 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
8608 }
8609 *copy_result = copy;
8610 return KERN_SUCCESS;
8611 }
8612
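/*
 * A minimal usage sketch (assuming a kernel context; the address and
 * length variables are hypothetical placeholders): for copies smaller
 * than msg_ool_size_small, vm_map_copyin_internal() takes this
 * kernel-buffer path instead of building an entry-list copy.
 */
#if 0 /* illustrative sketch only */
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* snapshot a small range of the current map into a kernel buffer */
	kr = vm_map_copyin_kernel_buffer(current_map(),
					 (vm_map_offset_t) user_addr,	/* hypothetical */
					 (vm_map_size_t) user_len,	/* hypothetical */
					 FALSE,		/* don't destroy the source */
					 &copy);
	if (kr == KERN_SUCCESS) {
		/* ... hand "copy" to a vm_map_copyout*() variant, or ... */
		vm_map_copy_discard(copy);
	}
#endif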
8613 /*
8614 * Routine: vm_map_copyout_kernel_buffer [internal use only]
8615 *
8616 * Description:
8617 * Copy out data from a kernel buffer into space in the
8618 * destination map. The space may optionally be dynamically
8619 * allocated.
8620 *
8621 * If successful, consumes the copy object.
8622 * Otherwise, the caller is responsible for it.
8623 */
8624 static int vm_map_copyout_kernel_buffer_failures = 0;
8625 static kern_return_t
8626 vm_map_copyout_kernel_buffer(
8627 vm_map_t map,
8628 vm_map_address_t *addr, /* IN/OUT */
8629 vm_map_copy_t copy,
8630 vm_map_size_t copy_size,
8631 boolean_t overwrite,
8632 boolean_t consume_on_success)
8633 {
8634 kern_return_t kr = KERN_SUCCESS;
8635 thread_t thread = current_thread();
8636
8637 assert(copy->size == copy_size);
8638
8639 /*
8640 * check for corrupted vm_map_copy structure
8641 */
8642 if (copy_size > msg_ool_size_small || copy->offset)
8643 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8644 (long long)copy->size, (long long)copy->offset);
8645
8646 if (!overwrite) {
8647
8648 /*
8649 * Allocate space in the target map for the data
8650 */
8651 *addr = 0;
8652 kr = vm_map_enter(map,
8653 addr,
8654 vm_map_round_page(copy_size,
8655 VM_MAP_PAGE_MASK(map)),
8656 (vm_map_offset_t) 0,
8657 VM_FLAGS_ANYWHERE,
8658 VM_OBJECT_NULL,
8659 (vm_object_offset_t) 0,
8660 FALSE,
8661 VM_PROT_DEFAULT,
8662 VM_PROT_ALL,
8663 VM_INHERIT_DEFAULT);
8664 if (kr != KERN_SUCCESS)
8665 return kr;
8666 }
8667
8668 /*
8669 * Copyout the data from the kernel buffer to the target map.
8670 */
8671 if (thread->map == map) {
8672
8673 /*
8674 * If the target map is the current map, just do
8675 * the copy.
8676 */
8677 assert((vm_size_t)copy_size == copy_size);
8678 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
8679 kr = KERN_INVALID_ADDRESS;
8680 }
8681 }
8682 else {
8683 vm_map_t oldmap;
8684
8685 /*
8686 * If the target map is another map, assume the
8687 * target's address space identity for the duration
8688 * of the copy.
8689 */
8690 vm_map_reference(map);
8691 oldmap = vm_map_switch(map);
8692
8693 assert((vm_size_t)copy_size == copy_size);
8694 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
8695 vm_map_copyout_kernel_buffer_failures++;
8696 kr = KERN_INVALID_ADDRESS;
8697 }
8698
8699 (void) vm_map_switch(oldmap);
8700 vm_map_deallocate(map);
8701 }
8702
8703 if (kr != KERN_SUCCESS) {
8704 /* the copy failed, clean up */
8705 if (!overwrite) {
8706 /*
8707 * Deallocate the space we allocated in the target map.
8708 */
8709 (void) vm_map_remove(
8710 map,
8711 vm_map_trunc_page(*addr,
8712 VM_MAP_PAGE_MASK(map)),
8713 vm_map_round_page((*addr +
8714 vm_map_round_page(copy_size,
8715 VM_MAP_PAGE_MASK(map))),
8716 VM_MAP_PAGE_MASK(map)),
8717 VM_MAP_NO_FLAGS);
8718 *addr = 0;
8719 }
8720 } else {
8721 /* copy was successful, discard the copy structure */
8722 if (consume_on_success) {
8723 kfree(copy, copy_size + cpy_kdata_hdr_sz);
8724 }
8725 }
8726
8727 return kr;
8728 }
8729
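/*
 * A condensed sketch of the address-space-switch pattern used above
 * when the destination map is not the caller's: temporarily assume the
 * target map's identity so that copyout() lands in the right task.
 * "kernel_buf", "user_addr" and "nbytes" are hypothetical placeholders.
 */
#if 0 /* illustrative sketch only */
	vm_map_t	oldmap;

	vm_map_reference(map);			/* keep "map" alive across the switch */
	oldmap = vm_map_switch(map);		/* copyout() now targets "map" */
	if (copyout(kernel_buf, user_addr, nbytes))
		kr = KERN_INVALID_ADDRESS;
	(void) vm_map_switch(oldmap);		/* restore the original identity */
	vm_map_deallocate(map);
#endif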
8730 /*
8731 * Macro: vm_map_copy_insert
8732 *
8733 * Description:
8734 * Link a copy chain ("copy") into a map at the
8735 * specified location (after "where").
8736 * Side effects:
8737 * The copy chain is destroyed.
8738 * Warning:
8739 * The arguments are evaluated multiple times.
8740 */
8741 #define vm_map_copy_insert(map, where, copy) \
8742 MACRO_BEGIN \
8743 vm_map_store_copy_insert(map, where, copy); \
8744 zfree(vm_map_copy_zone, copy); \
8745 MACRO_END
8746
8747 void
8748 vm_map_copy_remap(
8749 vm_map_t map,
8750 vm_map_entry_t where,
8751 vm_map_copy_t copy,
8752 vm_map_offset_t adjustment,
8753 vm_prot_t cur_prot,
8754 vm_prot_t max_prot,
8755 vm_inherit_t inheritance)
8756 {
8757 vm_map_entry_t copy_entry, new_entry;
8758
8759 for (copy_entry = vm_map_copy_first_entry(copy);
8760 copy_entry != vm_map_copy_to_entry(copy);
8761 copy_entry = copy_entry->vme_next) {
8762 /* get a new VM map entry for the map */
8763 new_entry = vm_map_entry_create(map,
8764 !map->hdr.entries_pageable);
8765 /* copy the "copy entry" to the new entry */
8766 vm_map_entry_copy(new_entry, copy_entry);
8767 /* adjust "start" and "end" */
8768 new_entry->vme_start += adjustment;
8769 new_entry->vme_end += adjustment;
8770 /* clear some attributes */
8771 new_entry->inheritance = inheritance;
8772 new_entry->protection = cur_prot;
8773 new_entry->max_protection = max_prot;
8774 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8775 /* take an extra reference on the entry's "object" */
8776 if (new_entry->is_sub_map) {
8777 assert(!new_entry->use_pmap); /* not nested */
8778 vm_map_lock(VME_SUBMAP(new_entry));
8779 vm_map_reference(VME_SUBMAP(new_entry));
8780 vm_map_unlock(VME_SUBMAP(new_entry));
8781 } else {
8782 vm_object_reference(VME_OBJECT(new_entry));
8783 }
8784 /* insert the new entry in the map */
8785 vm_map_store_entry_link(map, where, new_entry);
8786 /* continue inserting the "copy entries" after the new entry */
8787 where = new_entry;
8788 }
8789 }
8790
8791
8792 /*
8793 * Returns true if *size matches (or is in the range of) copy->size.
8794 * Upon returning true, the *size field is updated with the actual size of the
8795 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
8796 */
8797 boolean_t
8798 vm_map_copy_validate_size(
8799 vm_map_t dst_map,
8800 vm_map_copy_t copy,
8801 vm_map_size_t *size)
8802 {
8803 if (copy == VM_MAP_COPY_NULL)
8804 return FALSE;
8805 vm_map_size_t copy_sz = copy->size;
8806 vm_map_size_t sz = *size;
8807 switch (copy->type) {
8808 case VM_MAP_COPY_OBJECT:
8809 case VM_MAP_COPY_KERNEL_BUFFER:
8810 if (sz == copy_sz)
8811 return TRUE;
8812 break;
8813 case VM_MAP_COPY_ENTRY_LIST:
8814 /*
8815 * potential page-size rounding prevents us from exactly
8816 * validating this flavor of vm_map_copy, but we can at least
8817 * assert that it's within a range.
8818 */
8819 if (copy_sz >= sz &&
8820 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
8821 *size = copy_sz;
8822 return TRUE;
8823 }
8824 break;
8825 default:
8826 break;
8827 }
8828 return FALSE;
8829 }
8830
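/*
 * A minimal caller-side sketch (hypothetical variables) of how the size
 * check above pairs with vm_map_copyout_size() below: validate the
 * caller-supplied size against the copy object first, then copy out
 * using the possibly-updated size.
 */
#if 0 /* illustrative sketch only */
	vm_map_size_t		size = caller_size;	/* hypothetical */
	vm_map_address_t	addr;
	kern_return_t		kr;

	if (vm_map_copy_validate_size(dst_map, copy, &size)) {
		/* "size" may have been updated for entry-list copies */
		kr = vm_map_copyout_size(dst_map, &addr, copy, size);
	} else {
		kr = KERN_FAILURE;	/* size didn't match the copy object */
	}
#endif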
8831 /*
8832 * Routine: vm_map_copyout_size
8833 *
8834 * Description:
8835 * Copy out a copy chain ("copy") into newly-allocated
8836 * space in the destination map. Uses a prevalidated
8837 * size for the copy object (vm_map_copy_validate_size).
8838 *
8839 * If successful, consumes the copy object.
8840 * Otherwise, the caller is responsible for it.
8841 */
8842 kern_return_t
8843 vm_map_copyout_size(
8844 vm_map_t dst_map,
8845 vm_map_address_t *dst_addr, /* OUT */
8846 vm_map_copy_t copy,
8847 vm_map_size_t copy_size)
8848 {
8849 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
8850 TRUE, /* consume_on_success */
8851 VM_PROT_DEFAULT,
8852 VM_PROT_ALL,
8853 VM_INHERIT_DEFAULT);
8854 }
8855
8856 /*
8857 * Routine: vm_map_copyout
8858 *
8859 * Description:
8860 * Copy out a copy chain ("copy") into newly-allocated
8861 * space in the destination map.
8862 *
8863 * If successful, consumes the copy object.
8864 * Otherwise, the caller is responsible for it.
8865 */
8866 kern_return_t
8867 vm_map_copyout(
8868 vm_map_t dst_map,
8869 vm_map_address_t *dst_addr, /* OUT */
8870 vm_map_copy_t copy)
8871 {
8872 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
8873 TRUE, /* consume_on_success */
8874 VM_PROT_DEFAULT,
8875 VM_PROT_ALL,
8876 VM_INHERIT_DEFAULT);
8877 }
8878
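/*
 * A minimal round-trip sketch (assuming a kernel context; the maps,
 * address and length are hypothetical placeholders): vm_map_copyin()
 * snapshots a source range into a copy object, and vm_map_copyout()
 * materializes it at a newly allocated address in the destination map,
 * consuming the copy object on success.
 */
#if 0 /* illustrative sketch only */
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* src_destroy: keep the source mapping */
			   &copy);
	if (kr == KERN_SUCCESS) {
		kr = vm_map_copyout(dst_map, &dst_addr, copy);
		if (kr != KERN_SUCCESS)
			vm_map_copy_discard(copy);	/* not consumed on failure */
	}
#endif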
8879 kern_return_t
8880 vm_map_copyout_internal(
8881 vm_map_t dst_map,
8882 vm_map_address_t *dst_addr, /* OUT */
8883 vm_map_copy_t copy,
8884 vm_map_size_t copy_size,
8885 boolean_t consume_on_success,
8886 vm_prot_t cur_protection,
8887 vm_prot_t max_protection,
8888 vm_inherit_t inheritance)
8889 {
8890 vm_map_size_t size;
8891 vm_map_size_t adjustment;
8892 vm_map_offset_t start;
8893 vm_object_offset_t vm_copy_start;
8894 vm_map_entry_t last;
8895 vm_map_entry_t entry;
8896 vm_map_entry_t hole_entry;
8897
8898 /*
8899 * Check for null copy object.
8900 */
8901
8902 if (copy == VM_MAP_COPY_NULL) {
8903 *dst_addr = 0;
8904 return(KERN_SUCCESS);
8905 }
8906
8907 if (copy->size != copy_size) {
8908 *dst_addr = 0;
8909 return KERN_FAILURE;
8910 }
8911
8912 /*
8913 * Check for special copy object, created
8914 * by vm_map_copyin_object.
8915 */
8916
8917 if (copy->type == VM_MAP_COPY_OBJECT) {
8918 vm_object_t object = copy->cpy_object;
8919 kern_return_t kr;
8920 vm_object_offset_t offset;
8921
8922 offset = vm_object_trunc_page(copy->offset);
8923 size = vm_map_round_page((copy_size +
8924 (vm_map_size_t)(copy->offset -
8925 offset)),
8926 VM_MAP_PAGE_MASK(dst_map));
8927 *dst_addr = 0;
8928 kr = vm_map_enter(dst_map, dst_addr, size,
8929 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8930 object, offset, FALSE,
8931 VM_PROT_DEFAULT, VM_PROT_ALL,
8932 VM_INHERIT_DEFAULT);
8933 if (kr != KERN_SUCCESS)
8934 return(kr);
8935 /* Account for non-pagealigned copy object */
8936 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
8937 if (consume_on_success)
8938 zfree(vm_map_copy_zone, copy);
8939 return(KERN_SUCCESS);
8940 }
8941
8942 /*
8943 * Check for special kernel buffer allocated
8944 * by new_ipc_kmsg_copyin.
8945 */
8946
8947 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8948 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8949 copy, copy_size, FALSE,
8950 consume_on_success);
8951 }
8952
8953
8954 /*
8955 * Find space for the data
8956 */
8957
8958 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8959 VM_MAP_COPY_PAGE_MASK(copy));
8960 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
8961 VM_MAP_COPY_PAGE_MASK(copy))
8962 - vm_copy_start;
8963
8964
8965 StartAgain: ;
8966
8967 vm_map_lock(dst_map);
8968 if( dst_map->disable_vmentry_reuse == TRUE) {
8969 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8970 last = entry;
8971 } else {
8972 if (dst_map->holelistenabled) {
8973 hole_entry = (vm_map_entry_t)dst_map->holes_list;
8974
8975 if (hole_entry == NULL) {
8976 /*
8977 * No more space in the map?
8978 */
8979 vm_map_unlock(dst_map);
8980 return(KERN_NO_SPACE);
8981 }
8982
8983 last = hole_entry;
8984 start = last->vme_start;
8985 } else {
8986 assert(first_free_is_valid(dst_map));
8987 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8988 vm_map_min(dst_map) : last->vme_end;
8989 }
8990 start = vm_map_round_page(start,
8991 VM_MAP_PAGE_MASK(dst_map));
8992 }
8993
8994 while (TRUE) {
8995 vm_map_entry_t next = last->vme_next;
8996 vm_map_offset_t end = start + size;
8997
8998 if ((end > dst_map->max_offset) || (end < start)) {
8999 if (dst_map->wait_for_space) {
9000 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
9001 assert_wait((event_t) dst_map,
9002 THREAD_INTERRUPTIBLE);
9003 vm_map_unlock(dst_map);
9004 thread_block(THREAD_CONTINUE_NULL);
9005 goto StartAgain;
9006 }
9007 }
9008 vm_map_unlock(dst_map);
9009 return(KERN_NO_SPACE);
9010 }
9011
9012 if (dst_map->holelistenabled) {
9013 if (last->vme_end >= end)
9014 break;
9015 } else {
9016 /*
9017 * If there are no more entries, we must win.
9018 *
9019 * OR
9020 *
9021 * If there is another entry, it must be
9022 * after the end of the potential new region.
9023 */
9024
9025 if (next == vm_map_to_entry(dst_map))
9026 break;
9027
9028 if (next->vme_start >= end)
9029 break;
9030 }
9031
9032 last = next;
9033
9034 if (dst_map->holelistenabled) {
9035 if (last == (vm_map_entry_t) dst_map->holes_list) {
9036 /*
9037 * Wrapped around
9038 */
9039 vm_map_unlock(dst_map);
9040 return(KERN_NO_SPACE);
9041 }
9042 start = last->vme_start;
9043 } else {
9044 start = last->vme_end;
9045 }
9046 start = vm_map_round_page(start,
9047 VM_MAP_PAGE_MASK(dst_map));
9048 }
9049
9050 if (dst_map->holelistenabled) {
9051 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
9052 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
9053 }
9054 }
9055
9056
9057 adjustment = start - vm_copy_start;
9058 if (! consume_on_success) {
9059 /*
9060 * We're not allowed to consume "copy", so we'll have to
9061 * copy its map entries into the destination map below.
9062 * No need to re-allocate map entries from the correct
9063 * (pageable or not) zone, since we'll get new map entries
9064 * during the transfer.
9065 * We'll also adjust the map entries' "start" and "end"
9066 * during the transfer, to keep "copy"'s entries consistent
9067 * with its "offset".
9068 */
9069 goto after_adjustments;
9070 }
9071
9072 /*
9073 * Since we're going to just drop the map
9074 * entries from the copy into the destination
9075 * map, they must come from the same pool.
9076 */
9077
9078 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
9079 /*
9080 * Mismatches occur when dealing with the default
9081 * pager.
9082 */
9083 zone_t old_zone;
9084 vm_map_entry_t next, new;
9085
9086 /*
9087 * Find the zone that the copies were allocated from
9088 */
9089
9090 entry = vm_map_copy_first_entry(copy);
9091
9092 /*
9093 * Reinitialize the copy so that vm_map_copy_entry_link
9094 * will work.
9095 */
9096 vm_map_store_copy_reset(copy, entry);
9097 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
9098
9099 /*
9100 * Copy each entry.
9101 */
9102 while (entry != vm_map_copy_to_entry(copy)) {
9103 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9104 vm_map_entry_copy_full(new, entry);
9105 assert(!new->iokit_acct);
9106 if (new->is_sub_map) {
9107 /* clr address space specifics */
9108 new->use_pmap = FALSE;
9109 }
9110 vm_map_copy_entry_link(copy,
9111 vm_map_copy_last_entry(copy),
9112 new);
9113 next = entry->vme_next;
9114 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
9115 zfree(old_zone, entry);
9116 entry = next;
9117 }
9118 }
9119
9120 /*
9121 * Adjust the addresses in the copy chain, and
9122 * reset the region attributes.
9123 */
9124
9125 for (entry = vm_map_copy_first_entry(copy);
9126 entry != vm_map_copy_to_entry(copy);
9127 entry = entry->vme_next) {
9128 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
9129 /*
9130 * We're injecting this copy entry into a map that
9131 * has the standard page alignment, so clear
9132 * "map_aligned" (which might have been inherited
9133 * from the original map entry).
9134 */
9135 entry->map_aligned = FALSE;
9136 }
9137
9138 entry->vme_start += adjustment;
9139 entry->vme_end += adjustment;
9140
9141 if (entry->map_aligned) {
9142 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
9143 VM_MAP_PAGE_MASK(dst_map)));
9144 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
9145 VM_MAP_PAGE_MASK(dst_map)));
9146 }
9147
9148 entry->inheritance = VM_INHERIT_DEFAULT;
9149 entry->protection = VM_PROT_DEFAULT;
9150 entry->max_protection = VM_PROT_ALL;
9151 entry->behavior = VM_BEHAVIOR_DEFAULT;
9152
9153 /*
9154 * If the entry is now wired,
9155 * map the pages into the destination map.
9156 */
9157 if (entry->wired_count != 0) {
9158 vm_map_offset_t va;
9159 vm_object_offset_t offset;
9160 vm_object_t object;
9161 vm_prot_t prot;
9162 int type_of_fault;
9163
9164 object = VME_OBJECT(entry);
9165 offset = VME_OFFSET(entry);
9166 va = entry->vme_start;
9167
9168 pmap_pageable(dst_map->pmap,
9169 entry->vme_start,
9170 entry->vme_end,
9171 TRUE);
9172
9173 while (va < entry->vme_end) {
9174 vm_page_t m;
9175
9176 /*
9177 * Look up the page in the object.
9178 * Assert that the page will be found in the
9179 * top object:
9180 * either
9181 * the object was newly created by
9182 * vm_object_copy_slowly, and has
9183 * copies of all of the pages from
9184 * the source object
9185 * or
9186 * the object was moved from the old
9187 * map entry; because the old map
9188 * entry was wired, all of the pages
9189 * were in the top-level object.
9190 * (XXX not true if we wire pages for
9191 * reading)
9192 */
9193 vm_object_lock(object);
9194
9195 m = vm_page_lookup(object, offset);
9196 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
9197 m->absent)
9198 panic("vm_map_copyout: wiring %p", m);
9199
9200 /*
9201 * ENCRYPTED SWAP:
9202 * The page is assumed to be wired here, so it
9203 * shouldn't be encrypted. Otherwise, we
9204 * couldn't enter it in the page table, since
9205 * we don't want the user to see the encrypted
9206 * data.
9207 */
9208 ASSERT_PAGE_DECRYPTED(m);
9209
9210 prot = entry->protection;
9211
9212 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9213 prot)
9214 prot |= VM_PROT_EXECUTE;
9215
9216 type_of_fault = DBG_CACHE_HIT_FAULT;
9217
9218 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
9219 VM_PAGE_WIRED(m), FALSE, FALSE,
9220 FALSE, VME_ALIAS(entry),
9221 ((entry->iokit_acct ||
9222 (!entry->is_sub_map &&
9223 !entry->use_pmap))
9224 ? PMAP_OPTIONS_ALT_ACCT
9225 : 0),
9226 NULL, &type_of_fault);
9227
9228 vm_object_unlock(object);
9229
9230 offset += PAGE_SIZE_64;
9231 va += PAGE_SIZE;
9232 }
9233 }
9234 }
9235
9236 after_adjustments:
9237
9238 /*
9239 * Correct the page alignment for the result
9240 */
9241
9242 *dst_addr = start + (copy->offset - vm_copy_start);
9243
9244 /*
9245 * Update the hints and the map size
9246 */
9247
9248 if (consume_on_success) {
9249 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9250 } else {
9251 SAVE_HINT_MAP_WRITE(dst_map, last);
9252 }
9253
9254 dst_map->size += size;
9255
9256 /*
9257 * Link in the copy
9258 */
9259
9260 if (consume_on_success) {
9261 vm_map_copy_insert(dst_map, last, copy);
9262 } else {
9263 vm_map_copy_remap(dst_map, last, copy, adjustment,
9264 cur_protection, max_protection,
9265 inheritance);
9266 }
9267
9268 vm_map_unlock(dst_map);
9269
9270 /*
9271 * XXX If wiring_required, call vm_map_pageable
9272 */
9273
9274 return(KERN_SUCCESS);
9275 }
9276
9277 /*
9278 * Routine: vm_map_copyin
9279 *
9280 * Description:
9281 * see vm_map_copyin_common. Exported via Unsupported.exports.
9282 *
9283 */
9284
9285 #undef vm_map_copyin
9286
9287 kern_return_t
9288 vm_map_copyin(
9289 vm_map_t src_map,
9290 vm_map_address_t src_addr,
9291 vm_map_size_t len,
9292 boolean_t src_destroy,
9293 vm_map_copy_t *copy_result) /* OUT */
9294 {
9295 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9296 FALSE, copy_result, FALSE));
9297 }
9298
9299 /*
9300 * Routine: vm_map_copyin_common
9301 *
9302 * Description:
9303 * Copy the specified region (src_addr, len) from the
9304 * source address space (src_map), possibly removing
9305 * the region from the source address space (src_destroy).
9306 *
9307 * Returns:
9308 * A vm_map_copy_t object (copy_result), suitable for
9309 * insertion into another address space (using vm_map_copyout),
9310 * copying over another address space region (using
9311 * vm_map_copy_overwrite). If the copy is unused, it
9312 * should be destroyed (using vm_map_copy_discard).
9313 *
9314 * In/out conditions:
9315 * The source map should not be locked on entry.
9316 */
9317
9318 typedef struct submap_map {
9319 vm_map_t parent_map;
9320 vm_map_offset_t base_start;
9321 vm_map_offset_t base_end;
9322 vm_map_size_t base_len;
9323 struct submap_map *next;
9324 } submap_map_t;
9325
9326 kern_return_t
9327 vm_map_copyin_common(
9328 vm_map_t src_map,
9329 vm_map_address_t src_addr,
9330 vm_map_size_t len,
9331 boolean_t src_destroy,
9332 __unused boolean_t src_volatile,
9333 vm_map_copy_t *copy_result, /* OUT */
9334 boolean_t use_maxprot)
9335 {
9336 int flags;
9337
9338 flags = 0;
9339 if (src_destroy) {
9340 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9341 }
9342 if (use_maxprot) {
9343 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9344 }
9345 return vm_map_copyin_internal(src_map,
9346 src_addr,
9347 len,
9348 flags,
9349 copy_result);
9350 }
9351 kern_return_t
9352 vm_map_copyin_internal(
9353 vm_map_t src_map,
9354 vm_map_address_t src_addr,
9355 vm_map_size_t len,
9356 int flags,
9357 vm_map_copy_t *copy_result) /* OUT */
9358 {
9359 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9360 * in multi-level lookup, this
9361 * entry contains the actual
9362 * vm_object/offset.
9363 */
9364 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9365
9366 vm_map_offset_t src_start; /* Start of current entry --
9367 * where copy is taking place now
9368 */
9369 vm_map_offset_t src_end; /* End of entire region to be
9370 * copied */
9371 vm_map_offset_t src_base;
9372 vm_map_t base_map = src_map;
9373 boolean_t map_share=FALSE;
9374 submap_map_t *parent_maps = NULL;
9375
9376 vm_map_copy_t copy; /* Resulting copy */
9377 vm_map_address_t copy_addr;
9378 vm_map_size_t copy_size;
9379 boolean_t src_destroy;
9380 boolean_t use_maxprot;
9381 boolean_t preserve_purgeable;
9382
9383 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9384 return KERN_INVALID_ARGUMENT;
9385 }
9386
9387 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9388 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
9389 preserve_purgeable =
9390 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
9391
9392 /*
9393 * Check for copies of zero bytes.
9394 */
9395
9396 if (len == 0) {
9397 *copy_result = VM_MAP_COPY_NULL;
9398 return(KERN_SUCCESS);
9399 }
9400
9401 /*
9402 * Check that the end address doesn't overflow
9403 */
9404 src_end = src_addr + len;
9405 if (src_end < src_addr)
9406 return KERN_INVALID_ADDRESS;
9407
9408 /*
9409 * Compute (page aligned) start and end of region
9410 */
9411 src_start = vm_map_trunc_page(src_addr,
9412 VM_MAP_PAGE_MASK(src_map));
9413 src_end = vm_map_round_page(src_end,
9414 VM_MAP_PAGE_MASK(src_map));
9415
9416 /*
9417 * If the copy is sufficiently small, use a kernel buffer instead
9418 * of making a virtual copy. The theory being that the cost of
9419 * setting up VM (and taking C-O-W faults) dominates the copy costs
9420 * for small regions.
9421 */
9422 if ((len < msg_ool_size_small) &&
9423 !use_maxprot &&
9424 !preserve_purgeable &&
9425 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
9426 /*
9427 * Since the "msg_ool_size_small" threshold was increased and
9428 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
9429 * address space limits, we revert to doing a virtual copy if the
9430 * copied range goes beyond those limits. Otherwise, mach_vm_read()
9431 * of the commpage would now fail when it used to work.
9432 */
9433 (src_start >= vm_map_min(src_map) &&
9434 src_start < vm_map_max(src_map) &&
9435 src_end >= vm_map_min(src_map) &&
9436 src_end < vm_map_max(src_map)))
9437 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9438 src_destroy, copy_result);
9439
9440 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
9441
9442 /*
9443 * Allocate a header element for the list.
9444 *
9445 * Use the start and end in the header to
9446 * remember the endpoints prior to rounding.
9447 */
9448
9449 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9450 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
9451 vm_map_copy_first_entry(copy) =
9452 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9453 copy->type = VM_MAP_COPY_ENTRY_LIST;
9454 copy->cpy_hdr.nentries = 0;
9455 copy->cpy_hdr.entries_pageable = TRUE;
9456 #if 00
9457 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9458 #else
9459 /*
9460 * The copy entries can be broken down for a variety of reasons,
9461 * so we can't guarantee that they will remain map-aligned...
9462 * Will need to adjust the first copy_entry's "vme_start" and
9463 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9464 * rather than the original map's alignment.
9465 */
9466 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9467 #endif
9468
9469 vm_map_store_init( &(copy->cpy_hdr) );
9470
9471 copy->offset = src_addr;
9472 copy->size = len;
9473
9474 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9475
9476 #define RETURN(x) \
9477 MACRO_BEGIN \
9478 vm_map_unlock(src_map); \
9479 if(src_map != base_map) \
9480 vm_map_deallocate(src_map); \
9481 if (new_entry != VM_MAP_ENTRY_NULL) \
9482 vm_map_copy_entry_dispose(copy,new_entry); \
9483 vm_map_copy_discard(copy); \
9484 { \
9485 submap_map_t *_ptr; \
9486 \
9487 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
9488 parent_maps=parent_maps->next; \
9489 if (_ptr->parent_map != base_map) \
9490 vm_map_deallocate(_ptr->parent_map); \
9491 kfree(_ptr, sizeof(submap_map_t)); \
9492 } \
9493 } \
9494 MACRO_RETURN(x); \
9495 MACRO_END
9496
9497 /*
9498 * Find the beginning of the region.
9499 */
9500
9501 vm_map_lock(src_map);
9502
9503 /*
9504 * Lookup the original "src_addr" rather than the truncated
9505 * "src_start", in case "src_start" falls in a non-map-aligned
9506 * map entry *before* the map entry that contains "src_addr"...
9507 */
9508 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
9509 RETURN(KERN_INVALID_ADDRESS);
9510 if(!tmp_entry->is_sub_map) {
9511 /*
9512 * ... but clip to the map-rounded "src_start" rather than
9513 * "src_addr" to preserve map-alignment. We'll adjust the
9514 * first copy entry at the end, if needed.
9515 */
9516 vm_map_clip_start(src_map, tmp_entry, src_start);
9517 }
9518 if (src_start < tmp_entry->vme_start) {
9519 /*
9520 * Move "src_start" up to the start of the
9521 * first map entry to copy.
9522 */
9523 src_start = tmp_entry->vme_start;
9524 }
9525 /* set for later submap fix-up */
9526 copy_addr = src_start;
9527
9528 /*
9529 * Go through entries until we get to the end.
9530 */
9531
9532 while (TRUE) {
9533 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
9534 vm_map_size_t src_size; /* Size of source
9535 * map entry (in both
9536 * maps)
9537 */
9538
9539 vm_object_t src_object; /* Object to copy */
9540 vm_object_offset_t src_offset;
9541
9542 boolean_t src_needs_copy; /* Should source map
9543 * be made read-only
9544 * for copy-on-write?
9545 */
9546
9547 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9548
9549 boolean_t was_wired; /* Was source wired? */
9550 vm_map_version_t version; /* Version before locks
9551 * dropped to make copy
9552 */
9553 kern_return_t result; /* Return value from
9554 * copy_strategically.
9555 */
9556 while(tmp_entry->is_sub_map) {
9557 vm_map_size_t submap_len;
9558 submap_map_t *ptr;
9559
9560 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9561 ptr->next = parent_maps;
9562 parent_maps = ptr;
9563 ptr->parent_map = src_map;
9564 ptr->base_start = src_start;
9565 ptr->base_end = src_end;
9566 submap_len = tmp_entry->vme_end - src_start;
9567 if(submap_len > (src_end-src_start))
9568 submap_len = src_end-src_start;
9569 ptr->base_len = submap_len;
9570
9571 src_start -= tmp_entry->vme_start;
9572 src_start += VME_OFFSET(tmp_entry);
9573 src_end = src_start + submap_len;
9574 src_map = VME_SUBMAP(tmp_entry);
9575 vm_map_lock(src_map);
9576 /* keep an outstanding reference for all maps in */
9577 /* the chain of parent maps, except the base map */
9578 vm_map_reference(src_map);
9579 vm_map_unlock(ptr->parent_map);
9580 if (!vm_map_lookup_entry(
9581 src_map, src_start, &tmp_entry))
9582 RETURN(KERN_INVALID_ADDRESS);
9583 map_share = TRUE;
9584 if(!tmp_entry->is_sub_map)
9585 vm_map_clip_start(src_map, tmp_entry, src_start);
9586 src_entry = tmp_entry;
9587 }
9588 /* we are now in the lowest level submap... */
9589
9590 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9591 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
9592 /* This is not supported for now. In the future */
9593 /* we will need to detect the phys_contig */
9594 /* condition and then upgrade copy_slowly */
9595 /* to do a physical copy from the device-memory- */
9596 /* based object. We can piggy-back off of */
9597 /* the "was_wired" boolean to set up the */
9598 /* proper handling. */
9599 RETURN(KERN_PROTECTION_FAILURE);
9600 }
9601 /*
9602 * Create a new address map entry to hold the result.
9603 * Fill in the fields from the appropriate source entries.
9604 * We must unlock the source map to do this if we need
9605 * to allocate a map entry.
9606 */
9607 if (new_entry == VM_MAP_ENTRY_NULL) {
9608 version.main_timestamp = src_map->timestamp;
9609 vm_map_unlock(src_map);
9610
9611 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9612
9613 vm_map_lock(src_map);
9614 if ((version.main_timestamp + 1) != src_map->timestamp) {
9615 if (!vm_map_lookup_entry(src_map, src_start,
9616 &tmp_entry)) {
9617 RETURN(KERN_INVALID_ADDRESS);
9618 }
9619 if (!tmp_entry->is_sub_map)
9620 vm_map_clip_start(src_map, tmp_entry, src_start);
9621 continue; /* restart w/ new tmp_entry */
9622 }
9623 }
9624
9625 /*
9626 * Verify that the region can be read.
9627 */
9628 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
9629 !use_maxprot) ||
9630 (src_entry->max_protection & VM_PROT_READ) == 0)
9631 RETURN(KERN_PROTECTION_FAILURE);
9632
9633 /*
9634 * Clip against the endpoints of the entire region.
9635 */
9636
9637 vm_map_clip_end(src_map, src_entry, src_end);
9638
9639 src_size = src_entry->vme_end - src_start;
9640 src_object = VME_OBJECT(src_entry);
9641 src_offset = VME_OFFSET(src_entry);
9642 was_wired = (src_entry->wired_count != 0);
9643
9644 vm_map_entry_copy(new_entry, src_entry);
9645 if (new_entry->is_sub_map) {
9646 /* clr address space specifics */
9647 new_entry->use_pmap = FALSE;
9648 }
9649
9650 /*
9651 * Attempt non-blocking copy-on-write optimizations.
9652 */
9653
9654 if (src_destroy &&
9655 (src_object == VM_OBJECT_NULL ||
9656 (src_object->internal && !src_object->true_share
9657 && !map_share))) {
9658 /*
9659 * If we are destroying the source, and the object
9660 * is internal, we can move the object reference
9661 * from the source to the copy. The copy is
9662 * copy-on-write only if the source is.
9663 * We make another reference to the object, because
9664 * destroying the source entry will deallocate it.
9665 */
9666 vm_object_reference(src_object);
9667
9668 /*
9669 * The copy is always unwired; vm_map_copy_entry
9670 * has set its wired count to zero.
9671 */
9672
9673 goto CopySuccessful;
9674 }
9675
9676
9677 RestartCopy:
9678 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
9679 src_object, new_entry, VME_OBJECT(new_entry),
9680 was_wired, 0);
9681 if ((src_object == VM_OBJECT_NULL ||
9682 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9683 vm_object_copy_quickly(
9684 &VME_OBJECT(new_entry),
9685 src_offset,
9686 src_size,
9687 &src_needs_copy,
9688 &new_entry_needs_copy)) {
9689
9690 new_entry->needs_copy = new_entry_needs_copy;
9691
9692 /*
9693 * Handle copy-on-write obligations
9694 */
9695
9696 if (src_needs_copy && !tmp_entry->needs_copy) {
9697 vm_prot_t prot;
9698
9699 prot = src_entry->protection & ~VM_PROT_WRITE;
9700
9701 if (override_nx(src_map, VME_ALIAS(src_entry))
9702 && prot)
9703 prot |= VM_PROT_EXECUTE;
9704
9705 vm_object_pmap_protect(
9706 src_object,
9707 src_offset,
9708 src_size,
9709 (src_entry->is_shared ?
9710 PMAP_NULL
9711 : src_map->pmap),
9712 src_entry->vme_start,
9713 prot);
9714
9715 assert(tmp_entry->wired_count == 0);
9716 tmp_entry->needs_copy = TRUE;
9717 }
9718
9719 /*
9720 * The map has never been unlocked, so it's safe
9721 * to move to the next entry rather than doing
9722 * another lookup.
9723 */
9724
9725 goto CopySuccessful;
9726 }
9727
9728 /*
9729 * Take an object reference, so that we may
9730 * release the map lock(s).
9731 */
9732
9733 assert(src_object != VM_OBJECT_NULL);
9734 vm_object_reference(src_object);
9735
9736 /*
9737 * Record the timestamp for later verification.
9738 * Unlock the map.
9739 */
9740
9741 version.main_timestamp = src_map->timestamp;
9742 vm_map_unlock(src_map); /* Increments timestamp once! */
9743
9744 /*
9745 * Perform the copy
9746 */
9747
9748 if (was_wired) {
9749 CopySlowly:
9750 vm_object_lock(src_object);
9751 result = vm_object_copy_slowly(
9752 src_object,
9753 src_offset,
9754 src_size,
9755 THREAD_UNINT,
9756 &VME_OBJECT(new_entry));
9757 VME_OFFSET_SET(new_entry, 0);
9758 new_entry->needs_copy = FALSE;
9759
9760 }
9761 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9762 (tmp_entry->is_shared || map_share)) {
9763 vm_object_t new_object;
9764
9765 vm_object_lock_shared(src_object);
9766 new_object = vm_object_copy_delayed(
9767 src_object,
9768 src_offset,
9769 src_size,
9770 TRUE);
9771 if (new_object == VM_OBJECT_NULL)
9772 goto CopySlowly;
9773
9774 VME_OBJECT_SET(new_entry, new_object);
9775 assert(new_entry->wired_count == 0);
9776 new_entry->needs_copy = TRUE;
9777 assert(!new_entry->iokit_acct);
9778 assert(new_object->purgable == VM_PURGABLE_DENY);
9779 new_entry->use_pmap = TRUE;
9780 result = KERN_SUCCESS;
9781
9782 } else {
9783 vm_object_offset_t new_offset;
9784 new_offset = VME_OFFSET(new_entry);
9785 result = vm_object_copy_strategically(src_object,
9786 src_offset,
9787 src_size,
9788 &VME_OBJECT(new_entry),
9789 &new_offset,
9790 &new_entry_needs_copy);
9791 if (new_offset != VME_OFFSET(new_entry)) {
9792 VME_OFFSET_SET(new_entry, new_offset);
9793 }
9794
9795 new_entry->needs_copy = new_entry_needs_copy;
9796 }
9797
9798 if (result == KERN_SUCCESS &&
9799 preserve_purgeable &&
9800 src_object->purgable != VM_PURGABLE_DENY) {
9801 vm_object_t new_object;
9802
9803 new_object = VME_OBJECT(new_entry);
9804 assert(new_object != src_object);
9805 vm_object_lock(new_object);
9806 assert(new_object->ref_count == 1);
9807 assert(new_object->shadow == VM_OBJECT_NULL);
9808 assert(new_object->copy == VM_OBJECT_NULL);
9809 assert(new_object->vo_purgeable_owner == NULL);
9810
9811 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
9812 new_object->true_share = TRUE;
9813 /* start as non-volatile with no owner... */
9814 new_object->purgable = VM_PURGABLE_NONVOLATILE;
9815 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
9816 /* ... and move to src_object's purgeable state */
9817 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
9818 int state;
9819 state = src_object->purgable;
9820 vm_object_purgable_control(
9821 new_object,
9822 VM_PURGABLE_SET_STATE,
9823 &state);
9824 }
9825 vm_object_unlock(new_object);
9826 new_object = VM_OBJECT_NULL;
9827 }
9828
9829 if (result != KERN_SUCCESS &&
9830 result != KERN_MEMORY_RESTART_COPY) {
9831 vm_map_lock(src_map);
9832 RETURN(result);
9833 }
9834
9835 /*
9836 * Throw away the extra reference
9837 */
9838
9839 vm_object_deallocate(src_object);
9840
9841 /*
9842 * Verify that the map has not substantially
9843 * changed while the copy was being made.
9844 */
9845
9846 vm_map_lock(src_map);
9847
9848 if ((version.main_timestamp + 1) == src_map->timestamp)
9849 goto VerificationSuccessful;
9850
9851 /*
9852 * Simple version comparison failed.
9853 *
9854 * Retry the lookup and verify that the
9855 * same object/offset are still present.
9856 *
9857 * [Note: a memory manager that colludes with
9858 * the calling task can detect that we have
9859 * cheated. While the map was unlocked, the
9860 * mapping could have been changed and restored.]
9861 */
9862
9863 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9864 if (result != KERN_MEMORY_RESTART_COPY) {
9865 vm_object_deallocate(VME_OBJECT(new_entry));
9866 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
9867 assert(!new_entry->iokit_acct);
9868 new_entry->use_pmap = TRUE;
9869 }
9870 RETURN(KERN_INVALID_ADDRESS);
9871 }
9872
9873 src_entry = tmp_entry;
9874 vm_map_clip_start(src_map, src_entry, src_start);
9875
9876 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9877 !use_maxprot) ||
9878 ((src_entry->max_protection & VM_PROT_READ) == 0))
9879 goto VerificationFailed;
9880
9881 if (src_entry->vme_end < new_entry->vme_end) {
9882 /*
9883 * This entry might have been shortened
9884 * (vm_map_clip_end) or been replaced with
9885 * an entry that ends closer to "src_start"
9886 * than before.
9887 * Adjust "new_entry" accordingly; copying
9888 * less memory would be correct but we also
9889 * redo the copy (see below) if the new entry
9890 * no longer points at the same object/offset.
9891 */
9892 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9893 VM_MAP_COPY_PAGE_MASK(copy)));
9894 new_entry->vme_end = src_entry->vme_end;
9895 src_size = new_entry->vme_end - src_start;
9896 } else if (src_entry->vme_end > new_entry->vme_end) {
9897 /*
9898 * This entry might have been extended
9899 * (vm_map_entry_simplify() or coalesce)
9900 * or been replaced with an entry that ends farther
9901 * from "src_start" than before.
9902 *
9903 * We've called vm_object_copy_*() only on
9904 * the previous <start:end> range, so we can't
9905 * just extend new_entry. We have to re-do
9906 * the copy based on the new entry as if it was
9907 * pointing at a different object/offset (see
9908 * "Verification failed" below).
9909 */
9910 }
9911
9912 if ((VME_OBJECT(src_entry) != src_object) ||
9913 (VME_OFFSET(src_entry) != src_offset) ||
9914 (src_entry->vme_end > new_entry->vme_end)) {
9915
9916 /*
9917 * Verification failed.
9918 *
9919 * Start over with this top-level entry.
9920 */
9921
9922 VerificationFailed: ;
9923
9924 vm_object_deallocate(VME_OBJECT(new_entry));
9925 tmp_entry = src_entry;
9926 continue;
9927 }
9928
9929 /*
9930 * Verification succeeded.
9931 */
9932
9933 VerificationSuccessful: ;
9934
9935 if (result == KERN_MEMORY_RESTART_COPY)
9936 goto RestartCopy;
9937
9938 /*
9939 * Copy succeeded.
9940 */
9941
9942 CopySuccessful: ;
9943
9944 /*
9945 * Link in the new copy entry.
9946 */
9947
9948 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9949 new_entry);
9950
9951 /*
9952 * Determine whether the entire region
9953 * has been copied.
9954 */
9955 src_base = src_start;
9956 src_start = new_entry->vme_end;
9957 new_entry = VM_MAP_ENTRY_NULL;
9958 while ((src_start >= src_end) && (src_end != 0)) {
9959 submap_map_t *ptr;
9960
9961 if (src_map == base_map) {
9962 /* back to the top */
9963 break;
9964 }
9965
9966 ptr = parent_maps;
9967 assert(ptr != NULL);
9968 parent_maps = parent_maps->next;
9969
9970 /* fix up the damage we did in that submap */
9971 vm_map_simplify_range(src_map,
9972 src_base,
9973 src_end);
9974
9975 vm_map_unlock(src_map);
9976 vm_map_deallocate(src_map);
9977 vm_map_lock(ptr->parent_map);
9978 src_map = ptr->parent_map;
9979 src_base = ptr->base_start;
9980 src_start = ptr->base_start + ptr->base_len;
9981 src_end = ptr->base_end;
9982 if (!vm_map_lookup_entry(src_map,
9983 src_start,
9984 &tmp_entry) &&
9985 (src_end > src_start)) {
9986 RETURN(KERN_INVALID_ADDRESS);
9987 }
9988 kfree(ptr, sizeof(submap_map_t));
9989 if (parent_maps == NULL)
9990 map_share = FALSE;
9991 src_entry = tmp_entry->vme_prev;
9992 }
9993
9994 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9995 (src_start >= src_addr + len) &&
9996 (src_addr + len != 0)) {
9997 /*
9998 * Stop copying now, even though we haven't reached
9999 * "src_end". We'll adjust the end of the last copy
10000 * entry at the end, if needed.
10001 *
10002 * If src_map's alignment is different from the
10003 * system's page-alignment, there could be
10004 * extra non-map-aligned map entries between
10005 * the original (non-rounded) "src_addr + len"
10006 * and the rounded "src_end".
10007 * We do not want to copy those map entries since
10008 * they're not part of the copied range.
10009 */
10010 break;
10011 }
10012
10013 if ((src_start >= src_end) && (src_end != 0))
10014 break;
10015
10016 /*
10017 * Verify that there are no gaps in the region
10018 */
10019
10020 tmp_entry = src_entry->vme_next;
10021 if ((tmp_entry->vme_start != src_start) ||
10022 (tmp_entry == vm_map_to_entry(src_map))) {
10023 RETURN(KERN_INVALID_ADDRESS);
10024 }
10025 }
10026
10027 /*
10028 * If the source should be destroyed, do it now, since the
10029 * copy was successful.
10030 */
10031 if (src_destroy) {
10032 (void) vm_map_delete(
10033 src_map,
10034 vm_map_trunc_page(src_addr,
10035 VM_MAP_PAGE_MASK(src_map)),
10036 src_end,
10037 ((src_map == kernel_map) ?
10038 VM_MAP_REMOVE_KUNWIRE :
10039 VM_MAP_NO_FLAGS),
10040 VM_MAP_NULL);
10041 } else {
10042 /* fix up the damage we did in the base map */
10043 vm_map_simplify_range(
10044 src_map,
10045 vm_map_trunc_page(src_addr,
10046 VM_MAP_PAGE_MASK(src_map)),
10047 vm_map_round_page(src_end,
10048 VM_MAP_PAGE_MASK(src_map)));
10049 }
10050
10051 vm_map_unlock(src_map);
10052
10053 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
10054 vm_map_offset_t original_start, original_offset, original_end;
10055
10056 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
10057
10058 /* adjust alignment of first copy_entry's "vme_start" */
10059 tmp_entry = vm_map_copy_first_entry(copy);
10060 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10061 vm_map_offset_t adjustment;
10062
10063 original_start = tmp_entry->vme_start;
10064 original_offset = VME_OFFSET(tmp_entry);
10065
10066 /* map-align the start of the first copy entry... */
10067 adjustment = (tmp_entry->vme_start -
10068 vm_map_trunc_page(
10069 tmp_entry->vme_start,
10070 VM_MAP_PAGE_MASK(src_map)));
10071 tmp_entry->vme_start -= adjustment;
10072 VME_OFFSET_SET(tmp_entry,
10073 VME_OFFSET(tmp_entry) - adjustment);
10074 copy_addr -= adjustment;
10075 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10076 /* ... adjust for mis-aligned start of copy range */
10077 adjustment =
10078 (vm_map_trunc_page(copy->offset,
10079 PAGE_MASK) -
10080 vm_map_trunc_page(copy->offset,
10081 VM_MAP_PAGE_MASK(src_map)));
10082 if (adjustment) {
10083 assert(page_aligned(adjustment));
10084 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10085 tmp_entry->vme_start += adjustment;
10086 VME_OFFSET_SET(tmp_entry,
10087 (VME_OFFSET(tmp_entry) +
10088 adjustment));
10089 copy_addr += adjustment;
10090 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10091 }
10092
10093 /*
10094 * Assert that the adjustments haven't exposed
10095 * more than was originally copied...
10096 */
10097 assert(tmp_entry->vme_start >= original_start);
10098 assert(VME_OFFSET(tmp_entry) >= original_offset);
10099 /*
10100 * ... and that it did not adjust outside of
10101 * a single 16K page.
10102 */
10103 assert(vm_map_trunc_page(tmp_entry->vme_start,
10104 VM_MAP_PAGE_MASK(src_map)) ==
10105 vm_map_trunc_page(original_start,
10106 VM_MAP_PAGE_MASK(src_map)));
10107 }
10108
10109 /* adjust alignment of last copy_entry's "vme_end" */
10110 tmp_entry = vm_map_copy_last_entry(copy);
10111 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10112 vm_map_offset_t adjustment;
10113
10114 original_end = tmp_entry->vme_end;
10115
10116 /* map-align the end of the last copy entry... */
10117 tmp_entry->vme_end =
10118 vm_map_round_page(tmp_entry->vme_end,
10119 VM_MAP_PAGE_MASK(src_map));
10120 /* ... adjust for mis-aligned end of copy range */
10121 adjustment =
10122 (vm_map_round_page((copy->offset +
10123 copy->size),
10124 VM_MAP_PAGE_MASK(src_map)) -
10125 vm_map_round_page((copy->offset +
10126 copy->size),
10127 PAGE_MASK));
10128 if (adjustment) {
10129 assert(page_aligned(adjustment));
10130 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10131 tmp_entry->vme_end -= adjustment;
10132 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10133 }
10134
10135 /*
10136 * Assert that the adjustments haven't exposed
10137 * more than was originally copied...
10138 */
10139 assert(tmp_entry->vme_end <= original_end);
10140 /*
10141 * ... and that it did not adjust outside of
10142 * a single 16K page.
10143 */
10144 assert(vm_map_round_page(tmp_entry->vme_end,
10145 VM_MAP_PAGE_MASK(src_map)) ==
10146 vm_map_round_page(original_end,
10147 VM_MAP_PAGE_MASK(src_map)));
10148 }
10149 }
10150
10151 /* Fix up start and end points in copy. This is necessary */
10152 /* when the various entries in the copy object were picked */
10153 /* up from different sub-maps */
10154
10155 tmp_entry = vm_map_copy_first_entry(copy);
10156 copy_size = 0; /* compute actual size */
10157 while (tmp_entry != vm_map_copy_to_entry(copy)) {
10158 assert(VM_MAP_PAGE_ALIGNED(
10159 copy_addr + (tmp_entry->vme_end -
10160 tmp_entry->vme_start),
10161 VM_MAP_COPY_PAGE_MASK(copy)));
10162 assert(VM_MAP_PAGE_ALIGNED(
10163 copy_addr,
10164 VM_MAP_COPY_PAGE_MASK(copy)));
10165
10166 /*
10167 * The copy_entries will be injected directly into the
10168 * destination map and might not be "map aligned" there...
10169 */
10170 tmp_entry->map_aligned = FALSE;
10171
10172 tmp_entry->vme_end = copy_addr +
10173 (tmp_entry->vme_end - tmp_entry->vme_start);
10174 tmp_entry->vme_start = copy_addr;
10175 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10176 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
10177 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
10178 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
10179 }
10180
10181 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
10182 copy_size < copy->size) {
10183 /*
10184 * The actual size of the VM map copy is smaller than what
10185 * was requested by the caller. This must be because some
10186 * PAGE_SIZE-sized pages are missing at the end of the last
10187 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
10188 * The caller might not have been aware of those missing
10189 * pages and might not want to be made aware of them, which is
10190 * fine as long as they don't try to access (and crash on)
10191 * those missing pages.
10192 * Let's adjust the size of the "copy", to avoid failing
10193 * in vm_map_copyout() or vm_map_copy_overwrite().
10194 */
10195 assert(vm_map_round_page(copy_size,
10196 VM_MAP_PAGE_MASK(src_map)) ==
10197 vm_map_round_page(copy->size,
10198 VM_MAP_PAGE_MASK(src_map)));
10199 copy->size = copy_size;
10200 }
10201
10202 *copy_result = copy;
10203 return(KERN_SUCCESS);
10204
10205 #undef RETURN
10206 }
10207
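/*
 * vm_map_copy_extract:
 *
 * Extract the VM map entries covering [src_addr, src_addr + len)
 * from "src_map" into a new entry-list vm_map_copy, without copying
 * the underlying data: the entries are produced by
 * vm_map_remap_extract() with VM_INHERIT_SHARE. The current and
 * maximum protections for the extracted range are returned through
 * "cur_prot" and "max_prot".
 *
 * Illustrative use (a sketch only; "task_map", "addr" and "size"
 * stand for hypothetical caller-supplied values):
 *
 *	kern_return_t	kr;
 *	vm_map_copy_t	copy;
 *	vm_prot_t	cur_prot, max_prot;
 *
 *	kr = vm_map_copy_extract(task_map, addr, size,
 *				 &copy, &cur_prot, &max_prot);
 *	if (kr == KERN_SUCCESS && copy != VM_MAP_COPY_NULL) {
 *		... use "copy"; discard it with vm_map_copy_discard(copy)
 *		    if it is not consumed by another VM routine ...
 *	}
 */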
10208 kern_return_t
10209 vm_map_copy_extract(
10210 vm_map_t src_map,
10211 vm_map_address_t src_addr,
10212 vm_map_size_t len,
10213 vm_map_copy_t *copy_result, /* OUT */
10214 vm_prot_t *cur_prot, /* OUT */
10215 vm_prot_t *max_prot)
10216 {
10217 vm_map_offset_t src_start, src_end;
10218 vm_map_copy_t copy;
10219 kern_return_t kr;
10220
10221 /*
10222 * Check for copies of zero bytes.
10223 */
10224
10225 if (len == 0) {
10226 *copy_result = VM_MAP_COPY_NULL;
10227 return(KERN_SUCCESS);
10228 }
10229
10230 /*
10231 * Check that the end address doesn't overflow
10232 */
10233 src_end = src_addr + len;
10234 if (src_end < src_addr)
10235 return KERN_INVALID_ADDRESS;
10236
10237 /*
10238 * Compute (page aligned) start and end of region
10239 */
10240 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10241 src_end = vm_map_round_page(src_end, PAGE_MASK);
10242
10243 /*
10244 * Allocate a header element for the list.
10245 *
10246 * Use the start and end in the header to
10247 * remember the endpoints prior to rounding.
10248 */
10249
10250 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10251 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10252 vm_map_copy_first_entry(copy) =
10253 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10254 copy->type = VM_MAP_COPY_ENTRY_LIST;
10255 copy->cpy_hdr.nentries = 0;
10256 copy->cpy_hdr.entries_pageable = TRUE;
10257
10258 vm_map_store_init(&copy->cpy_hdr);
10259
10260 copy->offset = 0;
10261 copy->size = len;
10262
10263 kr = vm_map_remap_extract(src_map,
10264 src_addr,
10265 len,
10266 FALSE, /* copy */
10267 &copy->cpy_hdr,
10268 cur_prot,
10269 max_prot,
10270 VM_INHERIT_SHARE,
10271 TRUE, /* pageable */
10272 FALSE); /* same_map */
10273 if (kr != KERN_SUCCESS) {
10274 vm_map_copy_discard(copy);
10275 return kr;
10276 }
10277
10278 *copy_result = copy;
10279 return KERN_SUCCESS;
10280 }
10281
10282 /*
10283 * vm_map_copyin_object:
10284 *
10285 * Create a copy object from an object.
10286 * Our caller donates an object reference.
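 *
 * The resulting copy is of type VM_MAP_COPY_OBJECT: it carries the
 * object pointer directly rather than a list of map entries.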
10287 */
10288
10289 kern_return_t
10290 vm_map_copyin_object(
10291 vm_object_t object,
10292 vm_object_offset_t offset, /* offset of region in object */
10293 vm_object_size_t size, /* size of region in object */
10294 vm_map_copy_t *copy_result) /* OUT */
10295 {
10296 vm_map_copy_t copy; /* Resulting copy */
10297
10298 /*
10299 * We drop the object into a special copy object
10300 * that contains the object directly.
10301 */
10302
10303 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10304 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10305 copy->type = VM_MAP_COPY_OBJECT;
10306 copy->cpy_object = object;
10307 copy->offset = offset;
10308 copy->size = size;
10309
10310 *copy_result = copy;
10311 return(KERN_SUCCESS);
10312 }
10313
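/*
 * vm_map_fork_share:
 *
 * Share "old_entry" of "old_map" with a child "new_map" during a
 * fork.  Nested (use_pmap) submaps are re-nested into the child's
 * pmap, a missing backing object is allocated, and a shadow object
 * is created when a deferred or future symmetric copy would
 * otherwise be broken.  The cloned entry is marked shared in both
 * maps, linked at the end of "new_map", and the physical mappings
 * are copied with pmap_copy().
 */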
10314 static void
10315 vm_map_fork_share(
10316 vm_map_t old_map,
10317 vm_map_entry_t old_entry,
10318 vm_map_t new_map)
10319 {
10320 vm_object_t object;
10321 vm_map_entry_t new_entry;
10322
10323 /*
10324 * New sharing code. New map entry
10325 * references original object. Internal
10326 * objects use asynchronous copy algorithm for
10327 * future copies. First make sure we have
10328 * the right object. If we need a shadow,
10329 * or someone else already has one, then
10330 * make a new shadow and share it.
10331 */
10332
10333 object = VME_OBJECT(old_entry);
10334 if (old_entry->is_sub_map) {
10335 assert(old_entry->wired_count == 0);
10336 #ifndef NO_NESTED_PMAP
10337 if(old_entry->use_pmap) {
10338 kern_return_t result;
10339
10340 result = pmap_nest(new_map->pmap,
10341 (VME_SUBMAP(old_entry))->pmap,
10342 (addr64_t)old_entry->vme_start,
10343 (addr64_t)old_entry->vme_start,
10344 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
10345 if(result)
10346 panic("vm_map_fork_share: pmap_nest failed!");
10347 }
10348 #endif /* NO_NESTED_PMAP */
10349 } else if (object == VM_OBJECT_NULL) {
10350 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
10351 old_entry->vme_start));
10352 VME_OFFSET_SET(old_entry, 0);
10353 VME_OBJECT_SET(old_entry, object);
10354 old_entry->use_pmap = TRUE;
10355 assert(!old_entry->needs_copy);
10356 } else if (object->copy_strategy !=
10357 MEMORY_OBJECT_COPY_SYMMETRIC) {
10358
10359 /*
10360 * We are already using an asymmetric
10361 * copy, and therefore we already have
10362 * the right object.
10363 */
10364
10365 assert(! old_entry->needs_copy);
10366 }
10367 else if (old_entry->needs_copy || /* case 1 */
10368 object->shadowed || /* case 2 */
10369 (!object->true_share && /* case 3 */
10370 !old_entry->is_shared &&
10371 (object->vo_size >
10372 (vm_map_size_t)(old_entry->vme_end -
10373 old_entry->vme_start)))) {
10374
10375 /*
10376 * We need to create a shadow.
10377 * There are three cases here.
10378 * In the first case, we need to
10379 * complete a deferred symmetrical
10380 * copy that we participated in.
10381 * In the second and third cases,
10382 * we need to create the shadow so
10383 * that changes that we make to the
10384 * object do not interfere with
10385 * any symmetrical copies which
10386 * have occurred (case 2) or which
10387 * might occur (case 3).
10388 *
10389 * The first case is when we had
10390 * deferred shadow object creation
10391 * via the entry->needs_copy mechanism.
10392 * This mechanism only works when
10393 * only one entry points to the source
10394 * object, and we are about to create
10395 * a second entry pointing to the
10396 * same object. The problem is that
10397 * there is no way of mapping from
10398 * an object to the entries pointing
10399 * to it. (Deferred shadow creation
10400 * works with one entry because it occurs
10401 * at fault time, and we walk from the
10402 * entry to the object when handling
10403 * the fault.)
10404 *
10405 * The second case is when the object
10406 * to be shared has already been copied
10407 * with a symmetric copy, but we point
10408 * directly to the object without
10409 * needs_copy set in our entry. (This
10410 * can happen because different ranges
10411 * of an object can be pointed to by
10412 * different entries. In particular,
10413 * a single entry pointing to an object
10414 * can be split by a call to vm_inherit,
10415 * which, combined with task_create, can
10416 * result in the different entries
10417 * having different needs_copy values.)
10418 * The shadowed flag in the object allows
10419 * us to detect this case. The problem
10420 * with this case is that if this object
10421 * has or will have shadows, then we
10422 * must not perform an asymmetric copy
10423 * of this object, since such a copy
10424 * allows the object to be changed, which
10425 * will break the previous symmetrical
10426 * copies (which rely upon the object
10427 * not changing). In a sense, the shadowed
10428 * flag says "don't change this object".
10429 * We fix this by creating a shadow
10430 * object for this object, and sharing
10431 * that. This works because we are free
10432 * to change the shadow object (and thus
10433 * to use an asymmetric copy strategy);
10434 * this is also semantically correct,
10435 * since this object is temporary, and
10436 * therefore a copy of the object is
10437 * as good as the object itself. (This
10438 * is not true for permanent objects,
10439 * since the pager needs to see changes,
10440 * which won't happen if the changes
10441 * are made to a copy.)
10442 *
10443 * The third case is when the object
10444 * to be shared has parts sticking
10445 * outside of the entry we're working
10446 * with, and thus may in the future
10447 * be subject to a symmetrical copy.
10448 * (This is a preemptive version of
10449 * case 2.)
10450 */
10451 VME_OBJECT_SHADOW(old_entry,
10452 (vm_map_size_t) (old_entry->vme_end -
10453 old_entry->vme_start));
10454
10455 /*
10456 * If we're making a shadow for reasons other than
10457 * copy-on-write, then we have
10458 * to remove write permission.
10459 */
10460
10461 if (!old_entry->needs_copy &&
10462 (old_entry->protection & VM_PROT_WRITE)) {
10463 vm_prot_t prot;
10464
10465 prot = old_entry->protection & ~VM_PROT_WRITE;
10466
10467 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
10468 prot |= VM_PROT_EXECUTE;
10469
10470 if (old_map->mapped_in_other_pmaps) {
10471 vm_object_pmap_protect(
10472 VME_OBJECT(old_entry),
10473 VME_OFFSET(old_entry),
10474 (old_entry->vme_end -
10475 old_entry->vme_start),
10476 PMAP_NULL,
10477 old_entry->vme_start,
10478 prot);
10479 } else {
10480 pmap_protect(old_map->pmap,
10481 old_entry->vme_start,
10482 old_entry->vme_end,
10483 prot);
10484 }
10485 }
10486
10487 old_entry->needs_copy = FALSE;
10488 object = VME_OBJECT(old_entry);
10489 }
10490
10491
10492 /*
10493 * If object was using a symmetric copy strategy,
10494 * change its copy strategy to the default
10495 * asymmetric copy strategy, which is copy_delay
10496 * in the non-NORMA case and copy_call in the
10497 * NORMA case. Bump the reference count for the
10498 * new entry.
10499 */
10500
10501 if(old_entry->is_sub_map) {
10502 vm_map_lock(VME_SUBMAP(old_entry));
10503 vm_map_reference(VME_SUBMAP(old_entry));
10504 vm_map_unlock(VME_SUBMAP(old_entry));
10505 } else {
10506 vm_object_lock(object);
10507 vm_object_reference_locked(object);
10508 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10509 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10510 }
10511 vm_object_unlock(object);
10512 }
10513
10514 /*
10515 * Clone the entry, using object ref from above.
10516 * Mark both entries as shared.
10517 */
10518
10519 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10520 * map or descendants */
10521 vm_map_entry_copy(new_entry, old_entry);
10522 old_entry->is_shared = TRUE;
10523 new_entry->is_shared = TRUE;
10524
10525 /*
10526 * If the old entry's inheritance is VM_INHERIT_NONE,
10527 * the new entry is for a corpse fork: remove the
10528 * write permission from the new entry.
10529 */
10530 if (old_entry->inheritance == VM_INHERIT_NONE) {
10531
10532 new_entry->protection &= ~VM_PROT_WRITE;
10533 new_entry->max_protection &= ~VM_PROT_WRITE;
10534 }
10535
10536 /*
10537 * Insert the entry into the new map -- we
10538 * know we're inserting at the end of the new
10539 * map.
10540 */
10541
10542 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
10543
10544 /*
10545 * Update the physical map
10546 */
10547
10548 if (old_entry->is_sub_map) {
10549 /* Bill Angell pmap support goes here */
10550 } else {
10551 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
10552 old_entry->vme_end - old_entry->vme_start,
10553 old_entry->vme_start);
10554 }
10555 }
10556
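/*
 * vm_map_fork_copy:
 *
 * Copy the region of "old_map" covered by *old_entry_p into
 * "new_map", using vm_map_copyin_internal() with
 * VM_MAP_COPYIN_USE_MAXPROT.  Returns TRUE on success and FALSE if
 * the region could not be copied (the caller then skips it); in both
 * cases *old_entry_p is updated to where the caller's traversal of
 * "old_map" should resume, and "old_map" is returned locked.
 */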
10557 static boolean_t
10558 vm_map_fork_copy(
10559 vm_map_t old_map,
10560 vm_map_entry_t *old_entry_p,
10561 vm_map_t new_map,
10562 int vm_map_copyin_flags)
10563 {
10564 vm_map_entry_t old_entry = *old_entry_p;
10565 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10566 vm_map_offset_t start = old_entry->vme_start;
10567 vm_map_copy_t copy;
10568 vm_map_entry_t last = vm_map_last_entry(new_map);
10569
10570 vm_map_unlock(old_map);
10571 /*
10572 * Use maxprot version of copyin because we
10573 * care about whether this memory can ever
10574 * be accessed, not just whether it's accessible
10575 * right now.
10576 */
10577 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
10578 if (vm_map_copyin_internal(old_map, start, entry_size,
10579 vm_map_copyin_flags, &copy)
10580 != KERN_SUCCESS) {
10581 /*
10582 * The map might have changed while it
10583 * was unlocked, check it again. Skip
10584 * any blank space or permanently
10585 * unreadable region.
10586 */
10587 vm_map_lock(old_map);
10588 if (!vm_map_lookup_entry(old_map, start, &last) ||
10589 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
10590 last = last->vme_next;
10591 }
10592 *old_entry_p = last;
10593
10594 /*
10595 * XXX For some error returns, want to
10596 * XXX skip to the next element. Note
10597 * that INVALID_ADDRESS and
10598 * PROTECTION_FAILURE are handled above.
10599 */
10600
10601 return FALSE;
10602 }
10603
10604 /*
10605 * Insert the copy into the new map
10606 */
10607
10608 vm_map_copy_insert(new_map, last, copy);
10609
10610 /*
10611 * Pick up the traversal at the end of
10612 * the copied region.
10613 */
10614
10615 vm_map_lock(old_map);
10616 start += entry_size;
10617 if (! vm_map_lookup_entry(old_map, start, &last)) {
10618 last = last->vme_next;
10619 } else {
10620 if (last->vme_start == start) {
10621 /*
10622 * No need to clip here and we don't
10623 * want to cause any unnecessary
10624 * unnesting...
10625 */
10626 } else {
10627 vm_map_clip_start(old_map, last, start);
10628 }
10629 }
10630 *old_entry_p = last;
10631
10632 return TRUE;
10633 }
10634
10635 /*
10636 * vm_map_fork:
10637 *
10638 * Create and return a new map based on the old
10639 * map, according to the inheritance values on the
10640 * regions in that map and the options.
10641 *
10642 * The source map must not be locked.
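 *
 * The only supported options are VM_MAP_FORK_SHARE_IF_INHERIT_NONE
 * and VM_MAP_FORK_PRESERVE_PURGEABLE; any other option bit makes the
 * call return VM_MAP_NULL.
 *
 * Illustrative call (a sketch only; "child_ledger" stands for the
 * ledger a hypothetical caller would supply for the new pmap):
 *
 *	new_map = vm_map_fork(child_ledger, old_map,
 *			      VM_MAP_FORK_PRESERVE_PURGEABLE);
 *	if (new_map == VM_MAP_NULL)
 *		... an unsupported option bit was passed ...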
10643 */
10644 vm_map_t
10645 vm_map_fork(
10646 ledger_t ledger,
10647 vm_map_t old_map,
10648 int options)
10649 {
10650 pmap_t new_pmap;
10651 vm_map_t new_map;
10652 vm_map_entry_t old_entry;
10653 vm_map_size_t new_size = 0, entry_size;
10654 vm_map_entry_t new_entry;
10655 boolean_t src_needs_copy;
10656 boolean_t new_entry_needs_copy;
10657 boolean_t pmap_is64bit;
10658 int vm_map_copyin_flags;
10659
10660 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
10661 VM_MAP_FORK_PRESERVE_PURGEABLE)) {
10662 /* unsupported option */
10663 return VM_MAP_NULL;
10664 }
10665
10666 pmap_is64bit =
10667 #if defined(__i386__) || defined(__x86_64__)
10668 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
10669 #else
10670 #error Unknown architecture.
10671 #endif
10672
10673 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
10674
10675 vm_map_reference_swap(old_map);
10676 vm_map_lock(old_map);
10677
10678 new_map = vm_map_create(new_pmap,
10679 old_map->min_offset,
10680 old_map->max_offset,
10681 old_map->hdr.entries_pageable);
10682 vm_commit_pagezero_status(new_map);
10683 /* inherit the parent map's page size */
10684 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
10685 for (
10686 old_entry = vm_map_first_entry(old_map);
10687 old_entry != vm_map_to_entry(old_map);
10688 ) {
10689
10690 entry_size = old_entry->vme_end - old_entry->vme_start;
10691
10692 switch (old_entry->inheritance) {
10693 case VM_INHERIT_NONE:
10694 /*
10695 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
10696 * is not passed or the entry is backed by a device pager.
10697 */
10698 if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
10699 (!old_entry->is_sub_map &&
10700 VME_OBJECT(old_entry) != NULL &&
10701 VME_OBJECT(old_entry)->pager != NULL &&
10702 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
10703 break;
10704 }
10705 /* FALLTHROUGH */
10706
10707 case VM_INHERIT_SHARE:
10708 vm_map_fork_share(old_map, old_entry, new_map);
10709 new_size += entry_size;
10710 break;
10711
10712 case VM_INHERIT_COPY:
10713
10714 /*
10715 * Inline the copy_quickly case;
10716 * upon failure, fall back on call
10717 * to vm_map_fork_copy.
10718 */
10719
10720 if(old_entry->is_sub_map)
10721 break;
10722 if ((old_entry->wired_count != 0) ||
10723 ((VME_OBJECT(old_entry) != NULL) &&
10724 (VME_OBJECT(old_entry)->true_share))) {
10725 goto slow_vm_map_fork_copy;
10726 }
10727
10728 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
10729 vm_map_entry_copy(new_entry, old_entry);
10730 if (new_entry->is_sub_map) {
10731 /* clear address space specifics */
10732 new_entry->use_pmap = FALSE;
10733 }
10734
10735 if (! vm_object_copy_quickly(
10736 &VME_OBJECT(new_entry),
10737 VME_OFFSET(old_entry),
10738 (old_entry->vme_end -
10739 old_entry->vme_start),
10740 &src_needs_copy,
10741 &new_entry_needs_copy)) {
10742 vm_map_entry_dispose(new_map, new_entry);
10743 goto slow_vm_map_fork_copy;
10744 }
10745
10746 /*
10747 * Handle copy-on-write obligations
10748 */
10749
10750 if (src_needs_copy && !old_entry->needs_copy) {
10751 vm_prot_t prot;
10752
10753 prot = old_entry->protection & ~VM_PROT_WRITE;
10754
10755 if (override_nx(old_map, VME_ALIAS(old_entry))
10756 && prot)
10757 prot |= VM_PROT_EXECUTE;
10758
10759 vm_object_pmap_protect(
10760 VME_OBJECT(old_entry),
10761 VME_OFFSET(old_entry),
10762 (old_entry->vme_end -
10763 old_entry->vme_start),
10764 ((old_entry->is_shared
10765 || old_map->mapped_in_other_pmaps)
10766 ? PMAP_NULL :
10767 old_map->pmap),
10768 old_entry->vme_start,
10769 prot);
10770
10771 assert(old_entry->wired_count == 0);
10772 old_entry->needs_copy = TRUE;
10773 }
10774 new_entry->needs_copy = new_entry_needs_copy;
10775
10776 /*
10777 * Insert the entry at the end
10778 * of the map.
10779 */
10780
10781 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
10782 new_entry);
10783 new_size += entry_size;
10784 break;
10785
10786 slow_vm_map_fork_copy:
10787 vm_map_copyin_flags = 0;
10788 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
10789 vm_map_copyin_flags |=
10790 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
10791 }
10792 if (vm_map_fork_copy(old_map,
10793 &old_entry,
10794 new_map,
10795 vm_map_copyin_flags)) {
10796 new_size += entry_size;
10797 }
10798 continue;
10799 }
10800 old_entry = old_entry->vme_next;
10801 }
10802
10803
10804 new_map->size = new_size;
10805 vm_map_unlock(old_map);
10806 vm_map_deallocate(old_map);
10807
10808 return(new_map);
10809 }
10810
10811 /*
10812 * vm_map_exec:
10813 *
10814 * Set up the "new_map" with the proper execution environment according
10815 * to the type of executable (platform, 64bit, chroot environment).
10816 * Map the comm page and shared region, etc...
10817 */
10818 kern_return_t
10819 vm_map_exec(
10820 vm_map_t new_map,
10821 task_t task,
10822 boolean_t is64bit,
10823 void *fsroot,
10824 cpu_type_t cpu)
10825 {
10826 SHARED_REGION_TRACE_DEBUG(
10827 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
10828 (void *)VM_KERNEL_ADDRPERM(current_task()),
10829 (void *)VM_KERNEL_ADDRPERM(new_map),
10830 (void *)VM_KERNEL_ADDRPERM(task),
10831 (void *)VM_KERNEL_ADDRPERM(fsroot),
10832 cpu));
10833 (void) vm_commpage_enter(new_map, task, is64bit);
10834 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
10835 SHARED_REGION_TRACE_DEBUG(
10836 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
10837 (void *)VM_KERNEL_ADDRPERM(current_task()),
10838 (void *)VM_KERNEL_ADDRPERM(new_map),
10839 (void *)VM_KERNEL_ADDRPERM(task),
10840 (void *)VM_KERNEL_ADDRPERM(fsroot),
10841 cpu));
10842 return KERN_SUCCESS;
10843 }
10844
10845 /*
10846 * vm_map_lookup_locked:
10847 *
10848 * Finds the VM object, offset, and
10849 * protection for a given virtual address in the
10850 * specified map, assuming a page fault of the
10851 * type specified.
10852 *
10853 * Returns the (object, offset, protection) for
10854 * this address, whether it is wired down, and whether
10855 * this map has the only reference to the data in question.
10856 * In order to later verify this lookup, a "version"
10857 * is returned.
10858 *
10859 * The map MUST be locked by the caller and WILL be
10860 * locked on exit. In order to guarantee the
10861 * existence of the returned object, it is returned
10862 * locked.
10863 *
10864 * If a lookup is requested with "write protection"
10865 * specified, the map may be changed to perform virtual
10866 * copying operations, although the data referenced will
10867 * remain the same.
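 *
 * If a non-NULL "fault_info" is supplied, it is also filled in with
 * the per-entry fault parameters (behavior, lo/hi offsets, cs_bypass,
 * etc.).  When submaps are involved, the map returned through
 * "*real_map" may differ from "*var_map".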
10868 */
10869 kern_return_t
10870 vm_map_lookup_locked(
10871 vm_map_t *var_map, /* IN/OUT */
10872 vm_map_offset_t vaddr,
10873 vm_prot_t fault_type,
10874 int object_lock_type,
10875 vm_map_version_t *out_version, /* OUT */
10876 vm_object_t *object, /* OUT */
10877 vm_object_offset_t *offset, /* OUT */
10878 vm_prot_t *out_prot, /* OUT */
10879 boolean_t *wired, /* OUT */
10880 vm_object_fault_info_t fault_info, /* OUT */
10881 vm_map_t *real_map)
10882 {
10883 vm_map_entry_t entry;
10884 vm_map_t map = *var_map;
10885 vm_map_t old_map = *var_map;
10886 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
10887 vm_map_offset_t cow_parent_vaddr = 0;
10888 vm_map_offset_t old_start = 0;
10889 vm_map_offset_t old_end = 0;
10890 vm_prot_t prot;
10891 boolean_t mask_protections;
10892 boolean_t force_copy;
10893 vm_prot_t original_fault_type;
10894
10895 /*
10896 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
10897 * as a mask against the mapping's actual protections, not as an
10898 * absolute value.
10899 */
10900 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
10901 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10902 fault_type &= VM_PROT_ALL;
10903 original_fault_type = fault_type;
10904
10905 *real_map = map;
10906
10907 RetryLookup:
10908 fault_type = original_fault_type;
10909
10910 /*
10911 * If the map has an interesting hint, try it before calling
10912 * the full-blown lookup routine.
10913 */
10914 entry = map->hint;
10915
10916 if ((entry == vm_map_to_entry(map)) ||
10917 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10918 vm_map_entry_t tmp_entry;
10919
10920 /*
10921 * Entry was either not a valid hint, or the vaddr
10922 * was not contained in the entry, so do a full lookup.
10923 */
10924 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10925 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10926 vm_map_unlock(cow_sub_map_parent);
10927 if((*real_map != map)
10928 && (*real_map != cow_sub_map_parent))
10929 vm_map_unlock(*real_map);
10930 return KERN_INVALID_ADDRESS;
10931 }
10932
10933 entry = tmp_entry;
10934 }
10935 if(map == old_map) {
10936 old_start = entry->vme_start;
10937 old_end = entry->vme_end;
10938 }
10939
10940 /*
10941 * Handle submaps. Drop lock on upper map, submap is
10942 * returned locked.
10943 */
10944
10945 submap_recurse:
10946 if (entry->is_sub_map) {
10947 vm_map_offset_t local_vaddr;
10948 vm_map_offset_t end_delta;
10949 vm_map_offset_t start_delta;
10950 vm_map_entry_t submap_entry;
10951 boolean_t mapped_needs_copy=FALSE;
10952
10953 local_vaddr = vaddr;
10954
10955 if ((entry->use_pmap &&
10956 ! ((fault_type & VM_PROT_WRITE) ||
10957 force_copy))) {
10958 /* if real_map equals map we unlock below */
10959 if ((*real_map != map) &&
10960 (*real_map != cow_sub_map_parent))
10961 vm_map_unlock(*real_map);
10962 *real_map = VME_SUBMAP(entry);
10963 }
10964
10965 if(entry->needs_copy &&
10966 ((fault_type & VM_PROT_WRITE) ||
10967 force_copy)) {
10968 if (!mapped_needs_copy) {
10969 if (vm_map_lock_read_to_write(map)) {
10970 vm_map_lock_read(map);
10971 *real_map = map;
10972 goto RetryLookup;
10973 }
10974 vm_map_lock_read(VME_SUBMAP(entry));
10975 *var_map = VME_SUBMAP(entry);
10976 cow_sub_map_parent = map;
10977 /* reset base to map before cow object */
10978 /* this is the map which will accept */
10979 /* the new cow object */
10980 old_start = entry->vme_start;
10981 old_end = entry->vme_end;
10982 cow_parent_vaddr = vaddr;
10983 mapped_needs_copy = TRUE;
10984 } else {
10985 vm_map_lock_read(VME_SUBMAP(entry));
10986 *var_map = VME_SUBMAP(entry);
10987 if((cow_sub_map_parent != map) &&
10988 (*real_map != map))
10989 vm_map_unlock(map);
10990 }
10991 } else {
10992 vm_map_lock_read(VME_SUBMAP(entry));
10993 *var_map = VME_SUBMAP(entry);
10994 /* leave map locked if it is a target */
10995 /* cow sub_map above; otherwise, just */
10996 /* follow the maps down to the object */
10997 /* here we unlock knowing we are not */
10998 /* revisiting the map. */
10999 if((*real_map != map) && (map != cow_sub_map_parent))
11000 vm_map_unlock_read(map);
11001 }
11002
11003 map = *var_map;
11004
11005 /* calculate the offset in the submap for vaddr */
11006 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
11007
11008 RetrySubMap:
11009 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
11010 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
11011 vm_map_unlock(cow_sub_map_parent);
11012 }
11013 if((*real_map != map)
11014 && (*real_map != cow_sub_map_parent)) {
11015 vm_map_unlock(*real_map);
11016 }
11017 *real_map = map;
11018 return KERN_INVALID_ADDRESS;
11019 }
11020
11021 /* find the attenuated shadow of the underlying object */
11022 /* on our target map */
11023
11024 /* In English: the submap object may extend beyond the */
11025 /* region mapped by the entry, or may only fill a portion */
11026 /* of it. For our purposes, we only care if the object */
11027 /* doesn't fill. In this case the area which will */
11028 /* ultimately be clipped in the top map will only need */
11029 /* to be as big as the portion of the underlying entry */
11030 /* which is mapped */
11031 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
11032 submap_entry->vme_start - VME_OFFSET(entry) : 0;
11033
11034 end_delta =
11035 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
11036 submap_entry->vme_end ?
11037 0 : (VME_OFFSET(entry) +
11038 (old_end - old_start))
11039 - submap_entry->vme_end;
11040
11041 old_start += start_delta;
11042 old_end -= end_delta;
11043
11044 if(submap_entry->is_sub_map) {
11045 entry = submap_entry;
11046 vaddr = local_vaddr;
11047 goto submap_recurse;
11048 }
11049
11050 if (((fault_type & VM_PROT_WRITE) ||
11051 force_copy)
11052 && cow_sub_map_parent) {
11053
11054 vm_object_t sub_object, copy_object;
11055 vm_object_offset_t copy_offset;
11056 vm_map_offset_t local_start;
11057 vm_map_offset_t local_end;
11058 boolean_t copied_slowly = FALSE;
11059
11060 if (vm_map_lock_read_to_write(map)) {
11061 vm_map_lock_read(map);
11062 old_start -= start_delta;
11063 old_end += end_delta;
11064 goto RetrySubMap;
11065 }
11066
11067
11068 sub_object = VME_OBJECT(submap_entry);
11069 if (sub_object == VM_OBJECT_NULL) {
11070 sub_object =
11071 vm_object_allocate(
11072 (vm_map_size_t)
11073 (submap_entry->vme_end -
11074 submap_entry->vme_start));
11075 VME_OBJECT_SET(submap_entry, sub_object);
11076 VME_OFFSET_SET(submap_entry, 0);
11077 }
11078 local_start = local_vaddr -
11079 (cow_parent_vaddr - old_start);
11080 local_end = local_vaddr +
11081 (old_end - cow_parent_vaddr);
11082 vm_map_clip_start(map, submap_entry, local_start);
11083 vm_map_clip_end(map, submap_entry, local_end);
11084 if (submap_entry->is_sub_map) {
11085 /* unnesting was done when clipping */
11086 assert(!submap_entry->use_pmap);
11087 }
11088
11089 /* This is the COW case, let's connect */
11090 /* an entry in our space to the underlying */
11091 /* object in the submap, bypassing the */
11092 /* submap. */
11093
11094
11095 if(submap_entry->wired_count != 0 ||
11096 (sub_object->copy_strategy ==
11097 MEMORY_OBJECT_COPY_NONE)) {
11098 vm_object_lock(sub_object);
11099 vm_object_copy_slowly(sub_object,
11100 VME_OFFSET(submap_entry),
11101 (submap_entry->vme_end -
11102 submap_entry->vme_start),
11103 FALSE,
11104 &copy_object);
11105 copied_slowly = TRUE;
11106 } else {
11107
11108 /* set up shadow object */
11109 copy_object = sub_object;
11110 vm_object_lock(sub_object);
11111 vm_object_reference_locked(sub_object);
11112 sub_object->shadowed = TRUE;
11113 vm_object_unlock(sub_object);
11114
11115 assert(submap_entry->wired_count == 0);
11116 submap_entry->needs_copy = TRUE;
11117
11118 prot = submap_entry->protection & ~VM_PROT_WRITE;
11119
11120 if (override_nx(old_map,
11121 VME_ALIAS(submap_entry))
11122 && prot)
11123 prot |= VM_PROT_EXECUTE;
11124
11125 vm_object_pmap_protect(
11126 sub_object,
11127 VME_OFFSET(submap_entry),
11128 submap_entry->vme_end -
11129 submap_entry->vme_start,
11130 (submap_entry->is_shared
11131 || map->mapped_in_other_pmaps) ?
11132 PMAP_NULL : map->pmap,
11133 submap_entry->vme_start,
11134 prot);
11135 }
11136
11137 /*
11138 * Adjust the fault offset to the submap entry.
11139 */
11140 copy_offset = (local_vaddr -
11141 submap_entry->vme_start +
11142 VME_OFFSET(submap_entry));
11143
11144 /* This works differently from the */
11145 /* normal submap case. We go back */
11146 /* to the parent of the cow map and */
11147 /* clip out the target portion of */
11148 /* the sub_map, substituting the */
11149 /* new copy object. */
11150
11151 vm_map_unlock(map);
11152 local_start = old_start;
11153 local_end = old_end;
11154 map = cow_sub_map_parent;
11155 *var_map = cow_sub_map_parent;
11156 vaddr = cow_parent_vaddr;
11157 cow_sub_map_parent = NULL;
11158
11159 if(!vm_map_lookup_entry(map,
11160 vaddr, &entry)) {
11161 vm_object_deallocate(
11162 copy_object);
11163 vm_map_lock_write_to_read(map);
11164 return KERN_INVALID_ADDRESS;
11165 }
11166
11167 /* clip out the portion of space */
11168 /* mapped by the sub map which */
11169 /* corresponds to the underlying */
11170 /* object */
11171
11172 /*
11173 * Clip (and unnest) the smallest nested chunk
11174 * possible around the faulting address...
11175 */
11176 local_start = vaddr & ~(pmap_nesting_size_min - 1);
11177 local_end = local_start + pmap_nesting_size_min;
11178 /*
11179 * ... but don't go beyond the "old_start" to "old_end"
11180 * range, to avoid spanning over another VM region
11181 * with a possibly different VM object and/or offset.
11182 */
11183 if (local_start < old_start) {
11184 local_start = old_start;
11185 }
11186 if (local_end > old_end) {
11187 local_end = old_end;
11188 }
11189 /*
11190 * Adjust copy_offset to the start of the range.
11191 */
11192 copy_offset -= (vaddr - local_start);
11193
11194 vm_map_clip_start(map, entry, local_start);
11195 vm_map_clip_end(map, entry, local_end);
11196 if (entry->is_sub_map) {
11197 /* unnesting was done when clipping */
11198 assert(!entry->use_pmap);
11199 }
11200
11201 /* substitute copy object for */
11202 /* shared map entry */
11203 vm_map_deallocate(VME_SUBMAP(entry));
11204 assert(!entry->iokit_acct);
11205 entry->is_sub_map = FALSE;
11206 entry->use_pmap = TRUE;
11207 VME_OBJECT_SET(entry, copy_object);
11208
11209 /* propagate the submap entry's protections */
11210 entry->protection |= submap_entry->protection;
11211 entry->max_protection |= submap_entry->max_protection;
11212
11213 if(copied_slowly) {
11214 VME_OFFSET_SET(entry, local_start - old_start);
11215 entry->needs_copy = FALSE;
11216 entry->is_shared = FALSE;
11217 } else {
11218 VME_OFFSET_SET(entry, copy_offset);
11219 assert(entry->wired_count == 0);
11220 entry->needs_copy = TRUE;
11221 if(entry->inheritance == VM_INHERIT_SHARE)
11222 entry->inheritance = VM_INHERIT_COPY;
11223 if (map != old_map)
11224 entry->is_shared = TRUE;
11225 }
11226 if(entry->inheritance == VM_INHERIT_SHARE)
11227 entry->inheritance = VM_INHERIT_COPY;
11228
11229 vm_map_lock_write_to_read(map);
11230 } else {
11231 if((cow_sub_map_parent)
11232 && (cow_sub_map_parent != *real_map)
11233 && (cow_sub_map_parent != map)) {
11234 vm_map_unlock(cow_sub_map_parent);
11235 }
11236 entry = submap_entry;
11237 vaddr = local_vaddr;
11238 }
11239 }
11240
11241 /*
11242 * Check whether this task is allowed to have
11243 * this page.
11244 */
11245
11246 prot = entry->protection;
11247
11248 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
11249 /*
11250 * HACK -- if not a stack, then allow execution
11251 */
11252 prot |= VM_PROT_EXECUTE;
11253 }
11254
11255 if (mask_protections) {
11256 fault_type &= prot;
11257 if (fault_type == VM_PROT_NONE) {
11258 goto protection_failure;
11259 }
11260 }
11261 if (((fault_type & prot) != fault_type)
11262 ) {
11263 protection_failure:
11264 if (*real_map != map) {
11265 vm_map_unlock(*real_map);
11266 }
11267 *real_map = map;
11268
11269 if ((fault_type & VM_PROT_EXECUTE) && prot)
11270 log_stack_execution_failure((addr64_t)vaddr, prot);
11271
11272 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
11273 return KERN_PROTECTION_FAILURE;
11274 }
11275
11276 /*
11277 * If this page is not pageable, we have to get
11278 * it for all possible accesses.
11279 */
11280
11281 *wired = (entry->wired_count != 0);
11282 if (*wired)
11283 fault_type = prot;
11284
11285 /*
11286 * If the entry was copy-on-write, we either ...
11287 */
11288
11289 if (entry->needs_copy) {
11290 /*
11291 * If we want to write the page, we may as well
11292 * handle that now since we've got the map locked.
11293 *
11294 * If we don't need to write the page, we just
11295 * demote the permissions allowed.
11296 */
11297
11298 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
11299 /*
11300 * Make a new object, and place it in the
11301 * object chain. Note that no new references
11302 * have appeared -- one just moved from the
11303 * map to the new object.
11304 */
11305
11306 if (vm_map_lock_read_to_write(map)) {
11307 vm_map_lock_read(map);
11308 goto RetryLookup;
11309 }
11310
11311 if (VME_OBJECT(entry)->shadowed == FALSE) {
11312 vm_object_lock(VME_OBJECT(entry));
11313 VME_OBJECT(entry)->shadowed = TRUE;
11314 vm_object_unlock(VME_OBJECT(entry));
11315 }
11316 VME_OBJECT_SHADOW(entry,
11317 (vm_map_size_t) (entry->vme_end -
11318 entry->vme_start));
11319 entry->needs_copy = FALSE;
11320
11321 vm_map_lock_write_to_read(map);
11322 }
11323 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
11324 /*
11325 * We're attempting to read a copy-on-write
11326 * page -- don't allow writes.
11327 */
11328
11329 prot &= (~VM_PROT_WRITE);
11330 }
11331 }
11332
11333 /*
11334 * Create an object if necessary.
11335 */
11336 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
11337
11338 if (vm_map_lock_read_to_write(map)) {
11339 vm_map_lock_read(map);
11340 goto RetryLookup;
11341 }
11342
11343 VME_OBJECT_SET(entry,
11344 vm_object_allocate(
11345 (vm_map_size_t)(entry->vme_end -
11346 entry->vme_start)));
11347 VME_OFFSET_SET(entry, 0);
11348 vm_map_lock_write_to_read(map);
11349 }
11350
11351 /*
11352 * Return the object/offset from this entry. If the entry
11353 * was copy-on-write or empty, it has been fixed up. Also
11354 * return the protection.
11355 */
11356
11357 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11358 *object = VME_OBJECT(entry);
11359 *out_prot = prot;
11360
11361 if (fault_info) {
11362 fault_info->interruptible = THREAD_UNINT; /* for now... */
11363 /* ... the caller will change "interruptible" if needed */
11364 fault_info->cluster_size = 0;
11365 fault_info->user_tag = VME_ALIAS(entry);
11366 fault_info->pmap_options = 0;
11367 if (entry->iokit_acct ||
11368 (!entry->is_sub_map && !entry->use_pmap)) {
11369 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11370 }
11371 fault_info->behavior = entry->behavior;
11372 fault_info->lo_offset = VME_OFFSET(entry);
11373 fault_info->hi_offset =
11374 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
11375 fault_info->no_cache = entry->no_cache;
11376 fault_info->stealth = FALSE;
11377 fault_info->io_sync = FALSE;
11378 if (entry->used_for_jit ||
11379 entry->vme_resilient_codesign) {
11380 fault_info->cs_bypass = TRUE;
11381 } else {
11382 fault_info->cs_bypass = FALSE;
11383 }
11384 fault_info->mark_zf_absent = FALSE;
11385 fault_info->batch_pmap_op = FALSE;
11386 }
11387
11388 /*
11389 * Lock the object to prevent it from disappearing
11390 */
11391 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11392 vm_object_lock(*object);
11393 else
11394 vm_object_lock_shared(*object);
11395
11396 /*
11397 * Save the version number
11398 */
11399
11400 out_version->main_timestamp = map->timestamp;
11401
11402 return KERN_SUCCESS;
11403 }
11404
11405
11406 /*
11407 * vm_map_verify:
11408 *
11409 * Verifies that the map in question has not changed
11410 * since the given version. If successful, the map
11411 * will not change until vm_map_verify_done() is called.
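 *
 * Typical use (illustrative): save the version filled in by
 * vm_map_lookup_locked(), drop the map lock while working on the
 * object, then call vm_map_verify(); on success the map is left
 * read-locked and must later be released with vm_map_verify_done()
 * (see vm/vm_map.h), on failure the lookup must be redone.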
11412 */
11413 boolean_t
11414 vm_map_verify(
11415 vm_map_t map,
11416 vm_map_version_t *version) /* REF */
11417 {
11418 boolean_t result;
11419
11420 vm_map_lock_read(map);
11421 result = (map->timestamp == version->main_timestamp);
11422
11423 if (!result)
11424 vm_map_unlock_read(map);
11425
11426 return(result);
11427 }
11428
11429 /*
11430 * vm_map_verify_done:
11431 *
11432 * Releases locks acquired by a vm_map_verify.
11433 *
11434 * This is now a macro in vm/vm_map.h. It does a
11435 * vm_map_unlock_read on the map.
11436 */
11437
11438
11439 /*
11440 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11441 * Goes away after regular vm_region_recurse function migrates to
11442 * 64 bits
11443 * vm_region_recurse: A form of vm_region which follows the
11444 * submaps in a target map
11445 *
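 * The caller passes the maximum submap depth to descend in
 * *nesting_depth and gets back the depth at which the returned
 * region was found; *address and *size describe the region that
 * was actually reported.
 *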
11446 */
11447
11448 #if DEVELOPMENT || DEBUG
11449 int vm_region_footprint = 0;
11450 #endif /* DEVELOPMENT || DEBUG */
11451
11452 kern_return_t
11453 vm_map_region_recurse_64(
11454 vm_map_t map,
11455 vm_map_offset_t *address, /* IN/OUT */
11456 vm_map_size_t *size, /* OUT */
11457 natural_t *nesting_depth, /* IN/OUT */
11458 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11459 mach_msg_type_number_t *count) /* IN/OUT */
11460 {
11461 mach_msg_type_number_t original_count;
11462 vm_region_extended_info_data_t extended;
11463 vm_map_entry_t tmp_entry;
11464 vm_map_offset_t user_address;
11465 unsigned int user_max_depth;
11466
11467 /*
11468 * "curr_entry" is the VM map entry preceding or including the
11469 * address we're looking for.
11470 * "curr_map" is the map or sub-map containing "curr_entry".
11471 * "curr_address" is the equivalent of the top map's "user_address"
11472 * in the current map.
11473 * "curr_offset" is the cumulated offset of "curr_map" in the
11474 * target task's address space.
11475 * "curr_depth" is the depth of "curr_map" in the chain of
11476 * sub-maps.
11477 *
11478 * "curr_max_below" and "curr_max_above" limit the range (around
11479 * "curr_address") we should take into account in the current (sub)map.
11480 * They limit the range to what's visible through the map entries
11481 * we've traversed from the top map to the current map.
11482 *
11483 */
11484 vm_map_entry_t curr_entry;
11485 vm_map_address_t curr_address;
11486 vm_map_offset_t curr_offset;
11487 vm_map_t curr_map;
11488 unsigned int curr_depth;
11489 vm_map_offset_t curr_max_below, curr_max_above;
11490 vm_map_offset_t curr_skip;
11491
11492 /*
11493 * "next_" is the same as "curr_" but for the VM region immediately
11494 * after the address we're looking for. We need to keep track of this
11495 * too because we want to return info about that region if the
11496 * address we're looking for is not mapped.
11497 */
11498 vm_map_entry_t next_entry;
11499 vm_map_offset_t next_offset;
11500 vm_map_offset_t next_address;
11501 vm_map_t next_map;
11502 unsigned int next_depth;
11503 vm_map_offset_t next_max_below, next_max_above;
11504 vm_map_offset_t next_skip;
11505
11506 boolean_t look_for_pages;
11507 vm_region_submap_short_info_64_t short_info;
11508
11509 if (map == VM_MAP_NULL) {
11510 /* no address space to work on */
11511 return KERN_INVALID_ARGUMENT;
11512 }
11513
11514
11515 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11516 /*
11517 * "info" structure is not big enough and
11518 * would overflow
11519 */
11520 return KERN_INVALID_ARGUMENT;
11521 }
11522
11523 original_count = *count;
11524
11525 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11526 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11527 look_for_pages = FALSE;
11528 short_info = (vm_region_submap_short_info_64_t) submap_info;
11529 submap_info = NULL;
11530 } else {
11531 look_for_pages = TRUE;
11532 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
11533 short_info = NULL;
11534
11535 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11536 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11537 }
11538 }
11539
11540 user_address = *address;
11541 user_max_depth = *nesting_depth;
11542
11543 if (not_in_kdp) {
11544 vm_map_lock_read(map);
11545 }
11546
11547 recurse_again:
11548 curr_entry = NULL;
11549 curr_map = map;
11550 curr_address = user_address;
11551 curr_offset = 0;
11552 curr_skip = 0;
11553 curr_depth = 0;
11554 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11555 curr_max_below = curr_address;
11556
11557 next_entry = NULL;
11558 next_map = NULL;
11559 next_address = 0;
11560 next_offset = 0;
11561 next_skip = 0;
11562 next_depth = 0;
11563 next_max_above = (vm_map_offset_t) -1;
11564 next_max_below = (vm_map_offset_t) -1;
11565
11566 for (;;) {
11567 if (vm_map_lookup_entry(curr_map,
11568 curr_address,
11569 &tmp_entry)) {
11570 /* tmp_entry contains the address we're looking for */
11571 curr_entry = tmp_entry;
11572 } else {
11573 vm_map_offset_t skip;
11574 /*
11575 * The address is not mapped. "tmp_entry" is the
11576 * map entry preceding the address. We want the next
11577 * one, if it exists.
11578 */
11579 curr_entry = tmp_entry->vme_next;
11580
11581 if (curr_entry == vm_map_to_entry(curr_map) ||
11582 (curr_entry->vme_start >=
11583 curr_address + curr_max_above)) {
11584 /* no next entry at this level: stop looking */
11585 if (not_in_kdp) {
11586 vm_map_unlock_read(curr_map);
11587 }
11588 curr_entry = NULL;
11589 curr_map = NULL;
11590 curr_skip = 0;
11591 curr_offset = 0;
11592 curr_depth = 0;
11593 curr_max_above = 0;
11594 curr_max_below = 0;
11595 break;
11596 }
11597
11598 /* adjust current address and offset */
11599 skip = curr_entry->vme_start - curr_address;
11600 curr_address = curr_entry->vme_start;
11601 curr_skip += skip;
11602 curr_offset += skip;
11603 curr_max_above -= skip;
11604 curr_max_below = 0;
11605 }
11606
11607 /*
11608 * Is the next entry at this level closer to the address (or
11609 * deeper in the submap chain) than the one we had
11610 * so far?
11611 */
11612 tmp_entry = curr_entry->vme_next;
11613 if (tmp_entry == vm_map_to_entry(curr_map)) {
11614 /* no next entry at this level */
11615 } else if (tmp_entry->vme_start >=
11616 curr_address + curr_max_above) {
11617 /*
11618 * tmp_entry is beyond the scope of what we mapped of
11619 * this submap in the upper level: ignore it.
11620 */
11621 } else if ((next_entry == NULL) ||
11622 (tmp_entry->vme_start + curr_offset <=
11623 next_entry->vme_start + next_offset)) {
11624 /*
11625 * We didn't have a "next_entry" or this one is
11626 * closer to the address we're looking for:
11627 * use this "tmp_entry" as the new "next_entry".
11628 */
11629 if (next_entry != NULL) {
11630 /* unlock the last "next_map" */
11631 if (next_map != curr_map && not_in_kdp) {
11632 vm_map_unlock_read(next_map);
11633 }
11634 }
11635 next_entry = tmp_entry;
11636 next_map = curr_map;
11637 next_depth = curr_depth;
11638 next_address = next_entry->vme_start;
11639 next_skip = curr_skip;
11640 next_skip += (next_address - curr_address);
11641 next_offset = curr_offset;
11642 next_offset += (next_address - curr_address);
11643 next_max_above = MIN(next_max_above, curr_max_above);
11644 next_max_above = MIN(next_max_above,
11645 next_entry->vme_end - next_address);
11646 next_max_below = MIN(next_max_below, curr_max_below);
11647 next_max_below = MIN(next_max_below,
11648 next_address - next_entry->vme_start);
11649 }
11650
11651 /*
11652 * "curr_max_{above,below}" allow us to keep track of the
11653 * portion of the submap that is actually mapped at this level:
11654 * the rest of that submap is irrelevant to us, since it's not
11655 * mapped here.
11656 * The relevant portion of the map starts at
11657 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
11658 */
11659 curr_max_above = MIN(curr_max_above,
11660 curr_entry->vme_end - curr_address);
11661 curr_max_below = MIN(curr_max_below,
11662 curr_address - curr_entry->vme_start);
11663
11664 if (!curr_entry->is_sub_map ||
11665 curr_depth >= user_max_depth) {
11666 /*
11667 * We hit a leaf map or we reached the maximum depth
11668 * we could, so stop looking. Keep the current map
11669 * locked.
11670 */
11671 break;
11672 }
11673
11674 /*
11675 * Get down to the next submap level.
11676 */
11677
11678 /*
11679 * Lock the next level and unlock the current level,
11680 * unless we need to keep it locked to access the "next_entry"
11681 * later.
11682 */
11683 if (not_in_kdp) {
11684 vm_map_lock_read(VME_SUBMAP(curr_entry));
11685 }
11686 if (curr_map == next_map) {
11687 /* keep "next_map" locked in case we need it */
11688 } else {
11689 /* release this map */
11690 if (not_in_kdp)
11691 vm_map_unlock_read(curr_map);
11692 }
11693
11694 /*
11695 * Adjust the offset. "curr_entry" maps the submap
11696 * at relative address "curr_entry->vme_start" in the
11697 * curr_map but skips the first "VME_OFFSET(curr_entry)"
11698 * bytes of the submap.
11699 * "curr_offset" always represents the offset of a virtual
11700 * address in the curr_map relative to the absolute address
11701 * space (i.e. the top-level VM map).
11702 */
11703 curr_offset +=
11704 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
11705 curr_address = user_address + curr_offset;
11706 /* switch to the submap */
11707 curr_map = VME_SUBMAP(curr_entry);
11708 curr_depth++;
11709 curr_entry = NULL;
11710 }
11711
11712 if (curr_entry == NULL) {
11713 /* no VM region contains the address... */
11714 #if DEVELOPMENT || DEBUG
11715 if (vm_region_footprint && /* we want footprint numbers */
11716 look_for_pages && /* & we want page counts */
11717 next_entry == NULL && /* & there are no more regions */
11718 /* & we haven't already provided our fake region: */
11719 user_address == vm_map_last_entry(map)->vme_end) {
11720 ledger_amount_t nonvol, nonvol_compressed;
11721 /*
11722 * Add a fake memory region to account for
11723 * purgeable memory that counts towards this
11724 * task's memory footprint, i.e. the resident
11725 * compressed pages of non-volatile objects
11726 * owned by that task.
11727 */
11728 ledger_get_balance(
11729 map->pmap->ledger,
11730 task_ledgers.purgeable_nonvolatile,
11731 &nonvol);
11732 ledger_get_balance(
11733 map->pmap->ledger,
11734 task_ledgers.purgeable_nonvolatile_compressed,
11735 &nonvol_compressed);
11736 if (nonvol + nonvol_compressed == 0) {
11737 /* no purgeable memory usage to report */
11738 return KERN_FAILURE;
11739 }
11740 /* fake region to show nonvolatile footprint */
11741 submap_info->protection = VM_PROT_DEFAULT;
11742 submap_info->max_protection = VM_PROT_DEFAULT;
11743 submap_info->inheritance = VM_INHERIT_DEFAULT;
11744 submap_info->offset = 0;
11745 submap_info->user_tag = 0;
11746 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
11747 submap_info->pages_shared_now_private = 0;
11748 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
11749 submap_info->pages_dirtied = submap_info->pages_resident;
11750 submap_info->ref_count = 1;
11751 submap_info->shadow_depth = 0;
11752 submap_info->external_pager = 0;
11753 submap_info->share_mode = SM_PRIVATE;
11754 submap_info->is_submap = 0;
11755 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
11756 submap_info->object_id = 0x11111111;
11757 submap_info->user_wired_count = 0;
11758 submap_info->pages_reusable = 0;
11759 *nesting_depth = 0;
11760 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
11761 *address = user_address;
11762 return KERN_SUCCESS;
11763 }
11764 #endif /* DEVELOPMENT || DEBUG */
11765 if (next_entry == NULL) {
11766 /* ... and no VM region follows it either */
11767 return KERN_INVALID_ADDRESS;
11768 }
11769 /* ... gather info about the next VM region */
11770 curr_entry = next_entry;
11771 curr_map = next_map; /* still locked ... */
11772 curr_address = next_address;
11773 curr_skip = next_skip;
11774 curr_offset = next_offset;
11775 curr_depth = next_depth;
11776 curr_max_above = next_max_above;
11777 curr_max_below = next_max_below;
11778 } else {
11779 /* we won't need "next_entry" after all */
11780 if (next_entry != NULL) {
11781 /* release "next_map" */
11782 if (next_map != curr_map && not_in_kdp) {
11783 vm_map_unlock_read(next_map);
11784 }
11785 }
11786 }
11787 next_entry = NULL;
11788 next_map = NULL;
11789 next_offset = 0;
11790 next_skip = 0;
11791 next_depth = 0;
11792 next_max_below = -1;
11793 next_max_above = -1;
11794
11795 if (curr_entry->is_sub_map &&
11796 curr_depth < user_max_depth) {
11797 /*
11798 * We're not as deep as we could be: we must have
11799 * gone back up after not finding anything mapped
11800 * below the original top-level map entry.
11801 * Let's move "curr_address" forward and recurse again.
11802 */
11803 user_address = curr_address;
11804 goto recurse_again;
11805 }
11806
11807 *nesting_depth = curr_depth;
11808 *size = curr_max_above + curr_max_below;
11809 *address = user_address + curr_skip - curr_max_below;
11810
11811 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
11812 // so probably should be a real 32b ID vs. ptr.
11813 // Current users just check for equality
11814 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
11815
11816 if (look_for_pages) {
11817 submap_info->user_tag = VME_ALIAS(curr_entry);
11818 submap_info->offset = VME_OFFSET(curr_entry);
11819 submap_info->protection = curr_entry->protection;
11820 submap_info->inheritance = curr_entry->inheritance;
11821 submap_info->max_protection = curr_entry->max_protection;
11822 submap_info->behavior = curr_entry->behavior;
11823 submap_info->user_wired_count = curr_entry->user_wired_count;
11824 submap_info->is_submap = curr_entry->is_sub_map;
11825 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11826 } else {
11827 short_info->user_tag = VME_ALIAS(curr_entry);
11828 short_info->offset = VME_OFFSET(curr_entry);
11829 short_info->protection = curr_entry->protection;
11830 short_info->inheritance = curr_entry->inheritance;
11831 short_info->max_protection = curr_entry->max_protection;
11832 short_info->behavior = curr_entry->behavior;
11833 short_info->user_wired_count = curr_entry->user_wired_count;
11834 short_info->is_submap = curr_entry->is_sub_map;
11835 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11836 }
11837
11838 extended.pages_resident = 0;
11839 extended.pages_swapped_out = 0;
11840 extended.pages_shared_now_private = 0;
11841 extended.pages_dirtied = 0;
11842 extended.pages_reusable = 0;
11843 extended.external_pager = 0;
11844 extended.shadow_depth = 0;
11845 extended.share_mode = SM_EMPTY;
11846 extended.ref_count = 0;
11847
11848 if (not_in_kdp) {
11849 if (!curr_entry->is_sub_map) {
11850 vm_map_offset_t range_start, range_end;
11851 range_start = MAX((curr_address - curr_max_below),
11852 curr_entry->vme_start);
11853 range_end = MIN((curr_address + curr_max_above),
11854 curr_entry->vme_end);
11855 vm_map_region_walk(curr_map,
11856 range_start,
11857 curr_entry,
11858 (VME_OFFSET(curr_entry) +
11859 (range_start -
11860 curr_entry->vme_start)),
11861 range_end - range_start,
11862 &extended,
11863 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
11864 if (extended.external_pager &&
11865 extended.ref_count == 2 &&
11866 extended.share_mode == SM_SHARED) {
11867 extended.share_mode = SM_PRIVATE;
11868 }
11869 } else {
11870 if (curr_entry->use_pmap) {
11871 extended.share_mode = SM_TRUESHARED;
11872 } else {
11873 extended.share_mode = SM_PRIVATE;
11874 }
11875 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
11876 }
11877 }
11878
11879 if (look_for_pages) {
11880 submap_info->pages_resident = extended.pages_resident;
11881 submap_info->pages_swapped_out = extended.pages_swapped_out;
11882 submap_info->pages_shared_now_private =
11883 extended.pages_shared_now_private;
11884 submap_info->pages_dirtied = extended.pages_dirtied;
11885 submap_info->external_pager = extended.external_pager;
11886 submap_info->shadow_depth = extended.shadow_depth;
11887 submap_info->share_mode = extended.share_mode;
11888 submap_info->ref_count = extended.ref_count;
11889
11890 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11891 submap_info->pages_reusable = extended.pages_reusable;
11892 }
11893 } else {
11894 short_info->external_pager = extended.external_pager;
11895 short_info->shadow_depth = extended.shadow_depth;
11896 short_info->share_mode = extended.share_mode;
11897 short_info->ref_count = extended.ref_count;
11898 }
11899
11900 if (not_in_kdp) {
11901 vm_map_unlock_read(curr_map);
11902 }
11903
11904 return KERN_SUCCESS;
11905 }
11906
11907 /*
11908 * vm_region:
11909 *
11910 * User call to obtain information about a region in
11911 * a task's address map. Currently, only one flavor is
11912 * supported.
11913 *
11914 * XXX The reserved and behavior fields cannot be filled
11915 * in until the vm merge from the IK is completed, and
11916 * vm_reserve is implemented.
11917 */
11918
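/*
 * Illustration: a sketch of how user space typically reaches this code,
 * assuming a caller with a task port and the VM_REGION_BASIC_INFO_64
 * flavor handled below ("addr" is whatever address the caller probes):
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *			    VM_REGION_BASIC_INFO_64,
 *			    (vm_region_info_t)&info, &count, &object_name);
 *
 * On KERN_SUCCESS, "addr"/"size" describe the first region at or above
 * the probed address and "info" carries the fields filled in below.
 */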
11919 kern_return_t
11920 vm_map_region(
11921 vm_map_t map,
11922 vm_map_offset_t *address, /* IN/OUT */
11923 vm_map_size_t *size, /* OUT */
11924 vm_region_flavor_t flavor, /* IN */
11925 vm_region_info_t info, /* OUT */
11926 mach_msg_type_number_t *count, /* IN/OUT */
11927 mach_port_t *object_name) /* OUT */
11928 {
11929 vm_map_entry_t tmp_entry;
11930 vm_map_entry_t entry;
11931 vm_map_offset_t start;
11932
11933 if (map == VM_MAP_NULL)
11934 return(KERN_INVALID_ARGUMENT);
11935
11936 switch (flavor) {
11937
11938 case VM_REGION_BASIC_INFO:
11939 /* legacy for old 32-bit objects info */
11940 {
11941 vm_region_basic_info_t basic;
11942
11943 if (*count < VM_REGION_BASIC_INFO_COUNT)
11944 return(KERN_INVALID_ARGUMENT);
11945
11946 basic = (vm_region_basic_info_t) info;
11947 *count = VM_REGION_BASIC_INFO_COUNT;
11948
11949 vm_map_lock_read(map);
11950
11951 start = *address;
11952 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11953 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11954 vm_map_unlock_read(map);
11955 return(KERN_INVALID_ADDRESS);
11956 }
11957 } else {
11958 entry = tmp_entry;
11959 }
11960
11961 start = entry->vme_start;
11962
11963 basic->offset = (uint32_t)VME_OFFSET(entry);
11964 basic->protection = entry->protection;
11965 basic->inheritance = entry->inheritance;
11966 basic->max_protection = entry->max_protection;
11967 basic->behavior = entry->behavior;
11968 basic->user_wired_count = entry->user_wired_count;
11969 basic->reserved = entry->is_sub_map;
11970 *address = start;
11971 *size = (entry->vme_end - start);
11972
11973 if (object_name) *object_name = IP_NULL;
11974 if (entry->is_sub_map) {
11975 basic->shared = FALSE;
11976 } else {
11977 basic->shared = entry->is_shared;
11978 }
11979
11980 vm_map_unlock_read(map);
11981 return(KERN_SUCCESS);
11982 }
11983
11984 case VM_REGION_BASIC_INFO_64:
11985 {
11986 vm_region_basic_info_64_t basic;
11987
11988 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11989 return(KERN_INVALID_ARGUMENT);
11990
11991 basic = (vm_region_basic_info_64_t) info;
11992 *count = VM_REGION_BASIC_INFO_COUNT_64;
11993
11994 vm_map_lock_read(map);
11995
11996 start = *address;
11997 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11998 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11999 vm_map_unlock_read(map);
12000 return(KERN_INVALID_ADDRESS);
12001 }
12002 } else {
12003 entry = tmp_entry;
12004 }
12005
12006 start = entry->vme_start;
12007
12008 basic->offset = VME_OFFSET(entry);
12009 basic->protection = entry->protection;
12010 basic->inheritance = entry->inheritance;
12011 basic->max_protection = entry->max_protection;
12012 basic->behavior = entry->behavior;
12013 basic->user_wired_count = entry->user_wired_count;
12014 basic->reserved = entry->is_sub_map;
12015 *address = start;
12016 *size = (entry->vme_end - start);
12017
12018 if (object_name) *object_name = IP_NULL;
12019 if (entry->is_sub_map) {
12020 basic->shared = FALSE;
12021 } else {
12022 basic->shared = entry->is_shared;
12023 }
12024
12025 vm_map_unlock_read(map);
12026 return(KERN_SUCCESS);
12027 }
12028 case VM_REGION_EXTENDED_INFO:
12029 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
12030 return(KERN_INVALID_ARGUMENT);
12031 /*fallthru*/
12032 case VM_REGION_EXTENDED_INFO__legacy:
12033 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
12034 return KERN_INVALID_ARGUMENT;
12035
12036 {
12037 vm_region_extended_info_t extended;
12038 mach_msg_type_number_t original_count;
12039
12040 extended = (vm_region_extended_info_t) info;
12041
12042 vm_map_lock_read(map);
12043
12044 start = *address;
12045 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12046 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12047 vm_map_unlock_read(map);
12048 return(KERN_INVALID_ADDRESS);
12049 }
12050 } else {
12051 entry = tmp_entry;
12052 }
12053 start = entry->vme_start;
12054
12055 extended->protection = entry->protection;
12056 extended->user_tag = VME_ALIAS(entry);
12057 extended->pages_resident = 0;
12058 extended->pages_swapped_out = 0;
12059 extended->pages_shared_now_private = 0;
12060 extended->pages_dirtied = 0;
12061 extended->external_pager = 0;
12062 extended->shadow_depth = 0;
12063
12064 original_count = *count;
12065 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
12066 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
12067 } else {
12068 extended->pages_reusable = 0;
12069 *count = VM_REGION_EXTENDED_INFO_COUNT;
12070 }
12071
12072 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
12073
12074 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
12075 extended->share_mode = SM_PRIVATE;
12076
12077 if (object_name)
12078 *object_name = IP_NULL;
12079 *address = start;
12080 *size = (entry->vme_end - start);
12081
12082 vm_map_unlock_read(map);
12083 return(KERN_SUCCESS);
12084 }
12085 case VM_REGION_TOP_INFO:
12086 {
12087 vm_region_top_info_t top;
12088
12089 if (*count < VM_REGION_TOP_INFO_COUNT)
12090 return(KERN_INVALID_ARGUMENT);
12091
12092 top = (vm_region_top_info_t) info;
12093 *count = VM_REGION_TOP_INFO_COUNT;
12094
12095 vm_map_lock_read(map);
12096
12097 start = *address;
12098 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12099 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12100 vm_map_unlock_read(map);
12101 return(KERN_INVALID_ADDRESS);
12102 }
12103 } else {
12104 entry = tmp_entry;
12105
12106 }
12107 start = entry->vme_start;
12108
12109 top->private_pages_resident = 0;
12110 top->shared_pages_resident = 0;
12111
12112 vm_map_region_top_walk(entry, top);
12113
12114 if (object_name)
12115 *object_name = IP_NULL;
12116 *address = start;
12117 *size = (entry->vme_end - start);
12118
12119 vm_map_unlock_read(map);
12120 return(KERN_SUCCESS);
12121 }
12122 default:
12123 return(KERN_INVALID_ARGUMENT);
12124 }
12125 }
12126
12127 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
12128 MIN((entry_size), \
12129 ((obj)->all_reusable ? \
12130 (obj)->wired_page_count : \
12131 (obj)->resident_page_count - (obj)->reusable_page_count))
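/*
 * Illustration: OBJ_RESIDENT_COUNT() clamps an object's countable pages
 * to the size of the mapping.  For example, an 8-page entry over an
 * object with 10 resident pages, 3 of them reusable, reports
 * MIN(8, 10 - 3) = 7; if the object is "all_reusable", only its wired
 * pages are counted instead.
 */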
12132
12133 void
12134 vm_map_region_top_walk(
12135 vm_map_entry_t entry,
12136 vm_region_top_info_t top)
12137 {
12138
12139 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
12140 top->share_mode = SM_EMPTY;
12141 top->ref_count = 0;
12142 top->obj_id = 0;
12143 return;
12144 }
12145
12146 {
12147 struct vm_object *obj, *tmp_obj;
12148 int ref_count;
12149 uint32_t entry_size;
12150
12151 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
12152
12153 obj = VME_OBJECT(entry);
12154
12155 vm_object_lock(obj);
12156
12157 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12158 ref_count--;
12159
12160 assert(obj->reusable_page_count <= obj->resident_page_count);
12161 if (obj->shadow) {
12162 if (ref_count == 1)
12163 top->private_pages_resident =
12164 OBJ_RESIDENT_COUNT(obj, entry_size);
12165 else
12166 top->shared_pages_resident =
12167 OBJ_RESIDENT_COUNT(obj, entry_size);
12168 top->ref_count = ref_count;
12169 top->share_mode = SM_COW;
12170
12171 while ((tmp_obj = obj->shadow)) {
12172 vm_object_lock(tmp_obj);
12173 vm_object_unlock(obj);
12174 obj = tmp_obj;
12175
12176 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12177 ref_count--;
12178
12179 assert(obj->reusable_page_count <= obj->resident_page_count);
12180 top->shared_pages_resident +=
12181 OBJ_RESIDENT_COUNT(obj, entry_size);
12182 top->ref_count += ref_count - 1;
12183 }
12184 } else {
12185 if (entry->superpage_size) {
12186 top->share_mode = SM_LARGE_PAGE;
12187 top->shared_pages_resident = 0;
12188 top->private_pages_resident = entry_size;
12189 } else if (entry->needs_copy) {
12190 top->share_mode = SM_COW;
12191 top->shared_pages_resident =
12192 OBJ_RESIDENT_COUNT(obj, entry_size);
12193 } else {
12194 if (ref_count == 1 ||
12195 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
12196 top->share_mode = SM_PRIVATE;
12197 top->private_pages_resident =
12198 OBJ_RESIDENT_COUNT(obj,
12199 entry_size);
12200 } else {
12201 top->share_mode = SM_SHARED;
12202 top->shared_pages_resident =
12203 OBJ_RESIDENT_COUNT(obj,
12204 entry_size);
12205 }
12206 }
12207 top->ref_count = ref_count;
12208 }
12209 /* XXX K64: obj_id will be truncated */
12210 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
12211
12212 vm_object_unlock(obj);
12213 }
12214 }
12215
12216 void
12217 vm_map_region_walk(
12218 vm_map_t map,
12219 vm_map_offset_t va,
12220 vm_map_entry_t entry,
12221 vm_object_offset_t offset,
12222 vm_object_size_t range,
12223 vm_region_extended_info_t extended,
12224 boolean_t look_for_pages,
12225 mach_msg_type_number_t count)
12226 {
12227 struct vm_object *obj, *tmp_obj;
12228 vm_map_offset_t last_offset;
12229 int i;
12230 int ref_count;
12231 struct vm_object *shadow_object;
12232 int shadow_depth;
12233
12234 if ((VME_OBJECT(entry) == 0) ||
12235 (entry->is_sub_map) ||
12236 (VME_OBJECT(entry)->phys_contiguous &&
12237 !entry->superpage_size)) {
12238 extended->share_mode = SM_EMPTY;
12239 extended->ref_count = 0;
12240 return;
12241 }
12242
12243 if (entry->superpage_size) {
12244 extended->shadow_depth = 0;
12245 extended->share_mode = SM_LARGE_PAGE;
12246 extended->ref_count = 1;
12247 extended->external_pager = 0;
12248 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
12249 extended->shadow_depth = 0;
12250 return;
12251 }
12252
12253 obj = VME_OBJECT(entry);
12254
12255 vm_object_lock(obj);
12256
12257 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12258 ref_count--;
12259
12260 if (look_for_pages) {
12261 for (last_offset = offset + range;
12262 offset < last_offset;
12263 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
12264 #if DEVELOPMENT || DEBUG
12265 if (vm_region_footprint) {
12266 if (obj->purgable != VM_PURGABLE_DENY) {
12267 /* alternate accounting */
12268 } else if (entry->iokit_acct) {
12269 /* alternate accounting */
12270 extended->pages_resident++;
12271 extended->pages_dirtied++;
12272 } else {
12273 int disp;
12274
12275 disp = 0;
12276 pmap_query_page_info(map->pmap, va, &disp);
12277 if (disp & PMAP_QUERY_PAGE_PRESENT) {
12278 extended->pages_resident++;
12279 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
12280 extended->pages_reusable++;
12281 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
12282 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
12283 /* alternate accounting */
12284 } else {
12285 extended->pages_dirtied++;
12286 }
12287 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
12288 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
12289 /* alternate accounting */
12290 } else {
12291 extended->pages_swapped_out++;
12292 }
12293 }
12294 }
12295 continue;
12296 }
12297 #endif /* DEVELOPMENT || DEBUG */
12298 vm_map_region_look_for_page(map, va, obj,
12299 offset, ref_count,
12300 0, extended, count);
12301 }
12302 #if DEVELOPMENT || DEBUG
12303 if (vm_region_footprint) {
12304 goto collect_object_info;
12305 }
12306 #endif /* DEVELOPMENT || DEBUG */
12307 } else {
12308 #if DEVELOPMENT || DEBUG
12309 collect_object_info:
12310 #endif /* DEVELOPMENT || DEBUG */
12311 shadow_object = obj->shadow;
12312 shadow_depth = 0;
12313
12314 if ( !(obj->pager_trusted) && !(obj->internal))
12315 extended->external_pager = 1;
12316
12317 if (shadow_object != VM_OBJECT_NULL) {
12318 vm_object_lock(shadow_object);
12319 for (;
12320 shadow_object != VM_OBJECT_NULL;
12321 shadow_depth++) {
12322 vm_object_t next_shadow;
12323
12324 if ( !(shadow_object->pager_trusted) &&
12325 !(shadow_object->internal))
12326 extended->external_pager = 1;
12327
12328 next_shadow = shadow_object->shadow;
12329 if (next_shadow) {
12330 vm_object_lock(next_shadow);
12331 }
12332 vm_object_unlock(shadow_object);
12333 shadow_object = next_shadow;
12334 }
12335 }
12336 extended->shadow_depth = shadow_depth;
12337 }
12338
12339 if (extended->shadow_depth || entry->needs_copy)
12340 extended->share_mode = SM_COW;
12341 else {
12342 if (ref_count == 1)
12343 extended->share_mode = SM_PRIVATE;
12344 else {
12345 if (obj->true_share)
12346 extended->share_mode = SM_TRUESHARED;
12347 else
12348 extended->share_mode = SM_SHARED;
12349 }
12350 }
12351 extended->ref_count = ref_count - extended->shadow_depth;
12352
12353 for (i = 0; i < extended->shadow_depth; i++) {
12354 if ((tmp_obj = obj->shadow) == 0)
12355 break;
12356 vm_object_lock(tmp_obj);
12357 vm_object_unlock(obj);
12358
12359 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
12360 ref_count--;
12361
12362 extended->ref_count += ref_count;
12363 obj = tmp_obj;
12364 }
12365 vm_object_unlock(obj);
12366
12367 if (extended->share_mode == SM_SHARED) {
12368 vm_map_entry_t cur;
12369 vm_map_entry_t last;
12370 int my_refs;
12371
12372 obj = VME_OBJECT(entry);
12373 last = vm_map_to_entry(map);
12374 my_refs = 0;
12375
12376 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12377 ref_count--;
12378 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
12379 my_refs += vm_map_region_count_obj_refs(cur, obj);
12380
12381 if (my_refs == ref_count)
12382 extended->share_mode = SM_PRIVATE_ALIASED;
12383 else if (my_refs > 1)
12384 extended->share_mode = SM_SHARED_ALIASED;
12385 }
12386 }
12387
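/*
 * Summary (derived from the code above): vm_map_region_walk() reports,
 * roughly,
 *	SM_EMPTY	 no backing VM object
 *	SM_LARGE_PAGE	 superpage mapping
 *	SM_COW		 a shadow chain exists or the entry needs_copy
 *	SM_PRIVATE	 single reference to the object
 *	SM_TRUESHARED	 multiple references and the object is true_share
 *	SM_SHARED	 multiple references; refined to SM_PRIVATE_ALIASED
 *			 or SM_SHARED_ALIASED when all / several of those
 *			 references come from entries in this same map.
 */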
12388
12389 /* object is locked on entry and locked on return */
12390
12391
12392 static void
12393 vm_map_region_look_for_page(
12394 __unused vm_map_t map,
12395 __unused vm_map_offset_t va,
12396 vm_object_t object,
12397 vm_object_offset_t offset,
12398 int max_refcnt,
12399 int depth,
12400 vm_region_extended_info_t extended,
12401 mach_msg_type_number_t count)
12402 {
12403 vm_page_t p;
12404 vm_object_t shadow;
12405 int ref_count;
12406 vm_object_t caller_object;
12407
12408 shadow = object->shadow;
12409 caller_object = object;
12410
12411
12412 while (TRUE) {
12413
12414 if ( !(object->pager_trusted) && !(object->internal))
12415 extended->external_pager = 1;
12416
12417 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12418 if (shadow && (max_refcnt == 1))
12419 extended->pages_shared_now_private++;
12420
12421 if (!p->fictitious &&
12422 (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
12423 extended->pages_dirtied++;
12424 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
12425 if (p->reusable || object->all_reusable) {
12426 extended->pages_reusable++;
12427 }
12428 }
12429
12430 extended->pages_resident++;
12431
12432 if(object != caller_object)
12433 vm_object_unlock(object);
12434
12435 return;
12436 }
12437 if (object->internal &&
12438 object->alive &&
12439 !object->terminating &&
12440 object->pager_ready) {
12441
12442 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
12443 == VM_EXTERNAL_STATE_EXISTS) {
12444 /* the pager has that page */
12445 extended->pages_swapped_out++;
12446 if (object != caller_object)
12447 vm_object_unlock(object);
12448 return;
12449 }
12450 }
12451
12452 if (shadow) {
12453 vm_object_lock(shadow);
12454
12455 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12456 ref_count--;
12457
12458 if (++depth > extended->shadow_depth)
12459 extended->shadow_depth = depth;
12460
12461 if (ref_count > max_refcnt)
12462 max_refcnt = ref_count;
12463
12464 if(object != caller_object)
12465 vm_object_unlock(object);
12466
12467 offset = offset + object->vo_shadow_offset;
12468 object = shadow;
12469 shadow = object->shadow;
12470 continue;
12471 }
12472 if(object != caller_object)
12473 vm_object_unlock(object);
12474 break;
12475 }
12476 }
12477
12478 static int
12479 vm_map_region_count_obj_refs(
12480 vm_map_entry_t entry,
12481 vm_object_t object)
12482 {
12483 int ref_count;
12484 vm_object_t chk_obj;
12485 vm_object_t tmp_obj;
12486
12487 if (VME_OBJECT(entry) == 0)
12488 return(0);
12489
12490 if (entry->is_sub_map)
12491 return(0);
12492 else {
12493 ref_count = 0;
12494
12495 chk_obj = VME_OBJECT(entry);
12496 vm_object_lock(chk_obj);
12497
12498 while (chk_obj) {
12499 if (chk_obj == object)
12500 ref_count++;
12501 tmp_obj = chk_obj->shadow;
12502 if (tmp_obj)
12503 vm_object_lock(tmp_obj);
12504 vm_object_unlock(chk_obj);
12505
12506 chk_obj = tmp_obj;
12507 }
12508 }
12509 return(ref_count);
12510 }
12511
12512
12513 /*
12514 * Routine: vm_map_simplify
12515 *
12516 * Description:
12517 * Attempt to simplify the map representation in
12518 * the vicinity of the given starting address.
12519 * Note:
12520 * This routine is intended primarily to keep the
12521 * kernel maps more compact -- they generally don't
12522 * benefit from the "expand a map entry" technology
12523 * at allocation time because the adjacent entry
12524 * is often wired down.
12525 */
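/*
 * Illustration: two adjacent entries [0x1000, 0x2000) and
 * [0x2000, 0x3000), backed by the same object at offsets 0 and 0x1000
 * and with identical attributes, are collapsed by
 * vm_map_simplify_entry() into a single entry [0x1000, 0x3000) at
 * offset 0; the redundant object (or submap) reference is dropped.
 */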
12526 void
12527 vm_map_simplify_entry(
12528 vm_map_t map,
12529 vm_map_entry_t this_entry)
12530 {
12531 vm_map_entry_t prev_entry;
12532
12533 counter(c_vm_map_simplify_entry_called++);
12534
12535 prev_entry = this_entry->vme_prev;
12536
12537 if ((this_entry != vm_map_to_entry(map)) &&
12538 (prev_entry != vm_map_to_entry(map)) &&
12539
12540 (prev_entry->vme_end == this_entry->vme_start) &&
12541
12542 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
12543 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12544 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
12545 prev_entry->vme_start))
12546 == VME_OFFSET(this_entry)) &&
12547
12548 (prev_entry->behavior == this_entry->behavior) &&
12549 (prev_entry->needs_copy == this_entry->needs_copy) &&
12550 (prev_entry->protection == this_entry->protection) &&
12551 (prev_entry->max_protection == this_entry->max_protection) &&
12552 (prev_entry->inheritance == this_entry->inheritance) &&
12553 (prev_entry->use_pmap == this_entry->use_pmap) &&
12554 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
12555 (prev_entry->no_cache == this_entry->no_cache) &&
12556 (prev_entry->permanent == this_entry->permanent) &&
12557 (prev_entry->map_aligned == this_entry->map_aligned) &&
12558 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12559 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12560 /* from_reserved_zone: OK if that field doesn't match */
12561 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
12562 (prev_entry->vme_resilient_codesign ==
12563 this_entry->vme_resilient_codesign) &&
12564 (prev_entry->vme_resilient_media ==
12565 this_entry->vme_resilient_media) &&
12566
12567 (prev_entry->wired_count == this_entry->wired_count) &&
12568 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
12569
12570 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
12571 (prev_entry->in_transition == FALSE) &&
12572 (this_entry->in_transition == FALSE) &&
12573 (prev_entry->needs_wakeup == FALSE) &&
12574 (this_entry->needs_wakeup == FALSE) &&
12575 (prev_entry->is_shared == FALSE) &&
12576 (this_entry->is_shared == FALSE) &&
12577 (prev_entry->superpage_size == FALSE) &&
12578 (this_entry->superpage_size == FALSE)
12579 ) {
12580 vm_map_store_entry_unlink(map, prev_entry);
12581 assert(prev_entry->vme_start < this_entry->vme_end);
12582 if (prev_entry->map_aligned)
12583 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12584 VM_MAP_PAGE_MASK(map)));
12585 this_entry->vme_start = prev_entry->vme_start;
12586 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12587
12588 if (map->holelistenabled) {
12589 vm_map_store_update_first_free(map, this_entry, TRUE);
12590 }
12591
12592 if (prev_entry->is_sub_map) {
12593 vm_map_deallocate(VME_SUBMAP(prev_entry));
12594 } else {
12595 vm_object_deallocate(VME_OBJECT(prev_entry));
12596 }
12597 vm_map_entry_dispose(map, prev_entry);
12598 SAVE_HINT_MAP_WRITE(map, this_entry);
12599 counter(c_vm_map_simplified++);
12600 }
12601 }
12602
12603 void
12604 vm_map_simplify(
12605 vm_map_t map,
12606 vm_map_offset_t start)
12607 {
12608 vm_map_entry_t this_entry;
12609
12610 vm_map_lock(map);
12611 if (vm_map_lookup_entry(map, start, &this_entry)) {
12612 vm_map_simplify_entry(map, this_entry);
12613 vm_map_simplify_entry(map, this_entry->vme_next);
12614 }
12615 counter(c_vm_map_simplify_called++);
12616 vm_map_unlock(map);
12617 }
12618
12619 static void
12620 vm_map_simplify_range(
12621 vm_map_t map,
12622 vm_map_offset_t start,
12623 vm_map_offset_t end)
12624 {
12625 vm_map_entry_t entry;
12626
12627 /*
12628 * The map should be locked (for "write") by the caller.
12629 */
12630
12631 if (start >= end) {
12632 /* invalid address range */
12633 return;
12634 }
12635
12636 start = vm_map_trunc_page(start,
12637 VM_MAP_PAGE_MASK(map));
12638 end = vm_map_round_page(end,
12639 VM_MAP_PAGE_MASK(map));
12640
12641 if (!vm_map_lookup_entry(map, start, &entry)) {
12642 /* "start" is not mapped and "entry" ends before "start" */
12643 if (entry == vm_map_to_entry(map)) {
12644 /* start with first entry in the map */
12645 entry = vm_map_first_entry(map);
12646 } else {
12647 /* start with next entry */
12648 entry = entry->vme_next;
12649 }
12650 }
12651
12652 while (entry != vm_map_to_entry(map) &&
12653 entry->vme_start <= end) {
12654 /* try and coalesce "entry" with its previous entry */
12655 vm_map_simplify_entry(map, entry);
12656 entry = entry->vme_next;
12657 }
12658 }
12659
12660
12661 /*
12662 * Routine: vm_map_machine_attribute
12663 * Purpose:
12664 * Provide machine-specific attributes to mappings,
12665 * such as cacheability etc. for machines that provide
12666 * them. NUMA architectures and machines with big/strange
12667 * caches will use this.
12668 * Note:
12669 * Responsibilities for locking and checking are handled here;
12670 * everything else is handled in the pmap module. If any non-volatile
12671 * information must be kept, the pmap module should handle
12672 * it itself. [This assumes that attributes do not
12673 * need to be inherited, which seems ok to me]
12674 */
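/*
 * Illustration: a sketch of a typical caller, assuming the MIG-generated
 * user routine and the MATTR_CACHE attribute handled below ("addr" and
 * "size" stand in for the caller's range):
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr;
 *
 *	kr = vm_machine_attribute(mach_task_self(), (vm_address_t)addr,
 *				  size, MATTR_CACHE, &val);
 */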
12675 kern_return_t
12676 vm_map_machine_attribute(
12677 vm_map_t map,
12678 vm_map_offset_t start,
12679 vm_map_offset_t end,
12680 vm_machine_attribute_t attribute,
12681 vm_machine_attribute_val_t* value) /* IN/OUT */
12682 {
12683 kern_return_t ret;
12684 vm_map_size_t sync_size;
12685 vm_map_entry_t entry;
12686
12687 if (start < vm_map_min(map) || end > vm_map_max(map))
12688 return KERN_INVALID_ADDRESS;
12689
12690 /* Figure how much memory we need to flush (in page increments) */
12691 sync_size = end - start;
12692
12693 vm_map_lock(map);
12694
12695 if (attribute != MATTR_CACHE) {
12696 /* If we don't have to find physical addresses, we */
12697 /* don't have to do an explicit traversal here. */
12698 ret = pmap_attribute(map->pmap, start, end-start,
12699 attribute, value);
12700 vm_map_unlock(map);
12701 return ret;
12702 }
12703
12704 ret = KERN_SUCCESS; /* Assume it all worked */
12705
12706 while(sync_size) {
12707 if (vm_map_lookup_entry(map, start, &entry)) {
12708 vm_map_size_t sub_size;
12709 if((entry->vme_end - start) > sync_size) {
12710 sub_size = sync_size;
12711 sync_size = 0;
12712 } else {
12713 sub_size = entry->vme_end - start;
12714 sync_size -= sub_size;
12715 }
12716 if(entry->is_sub_map) {
12717 vm_map_offset_t sub_start;
12718 vm_map_offset_t sub_end;
12719
12720 sub_start = (start - entry->vme_start)
12721 + VME_OFFSET(entry);
12722 sub_end = sub_start + sub_size;
12723 vm_map_machine_attribute(
12724 VME_SUBMAP(entry),
12725 sub_start,
12726 sub_end,
12727 attribute, value);
12728 } else {
12729 if (VME_OBJECT(entry)) {
12730 vm_page_t m;
12731 vm_object_t object;
12732 vm_object_t base_object;
12733 vm_object_t last_object;
12734 vm_object_offset_t offset;
12735 vm_object_offset_t base_offset;
12736 vm_map_size_t range;
12737 range = sub_size;
12738 offset = (start - entry->vme_start)
12739 + VME_OFFSET(entry);
12740 base_offset = offset;
12741 object = VME_OBJECT(entry);
12742 base_object = object;
12743 last_object = NULL;
12744
12745 vm_object_lock(object);
12746
12747 while (range) {
12748 m = vm_page_lookup(
12749 object, offset);
12750
12751 if (m && !m->fictitious) {
12752 ret =
12753 pmap_attribute_cache_sync(
12754 VM_PAGE_GET_PHYS_PAGE(m),
12755 PAGE_SIZE,
12756 attribute, value);
12757
12758 } else if (object->shadow) {
12759 offset = offset + object->vo_shadow_offset;
12760 last_object = object;
12761 object = object->shadow;
12762 vm_object_lock(last_object->shadow);
12763 vm_object_unlock(last_object);
12764 continue;
12765 }
12766 range -= PAGE_SIZE;
12767
12768 if (base_object != object) {
12769 vm_object_unlock(object);
12770 vm_object_lock(base_object);
12771 object = base_object;
12772 }
12773 /* Bump to the next page */
12774 base_offset += PAGE_SIZE;
12775 offset = base_offset;
12776 }
12777 vm_object_unlock(object);
12778 }
12779 }
12780 start += sub_size;
12781 } else {
12782 vm_map_unlock(map);
12783 return KERN_FAILURE;
12784 }
12785
12786 }
12787
12788 vm_map_unlock(map);
12789
12790 return ret;
12791 }
12792
12793 /*
12794 * vm_map_behavior_set:
12795 *
12796 * Sets the paging reference behavior of the specified address
12797 * range in the target map. Paging reference behavior affects
12798 * how pagein operations resulting from faults on the map will be
12799 * clustered.
12800 */
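/*
 * Illustration (assuming the usual BSD madvise() front end): the advice
 * values map onto the VM_BEHAVIOR_* cases handled below, e.g.:
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);	-> VM_BEHAVIOR_SEQUENTIAL
 *	madvise(addr, len, MADV_WILLNEED);	-> VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_FREE);		-> VM_BEHAVIOR_FREE
 *
 * "addr"/"len" stand in for a page-aligned range owned by the caller.
 */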
12801 kern_return_t
12802 vm_map_behavior_set(
12803 vm_map_t map,
12804 vm_map_offset_t start,
12805 vm_map_offset_t end,
12806 vm_behavior_t new_behavior)
12807 {
12808 vm_map_entry_t entry;
12809 vm_map_entry_t temp_entry;
12810
12811 XPR(XPR_VM_MAP,
12812 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
12813 map, start, end, new_behavior, 0);
12814
12815 if (start > end ||
12816 start < vm_map_min(map) ||
12817 end > vm_map_max(map)) {
12818 return KERN_NO_SPACE;
12819 }
12820
12821 switch (new_behavior) {
12822
12823 /*
12824 * This first block of behaviors all set a persistent state on the specified
12825 * memory range. All we have to do here is to record the desired behavior
12826 * in the vm_map_entry_t's.
12827 */
12828
12829 case VM_BEHAVIOR_DEFAULT:
12830 case VM_BEHAVIOR_RANDOM:
12831 case VM_BEHAVIOR_SEQUENTIAL:
12832 case VM_BEHAVIOR_RSEQNTL:
12833 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12834 vm_map_lock(map);
12835
12836 /*
12837 * The entire address range must be valid for the map.
12838 * Note that vm_map_range_check() does a
12839 * vm_map_lookup_entry() internally and returns the
12840 * entry containing the start of the address range if
12841 * the entire range is valid.
12842 */
12843 if (vm_map_range_check(map, start, end, &temp_entry)) {
12844 entry = temp_entry;
12845 vm_map_clip_start(map, entry, start);
12846 }
12847 else {
12848 vm_map_unlock(map);
12849 return(KERN_INVALID_ADDRESS);
12850 }
12851
12852 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12853 vm_map_clip_end(map, entry, end);
12854 if (entry->is_sub_map) {
12855 assert(!entry->use_pmap);
12856 }
12857
12858 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12859 entry->zero_wired_pages = TRUE;
12860 } else {
12861 entry->behavior = new_behavior;
12862 }
12863 entry = entry->vme_next;
12864 }
12865
12866 vm_map_unlock(map);
12867 break;
12868
12869 /*
12870 * The rest of these are different from the above in that they cause
12871 * an immediate action to take place as opposed to setting a behavior that
12872 * affects future actions.
12873 */
12874
12875 case VM_BEHAVIOR_WILLNEED:
12876 return vm_map_willneed(map, start, end);
12877
12878 case VM_BEHAVIOR_DONTNEED:
12879 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12880
12881 case VM_BEHAVIOR_FREE:
12882 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12883
12884 case VM_BEHAVIOR_REUSABLE:
12885 return vm_map_reusable_pages(map, start, end);
12886
12887 case VM_BEHAVIOR_REUSE:
12888 return vm_map_reuse_pages(map, start, end);
12889
12890 case VM_BEHAVIOR_CAN_REUSE:
12891 return vm_map_can_reuse(map, start, end);
12892
12893 #if MACH_ASSERT
12894 case VM_BEHAVIOR_PAGEOUT:
12895 return vm_map_pageout(map, start, end);
12896 #endif /* MACH_ASSERT */
12897
12898 default:
12899 return(KERN_INVALID_ARGUMENT);
12900 }
12901
12902 return(KERN_SUCCESS);
12903 }
12904
12905
12906 /*
12907 * Internals for madvise(MADV_WILLNEED) system call.
12908 *
12909 * The present implementation is to do a read-ahead if the mapping corresponds
12910 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12911 * and basically ignore the "advice" (which we are always free to do).
12912 */
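/*
 * Illustration: from user space, the read-ahead described above would
 * typically be requested as follows ("fd" is an open regular file):
 *
 *	void *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (buf != MAP_FAILED)
 *		(void) madvise(buf, len, MADV_WILLNEED);
 *
 * For anonymous memory the advice is accepted but, as noted above,
 * effectively ignored.
 */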
12913
12914
12915 static kern_return_t
12916 vm_map_willneed(
12917 vm_map_t map,
12918 vm_map_offset_t start,
12919 vm_map_offset_t end
12920 )
12921 {
12922 vm_map_entry_t entry;
12923 vm_object_t object;
12924 memory_object_t pager;
12925 struct vm_object_fault_info fault_info;
12926 kern_return_t kr;
12927 vm_object_size_t len;
12928 vm_object_offset_t offset;
12929
12930 /*
12931 * Fill in static values in fault_info. Several fields get ignored by the code
12932 * we call, but we'll fill them in anyway since uninitialized fields are bad
12933 * when it comes to future backwards compatibility.
12934 */
12935
12936 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12937 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12938 fault_info.no_cache = FALSE; /* ignored value */
12939 fault_info.stealth = TRUE;
12940 fault_info.io_sync = FALSE;
12941 fault_info.cs_bypass = FALSE;
12942 fault_info.mark_zf_absent = FALSE;
12943 fault_info.batch_pmap_op = FALSE;
12944
12945 /*
12946 * The MADV_WILLNEED operation doesn't require any changes to the
12947 * vm_map_entry_t's, so the read lock is sufficient.
12948 */
12949
12950 vm_map_lock_read(map);
12951
12952 /*
12953 * The madvise semantics require that the address range be fully
12954 * allocated with no holes. Otherwise, we're required to return
12955 * an error.
12956 */
12957
12958 if (! vm_map_range_check(map, start, end, &entry)) {
12959 vm_map_unlock_read(map);
12960 return KERN_INVALID_ADDRESS;
12961 }
12962
12963 /*
12964 * Examine each vm_map_entry_t in the range.
12965 */
12966 for (; entry != vm_map_to_entry(map) && start < end; ) {
12967
12968 /*
12969 * The first time through, the start address could be anywhere
12970 * within the vm_map_entry we found. So adjust the offset to
12971 * correspond. After that, the offset will always be zero to
12972 * correspond to the beginning of the current vm_map_entry.
12973 */
12974 offset = (start - entry->vme_start) + VME_OFFSET(entry);
12975
12976 /*
12977 * Set the length so we don't go beyond the end of the
12978 * map_entry or beyond the end of the range we were given.
12979 * This range could also span multiple map entries, all of which
12980 * map different files, so make sure we only do the right amount
12981 * of I/O for each object. Note that it's possible for there
12982 * to be multiple map entries all referring to the same object
12983 * but with different page permissions, but it's not worth
12984 * trying to optimize that case.
12985 */
12986 len = MIN(entry->vme_end - start, end - start);
12987
12988 if ((vm_size_t) len != len) {
12989 /* 32-bit overflow */
12990 len = (vm_size_t) (0 - PAGE_SIZE);
12991 }
12992 fault_info.cluster_size = (vm_size_t) len;
12993 fault_info.lo_offset = offset;
12994 fault_info.hi_offset = offset + len;
12995 fault_info.user_tag = VME_ALIAS(entry);
12996 fault_info.pmap_options = 0;
12997 if (entry->iokit_acct ||
12998 (!entry->is_sub_map && !entry->use_pmap)) {
12999 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13000 }
13001
13002 /*
13003 * If there's no read permission to this mapping, then just
13004 * skip it.
13005 */
13006 if ((entry->protection & VM_PROT_READ) == 0) {
13007 entry = entry->vme_next;
13008 start = entry->vme_start;
13009 continue;
13010 }
13011
13012 /*
13013 * Find the file object backing this map entry. If there is
13014 * none, then we simply ignore the "will need" advice for this
13015 * entry and go on to the next one.
13016 */
13017 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
13018 entry = entry->vme_next;
13019 start = entry->vme_start;
13020 continue;
13021 }
13022
13023 /*
13024 * The data_request() could take a long time, so let's
13025 * release the map lock to avoid blocking other threads.
13026 */
13027 vm_map_unlock_read(map);
13028
13029 vm_object_paging_begin(object);
13030 pager = object->pager;
13031 vm_object_unlock(object);
13032
13033 /*
13034 * Get the data from the object asynchronously.
13035 *
13036 * Note that memory_object_data_request() places limits on the
13037 * amount of I/O it will do. Regardless of the len we
13038 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
13039 * silently truncates the len to that size. This isn't
13040 * necessarily bad since madvise shouldn't really be used to
13041 * page in unlimited amounts of data. Other Unix variants
13042 * limit the willneed case as well. If this turns out to be an
13043 * issue for developers, then we can always adjust the policy
13044 * here and still be backwards compatible since this is all
13045 * just "advice".
13046 */
13047 kr = memory_object_data_request(
13048 pager,
13049 offset + object->paging_offset,
13050 0, /* ignored */
13051 VM_PROT_READ,
13052 (memory_object_fault_info_t)&fault_info);
13053
13054 vm_object_lock(object);
13055 vm_object_paging_end(object);
13056 vm_object_unlock(object);
13057
13058 /*
13059 * If we couldn't do the I/O for some reason, just give up on
13060 * the madvise. We still return success to the user since
13061 * madvise isn't supposed to fail when the advice can't be
13062 * taken.
13063 */
13064 if (kr != KERN_SUCCESS) {
13065 return KERN_SUCCESS;
13066 }
13067
13068 start += len;
13069 if (start >= end) {
13070 /* done */
13071 return KERN_SUCCESS;
13072 }
13073
13074 /* look up next entry */
13075 vm_map_lock_read(map);
13076 if (! vm_map_lookup_entry(map, start, &entry)) {
13077 /*
13078 * There's a new hole in the address range.
13079 */
13080 vm_map_unlock_read(map);
13081 return KERN_INVALID_ADDRESS;
13082 }
13083 }
13084
13085 vm_map_unlock_read(map);
13086 return KERN_SUCCESS;
13087 }
13088
13089 static boolean_t
13090 vm_map_entry_is_reusable(
13091 vm_map_entry_t entry)
13092 {
13093 /* Only user map entries */
13094
13095 vm_object_t object;
13096
13097 if (entry->is_sub_map) {
13098 return FALSE;
13099 }
13100
13101 switch (VME_ALIAS(entry)) {
13102 case VM_MEMORY_MALLOC:
13103 case VM_MEMORY_MALLOC_SMALL:
13104 case VM_MEMORY_MALLOC_LARGE:
13105 case VM_MEMORY_REALLOC:
13106 case VM_MEMORY_MALLOC_TINY:
13107 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
13108 case VM_MEMORY_MALLOC_LARGE_REUSED:
13109 /*
13110 * This is a malloc() memory region: check if it's still
13111 * in its original state and can be re-used for more
13112 * malloc() allocations.
13113 */
13114 break;
13115 default:
13116 /*
13117 * Not a malloc() memory region: let the caller decide if
13118 * it's re-usable.
13119 */
13120 return TRUE;
13121 }
13122
13123 if (entry->is_shared ||
13124 entry->is_sub_map ||
13125 entry->in_transition ||
13126 entry->protection != VM_PROT_DEFAULT ||
13127 entry->max_protection != VM_PROT_ALL ||
13128 entry->inheritance != VM_INHERIT_DEFAULT ||
13129 entry->no_cache ||
13130 entry->permanent ||
13131 entry->superpage_size != FALSE ||
13132 entry->zero_wired_pages ||
13133 entry->wired_count != 0 ||
13134 entry->user_wired_count != 0) {
13135 return FALSE;
13136 }
13137
13138 object = VME_OBJECT(entry);
13139 if (object == VM_OBJECT_NULL) {
13140 return TRUE;
13141 }
13142 if (
13143 #if 0
13144 /*
13145 * Let's proceed even if the VM object is potentially
13146 * shared.
13147 * We check for this later when processing the actual
13148 * VM pages, so the contents will be safe if shared.
13149 *
13150 * But we can still mark this memory region as "reusable" to
13151 * acknowledge that the caller did let us know that the memory
13152 * could be re-used and should not be penalized for holding
13153 * on to it. This allows its "resident size" to not include
13154 * the reusable range.
13155 */
13156 object->ref_count == 1 &&
13157 #endif
13158 object->wired_page_count == 0 &&
13159 object->copy == VM_OBJECT_NULL &&
13160 object->shadow == VM_OBJECT_NULL &&
13161 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
13162 object->internal &&
13163 !object->true_share &&
13164 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
13165 !object->code_signed) {
13166 return TRUE;
13167 }
13168 return FALSE;
13169
13170
13171 }
13172
13173 static kern_return_t
13174 vm_map_reuse_pages(
13175 vm_map_t map,
13176 vm_map_offset_t start,
13177 vm_map_offset_t end)
13178 {
13179 vm_map_entry_t entry;
13180 vm_object_t object;
13181 vm_object_offset_t start_offset, end_offset;
13182
13183 /*
13184 * The MADV_REUSE operation doesn't require any changes to the
13185 * vm_map_entry_t's, so the read lock is sufficient.
13186 */
13187
13188 vm_map_lock_read(map);
13189 assert(map->pmap != kernel_pmap); /* protect alias access */
13190
13191 /*
13192 * The madvise semantics require that the address range be fully
13193 * allocated with no holes. Otherwise, we're required to return
13194 * an error.
13195 */
13196
13197 if (!vm_map_range_check(map, start, end, &entry)) {
13198 vm_map_unlock_read(map);
13199 vm_page_stats_reusable.reuse_pages_failure++;
13200 return KERN_INVALID_ADDRESS;
13201 }
13202
13203 /*
13204 * Examine each vm_map_entry_t in the range.
13205 */
13206 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13207 entry = entry->vme_next) {
13208 /*
13209 * Sanity check on the VM map entry.
13210 */
13211 if (! vm_map_entry_is_reusable(entry)) {
13212 vm_map_unlock_read(map);
13213 vm_page_stats_reusable.reuse_pages_failure++;
13214 return KERN_INVALID_ADDRESS;
13215 }
13216
13217 /*
13218 * The first time through, the start address could be anywhere
13219 * within the vm_map_entry we found. So adjust the offset to
13220 * correspond.
13221 */
13222 if (entry->vme_start < start) {
13223 start_offset = start - entry->vme_start;
13224 } else {
13225 start_offset = 0;
13226 }
13227 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
13228 start_offset += VME_OFFSET(entry);
13229 end_offset += VME_OFFSET(entry);
13230
13231 assert(!entry->is_sub_map);
13232 object = VME_OBJECT(entry);
13233 if (object != VM_OBJECT_NULL) {
13234 vm_object_lock(object);
13235 vm_object_reuse_pages(object, start_offset, end_offset,
13236 TRUE);
13237 vm_object_unlock(object);
13238 }
13239
13240 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
13241 /*
13242 * XXX
13243 * We do not hold the VM map exclusively here.
13244 * The "alias" field is not that critical, so it's
13245 * safe to update it here, as long as it is the only
13246 * one that can be modified while holding the VM map
13247 * "shared".
13248 */
13249 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
13250 }
13251 }
13252
13253 vm_map_unlock_read(map);
13254 vm_page_stats_reusable.reuse_pages_success++;
13255 return KERN_SUCCESS;
13256 }
13257
13258
13259 static kern_return_t
13260 vm_map_reusable_pages(
13261 vm_map_t map,
13262 vm_map_offset_t start,
13263 vm_map_offset_t end)
13264 {
13265 vm_map_entry_t entry;
13266 vm_object_t object;
13267 vm_object_offset_t start_offset, end_offset;
13268 vm_map_offset_t pmap_offset;
13269
13270 /*
13271 * The MADV_REUSABLE operation doesn't require any changes to the
13272 * vm_map_entry_t's, so the read lock is sufficient.
13273 */
13274
13275 vm_map_lock_read(map);
13276 assert(map->pmap != kernel_pmap); /* protect alias access */
13277
13278 /*
13279 * The madvise semantics require that the address range be fully
13280 * allocated with no holes. Otherwise, we're required to return
13281 * an error.
13282 */
13283
13284 if (!vm_map_range_check(map, start, end, &entry)) {
13285 vm_map_unlock_read(map);
13286 vm_page_stats_reusable.reusable_pages_failure++;
13287 return KERN_INVALID_ADDRESS;
13288 }
13289
13290 /*
13291 * Examine each vm_map_entry_t in the range.
13292 */
13293 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13294 entry = entry->vme_next) {
13295 int kill_pages = 0;
13296
13297 /*
13298 * Sanity check on the VM map entry.
13299 */
13300 if (! vm_map_entry_is_reusable(entry)) {
13301 vm_map_unlock_read(map);
13302 vm_page_stats_reusable.reusable_pages_failure++;
13303 return KERN_INVALID_ADDRESS;
13304 }
13305
13306 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
13307 /* not writable: can't discard contents */
13308 vm_map_unlock_read(map);
13309 vm_page_stats_reusable.reusable_nonwritable++;
13310 vm_page_stats_reusable.reusable_pages_failure++;
13311 return KERN_PROTECTION_FAILURE;
13312 }
13313
13314 /*
13315 * The first time through, the start address could be anywhere
13316 * within the vm_map_entry we found. So adjust the offset to
13317 * correspond.
13318 */
13319 if (entry->vme_start < start) {
13320 start_offset = start - entry->vme_start;
13321 pmap_offset = start;
13322 } else {
13323 start_offset = 0;
13324 pmap_offset = entry->vme_start;
13325 }
13326 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
13327 start_offset += VME_OFFSET(entry);
13328 end_offset += VME_OFFSET(entry);
13329
13330 assert(!entry->is_sub_map);
13331 object = VME_OBJECT(entry);
13332 if (object == VM_OBJECT_NULL)
13333 continue;
13334
13335
13336 vm_object_lock(object);
13337 if (((object->ref_count == 1) ||
13338 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
13339 object->copy == VM_OBJECT_NULL)) &&
13340 object->shadow == VM_OBJECT_NULL &&
13341 /*
13342 * "iokit_acct" entries are billed for their virtual size
13343 * (rather than for their resident pages only), so they
13344 * wouldn't benefit from making pages reusable, and it
13345 * would be hard to keep track of pages that are both
13346 * "iokit_acct" and "reusable" in the pmap stats and
13347 * ledgers.
13348 */
13349 !(entry->iokit_acct ||
13350 (!entry->is_sub_map && !entry->use_pmap))) {
13351 if (object->ref_count != 1) {
13352 vm_page_stats_reusable.reusable_shared++;
13353 }
13354 kill_pages = 1;
13355 } else {
13356 kill_pages = -1;
13357 }
13358 if (kill_pages != -1) {
13359 vm_object_deactivate_pages(object,
13360 start_offset,
13361 end_offset - start_offset,
13362 kill_pages,
13363 TRUE /*reusable_pages*/,
13364 map->pmap,
13365 pmap_offset);
13366 } else {
13367 vm_page_stats_reusable.reusable_pages_shared++;
13368 }
13369 vm_object_unlock(object);
13370
13371 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
13372 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
13373 /*
13374 * XXX
13375 * We do not hold the VM map exclusively here.
13376 * The "alias" field is not that critical, so it's
13377 * safe to update it here, as long as it is the only
13378 * one that can be modified while holding the VM map
13379 * "shared".
13380 */
13381 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
13382 }
13383 }
13384
13385 vm_map_unlock_read(map);
13386 vm_page_stats_reusable.reusable_pages_success++;
13387 return KERN_SUCCESS;
13388 }
13389
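/*
 * Illustration (assuming the madvise() front end): user-level allocators
 * are the intended clients of the reuse/reusable routines above and of
 * vm_map_can_reuse() below.  A typical pattern for a page-aligned free
 * block "p" of "sz" bytes:
 *
 *	madvise(p, sz, MADV_FREE_REUSABLE);	donate the pages, keep the VA
 *	...
 *	madvise(p, sz, MADV_FREE_REUSE);	reclaim them before reusing
 *
 * MADV_CAN_REUSE only checks whether the range qualifies.
 */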
13390
13391 static kern_return_t
13392 vm_map_can_reuse(
13393 vm_map_t map,
13394 vm_map_offset_t start,
13395 vm_map_offset_t end)
13396 {
13397 vm_map_entry_t entry;
13398
13399 /*
13400 * The MADV_CAN_REUSE operation doesn't require any changes to the
13401 * vm_map_entry_t's, so the read lock is sufficient.
13402 */
13403
13404 vm_map_lock_read(map);
13405 assert(map->pmap != kernel_pmap); /* protect alias access */
13406
13407 /*
13408 * The madvise semantics require that the address range be fully
13409 * allocated with no holes. Otherwise, we're required to return
13410 * an error.
13411 */
13412
13413 if (!vm_map_range_check(map, start, end, &entry)) {
13414 vm_map_unlock_read(map);
13415 vm_page_stats_reusable.can_reuse_failure++;
13416 return KERN_INVALID_ADDRESS;
13417 }
13418
13419 /*
13420 * Examine each vm_map_entry_t in the range.
13421 */
13422 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13423 entry = entry->vme_next) {
13424 /*
13425 * Sanity check on the VM map entry.
13426 */
13427 if (! vm_map_entry_is_reusable(entry)) {
13428 vm_map_unlock_read(map);
13429 vm_page_stats_reusable.can_reuse_failure++;
13430 return KERN_INVALID_ADDRESS;
13431 }
13432 }
13433
13434 vm_map_unlock_read(map);
13435 vm_page_stats_reusable.can_reuse_success++;
13436 return KERN_SUCCESS;
13437 }
13438
13439
13440 #if MACH_ASSERT
13441 static kern_return_t
13442 vm_map_pageout(
13443 vm_map_t map,
13444 vm_map_offset_t start,
13445 vm_map_offset_t end)
13446 {
13447 vm_map_entry_t entry;
13448
13449 /*
13450 * The MADV_PAGEOUT operation doesn't require any changes to the
13451 * vm_map_entry_t's, so the read lock is sufficient.
13452 */
13453
13454 vm_map_lock_read(map);
13455
13456 /*
13457 * The madvise semantics require that the address range be fully
13458 * allocated with no holes. Otherwise, we're required to return
13459 * an error.
13460 */
13461
13462 if (!vm_map_range_check(map, start, end, &entry)) {
13463 vm_map_unlock_read(map);
13464 return KERN_INVALID_ADDRESS;
13465 }
13466
13467 /*
13468 * Examine each vm_map_entry_t in the range.
13469 */
13470 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13471 entry = entry->vme_next) {
13472 vm_object_t object;
13473
13474 /*
13475 * Sanity check on the VM map entry.
13476 */
13477 if (entry->is_sub_map) {
13478 vm_map_t submap;
13479 vm_map_offset_t submap_start;
13480 vm_map_offset_t submap_end;
13481 vm_map_entry_t submap_entry;
13482
13483 submap = VME_SUBMAP(entry);
13484 submap_start = VME_OFFSET(entry);
13485 submap_end = submap_start + (entry->vme_end -
13486 entry->vme_start);
13487
13488 vm_map_lock_read(submap);
13489
13490 if (! vm_map_range_check(submap,
13491 submap_start,
13492 submap_end,
13493 &submap_entry)) {
13494 vm_map_unlock_read(submap);
13495 vm_map_unlock_read(map);
13496 return KERN_INVALID_ADDRESS;
13497 }
13498
13499 object = VME_OBJECT(submap_entry);
13500 if (submap_entry->is_sub_map ||
13501 object == VM_OBJECT_NULL ||
13502 !object->internal) {
13503 vm_map_unlock_read(submap);
13504 continue;
13505 }
13506
13507 vm_object_pageout(object);
13508
13509 vm_map_unlock_read(submap);
13510 submap = VM_MAP_NULL;
13511 submap_entry = VM_MAP_ENTRY_NULL;
13512 continue;
13513 }
13514
13515 object = VME_OBJECT(entry);
13516 if (entry->is_sub_map ||
13517 object == VM_OBJECT_NULL ||
13518 !object->internal) {
13519 continue;
13520 }
13521
13522 vm_object_pageout(object);
13523 }
13524
13525 vm_map_unlock_read(map);
13526 return KERN_SUCCESS;
13527 }
13528 #endif /* MACH_ASSERT */
13529
13530
13531 /*
13532 * Routine: vm_map_entry_insert
13533 *
13534 * Description: This routine inserts a new vm_map_entry into a locked map.
13535 */
13536 vm_map_entry_t
13537 vm_map_entry_insert(
13538 vm_map_t map,
13539 vm_map_entry_t insp_entry,
13540 vm_map_offset_t start,
13541 vm_map_offset_t end,
13542 vm_object_t object,
13543 vm_object_offset_t offset,
13544 boolean_t needs_copy,
13545 boolean_t is_shared,
13546 boolean_t in_transition,
13547 vm_prot_t cur_protection,
13548 vm_prot_t max_protection,
13549 vm_behavior_t behavior,
13550 vm_inherit_t inheritance,
13551 unsigned wired_count,
13552 boolean_t no_cache,
13553 boolean_t permanent,
13554 unsigned int superpage_size,
13555 boolean_t clear_map_aligned,
13556 boolean_t is_submap)
13557 {
13558 vm_map_entry_t new_entry;
13559
13560 assert(insp_entry != (vm_map_entry_t)0);
13561
13562 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
13563
13564 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13565 new_entry->map_aligned = TRUE;
13566 } else {
13567 new_entry->map_aligned = FALSE;
13568 }
13569 if (clear_map_aligned &&
13570 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13571 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
13572 new_entry->map_aligned = FALSE;
13573 }
13574
13575 new_entry->vme_start = start;
13576 new_entry->vme_end = end;
13577 assert(page_aligned(new_entry->vme_start));
13578 assert(page_aligned(new_entry->vme_end));
13579 if (new_entry->map_aligned) {
13580 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13581 VM_MAP_PAGE_MASK(map)));
13582 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13583 VM_MAP_PAGE_MASK(map)));
13584 }
13585 assert(new_entry->vme_start < new_entry->vme_end);
13586
13587 VME_OBJECT_SET(new_entry, object);
13588 VME_OFFSET_SET(new_entry, offset);
13589 new_entry->is_shared = is_shared;
13590 new_entry->is_sub_map = is_submap;
13591 new_entry->needs_copy = needs_copy;
13592 new_entry->in_transition = in_transition;
13593 new_entry->needs_wakeup = FALSE;
13594 new_entry->inheritance = inheritance;
13595 new_entry->protection = cur_protection;
13596 new_entry->max_protection = max_protection;
13597 new_entry->behavior = behavior;
13598 new_entry->wired_count = wired_count;
13599 new_entry->user_wired_count = 0;
13600 if (is_submap) {
13601 /*
13602 * submap: "use_pmap" means "nested".
13603 * default: false.
13604 */
13605 new_entry->use_pmap = FALSE;
13606 } else {
13607 /*
13608 * object: "use_pmap" means "use pmap accounting" for footprint.
13609 * default: true.
13610 */
13611 new_entry->use_pmap = TRUE;
13612 }
13613 VME_ALIAS_SET(new_entry, 0);
13614 new_entry->zero_wired_pages = FALSE;
13615 new_entry->no_cache = no_cache;
13616 new_entry->permanent = permanent;
13617 if (superpage_size)
13618 new_entry->superpage_size = TRUE;
13619 else
13620 new_entry->superpage_size = FALSE;
13621 new_entry->used_for_jit = FALSE;
13622 new_entry->iokit_acct = FALSE;
13623 new_entry->vme_resilient_codesign = FALSE;
13624 new_entry->vme_resilient_media = FALSE;
13625 new_entry->vme_atomic = FALSE;
13626
13627 /*
13628 * Insert the new entry into the list.
13629 */
13630
13631 vm_map_store_entry_link(map, insp_entry, new_entry);
13632 map->size += end - start;
13633
13634 /*
13635 * Update the free space hint and the lookup hint.
13636 */
13637
13638 SAVE_HINT_MAP_WRITE(map, new_entry);
13639 return new_entry;
13640 }
13641
13642 /*
13643 * Routine: vm_map_remap_extract
13644 *
13645 * Description: This routine returns a vm_map_entry list extracted from a map.
13646 */
13647 static kern_return_t
13648 vm_map_remap_extract(
13649 vm_map_t map,
13650 vm_map_offset_t addr,
13651 vm_map_size_t size,
13652 boolean_t copy,
13653 struct vm_map_header *map_header,
13654 vm_prot_t *cur_protection,
13655 vm_prot_t *max_protection,
13656 /* What, no behavior? */
13657 vm_inherit_t inheritance,
13658 boolean_t pageable,
13659 boolean_t same_map)
13660 {
13661 kern_return_t result;
13662 vm_map_size_t mapped_size;
13663 vm_map_size_t tmp_size;
13664 vm_map_entry_t src_entry; /* result of last map lookup */
13665 vm_map_entry_t new_entry;
13666 vm_object_offset_t offset;
13667 vm_map_offset_t map_address;
13668 vm_map_offset_t src_start; /* start of entry to map */
13669 vm_map_offset_t src_end; /* end of region to be mapped */
13670 vm_object_t object;
13671 vm_map_version_t version;
13672 boolean_t src_needs_copy;
13673 boolean_t new_entry_needs_copy;
13674
13675 assert(map != VM_MAP_NULL);
13676 assert(size != 0);
13677 assert(size == vm_map_round_page(size, PAGE_MASK));
13678 assert(inheritance == VM_INHERIT_NONE ||
13679 inheritance == VM_INHERIT_COPY ||
13680 inheritance == VM_INHERIT_SHARE);
13681
13682 /*
13683 * Compute start and end of region.
13684 */
13685 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13686 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13687
13688
13689 /*
13690 * Initialize map_header.
13691 */
13692 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13693 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13694 map_header->nentries = 0;
13695 map_header->entries_pageable = pageable;
13696 map_header->page_shift = PAGE_SHIFT;
13697
13698 vm_map_store_init( map_header );
13699
13700 *cur_protection = VM_PROT_ALL;
13701 *max_protection = VM_PROT_ALL;
13702
13703 map_address = 0;
13704 mapped_size = 0;
13705 result = KERN_SUCCESS;
13706
13707 /*
13708 * The specified source virtual space might correspond to
13709 * multiple map entries, need to loop on them.
13710 */
13711 vm_map_lock(map);
13712 while (mapped_size != size) {
13713 vm_map_size_t entry_size;
13714
13715 /*
13716 * Find the beginning of the region.
13717 */
13718 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13719 result = KERN_INVALID_ADDRESS;
13720 break;
13721 }
13722
13723 if (src_start < src_entry->vme_start ||
13724 (mapped_size && src_start != src_entry->vme_start)) {
13725 result = KERN_INVALID_ADDRESS;
13726 break;
13727 }
13728
13729 tmp_size = size - mapped_size;
13730 if (src_end > src_entry->vme_end)
13731 tmp_size -= (src_end - src_entry->vme_end);
13732
13733 entry_size = (vm_map_size_t)(src_entry->vme_end -
13734 src_entry->vme_start);
13735
13736 if(src_entry->is_sub_map) {
13737 vm_map_reference(VME_SUBMAP(src_entry));
13738 object = VM_OBJECT_NULL;
13739 } else {
13740 object = VME_OBJECT(src_entry);
13741 if (src_entry->iokit_acct) {
13742 /*
13743 * This entry uses "IOKit accounting".
13744 */
13745 } else if (object != VM_OBJECT_NULL &&
13746 object->purgable != VM_PURGABLE_DENY) {
13747 /*
13748 * Purgeable objects have their own accounting:
13749 * no pmap accounting for them.
13750 */
13751 assert(!src_entry->use_pmap);
13752 } else {
13753 /*
13754 * Not IOKit or purgeable:
13755 * must be accounted by pmap stats.
13756 */
13757 assert(src_entry->use_pmap);
13758 }
13759
13760 if (object == VM_OBJECT_NULL) {
13761 object = vm_object_allocate(entry_size);
13762 VME_OFFSET_SET(src_entry, 0);
13763 VME_OBJECT_SET(src_entry, object);
13764 } else if (object->copy_strategy !=
13765 MEMORY_OBJECT_COPY_SYMMETRIC) {
13766 /*
13767 * We are already using an asymmetric
13768 * copy, and therefore we already have
13769 * the right object.
13770 */
13771 assert(!src_entry->needs_copy);
13772 } else if (src_entry->needs_copy || object->shadowed ||
13773 (object->internal && !object->true_share &&
13774 !src_entry->is_shared &&
13775 object->vo_size > entry_size)) {
13776
13777 VME_OBJECT_SHADOW(src_entry, entry_size);
13778
13779 if (!src_entry->needs_copy &&
13780 (src_entry->protection & VM_PROT_WRITE)) {
13781 vm_prot_t prot;
13782
13783 prot = src_entry->protection & ~VM_PROT_WRITE;
13784
13785 if (override_nx(map,
13786 VME_ALIAS(src_entry))
13787 && prot)
13788 prot |= VM_PROT_EXECUTE;
13789
13790 if(map->mapped_in_other_pmaps) {
13791 vm_object_pmap_protect(
13792 VME_OBJECT(src_entry),
13793 VME_OFFSET(src_entry),
13794 entry_size,
13795 PMAP_NULL,
13796 src_entry->vme_start,
13797 prot);
13798 } else {
13799 pmap_protect(vm_map_pmap(map),
13800 src_entry->vme_start,
13801 src_entry->vme_end,
13802 prot);
13803 }
13804 }
13805
13806 object = VME_OBJECT(src_entry);
13807 src_entry->needs_copy = FALSE;
13808 }
13809
13810
13811 vm_object_lock(object);
13812 vm_object_reference_locked(object); /* object ref. for new entry */
13813 if (object->copy_strategy ==
13814 MEMORY_OBJECT_COPY_SYMMETRIC) {
13815 object->copy_strategy =
13816 MEMORY_OBJECT_COPY_DELAY;
13817 }
13818 vm_object_unlock(object);
13819 }
13820
13821 offset = (VME_OFFSET(src_entry) +
13822 (src_start - src_entry->vme_start));
13823
13824 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
13825 vm_map_entry_copy(new_entry, src_entry);
13826 if (new_entry->is_sub_map) {
13827 /* clr address space specifics */
13828 new_entry->use_pmap = FALSE;
13829 }
13830
13831 new_entry->map_aligned = FALSE;
13832
13833 new_entry->vme_start = map_address;
13834 new_entry->vme_end = map_address + tmp_size;
13835 assert(new_entry->vme_start < new_entry->vme_end);
13836 new_entry->inheritance = inheritance;
13837 VME_OFFSET_SET(new_entry, offset);
13838
13839 /*
13840 * The new region has to be copied now if required.
13841 */
13842 RestartCopy:
13843 if (!copy) {
13844 /*
13845 * Cannot allow an entry describing a JIT
13846 * region to be shared across address spaces.
13847 */
13848 if (src_entry->used_for_jit == TRUE && !same_map) {
13849 result = KERN_INVALID_ARGUMENT;
13850 break;
13851 }
13852 src_entry->is_shared = TRUE;
13853 new_entry->is_shared = TRUE;
13854 if (!(new_entry->is_sub_map))
13855 new_entry->needs_copy = FALSE;
13856
13857 } else if (src_entry->is_sub_map) {
13858 /* make this a COW sub_map if not already */
13859 assert(new_entry->wired_count == 0);
13860 new_entry->needs_copy = TRUE;
13861 object = VM_OBJECT_NULL;
13862 } else if (src_entry->wired_count == 0 &&
13863 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13864 VME_OFFSET(new_entry),
13865 (new_entry->vme_end -
13866 new_entry->vme_start),
13867 &src_needs_copy,
13868 &new_entry_needs_copy)) {
13869
13870 new_entry->needs_copy = new_entry_needs_copy;
13871 new_entry->is_shared = FALSE;
13872
13873 /*
13874 * Handle copy_on_write semantics.
13875 */
13876 if (src_needs_copy && !src_entry->needs_copy) {
13877 vm_prot_t prot;
13878
13879 prot = src_entry->protection & ~VM_PROT_WRITE;
13880
13881 if (override_nx(map,
13882 VME_ALIAS(src_entry))
13883 && prot)
13884 prot |= VM_PROT_EXECUTE;
13885
13886 vm_object_pmap_protect(object,
13887 offset,
13888 entry_size,
13889 ((src_entry->is_shared
13890 || map->mapped_in_other_pmaps) ?
13891 PMAP_NULL : map->pmap),
13892 src_entry->vme_start,
13893 prot);
13894
13895 assert(src_entry->wired_count == 0);
13896 src_entry->needs_copy = TRUE;
13897 }
13898 /*
13899 * Throw away the old object reference of the new entry.
13900 */
13901 vm_object_deallocate(object);
13902
13903 } else {
13904 new_entry->is_shared = FALSE;
13905
13906 /*
13907 * The map can be safely unlocked since we
13908 * already hold a reference on the object.
13909 *
13910 * Record the timestamp of the map for later
13911 * verification, and unlock the map.
13912 */
13913 version.main_timestamp = map->timestamp;
13914 vm_map_unlock(map); /* Increments timestamp once! */
13915
13916 /*
13917 * Perform the copy.
13918 */
13919 if (src_entry->wired_count > 0) {
13920 vm_object_lock(object);
13921 result = vm_object_copy_slowly(
13922 object,
13923 offset,
13924 entry_size,
13925 THREAD_UNINT,
13926 &VME_OBJECT(new_entry));
13927
13928 VME_OFFSET_SET(new_entry, 0);
13929 new_entry->needs_copy = FALSE;
13930 } else {
13931 vm_object_offset_t new_offset;
13932
13933 new_offset = VME_OFFSET(new_entry);
13934 result = vm_object_copy_strategically(
13935 object,
13936 offset,
13937 entry_size,
13938 &VME_OBJECT(new_entry),
13939 &new_offset,
13940 &new_entry_needs_copy);
13941 if (new_offset != VME_OFFSET(new_entry)) {
13942 VME_OFFSET_SET(new_entry, new_offset);
13943 }
13944
13945 new_entry->needs_copy = new_entry_needs_copy;
13946 }
13947
13948 /*
13949 * Throw away the old object reference of the new entry.
13950 */
13951 vm_object_deallocate(object);
13952
13953 if (result != KERN_SUCCESS &&
13954 result != KERN_MEMORY_RESTART_COPY) {
13955 _vm_map_entry_dispose(map_header, new_entry);
13956 vm_map_lock(map);
13957 break;
13958 }
13959
13960 /*
13961 * Verify that the map has not substantially
13962 * changed while the copy was being made.
13963 */
13964
13965 vm_map_lock(map);
13966 if (version.main_timestamp + 1 != map->timestamp) {
13967 /*
13968 * Simple version comparison failed.
13969 *
13970 * Retry the lookup and verify that the
13971 * same object/offset are still present.
13972 */
13973 vm_object_deallocate(VME_OBJECT(new_entry));
13974 _vm_map_entry_dispose(map_header, new_entry);
13975 if (result == KERN_MEMORY_RESTART_COPY)
13976 result = KERN_SUCCESS;
13977 continue;
13978 }
13979
13980 if (result == KERN_MEMORY_RESTART_COPY) {
13981 vm_object_reference(object);
13982 goto RestartCopy;
13983 }
13984 }
13985
13986 _vm_map_store_entry_link(map_header,
13987 map_header->links.prev, new_entry);
13988
13989 /* Protections for submap mapping are irrelevant here */
13990 if( !src_entry->is_sub_map ) {
13991 *cur_protection &= src_entry->protection;
13992 *max_protection &= src_entry->max_protection;
13993 }
13994 map_address += tmp_size;
13995 mapped_size += tmp_size;
13996 src_start += tmp_size;
13997
13998 } /* end while */
13999
14000 vm_map_unlock(map);
14001 if (result != KERN_SUCCESS) {
14002 /*
14003 * Free all allocated elements.
14004 */
14005 for (src_entry = map_header->links.next;
14006 src_entry != (struct vm_map_entry *)&map_header->links;
14007 src_entry = new_entry) {
14008 new_entry = src_entry->vme_next;
14009 _vm_map_store_entry_unlink(map_header, src_entry);
14010 if (src_entry->is_sub_map) {
14011 vm_map_deallocate(VME_SUBMAP(src_entry));
14012 } else {
14013 vm_object_deallocate(VME_OBJECT(src_entry));
14014 }
14015 _vm_map_entry_dispose(map_header, src_entry);
14016 }
14017 }
14018 return result;
14019 }
14020
14021 /*
14022 * Routine: vm_remap
14023 *
14024 * Map a portion of a task's address space.
14025 * The mapped region must not overlap more than
14026 * one VM memory object. Protection and
14027 * inheritance attributes remain the same
14028 * as in the original task and are returned as out parameters.
14029 * Source and target tasks can be identical.
14030 * Other attributes are identical to those for vm_map().
14031 */
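/*
 * Example (illustrative sketch; the caller and the variable names below
 * are hypothetical): remap a page-aligned region from src_map into
 * target_map at an address of the kernel's choosing, sharing rather than
 * copying (copy == FALSE), and receive the effective protections through
 * the out parameters.
 *
 *	vm_map_address_t	target_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(target_map, &target_addr, size, 0,
 *			  VM_FLAGS_ANYWHERE,
 *			  src_map, memory_address, FALSE,
 *			  &cur_prot, &max_prot, VM_INHERIT_NONE);
 */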
14032 kern_return_t
14033 vm_map_remap(
14034 vm_map_t target_map,
14035 vm_map_address_t *address,
14036 vm_map_size_t size,
14037 vm_map_offset_t mask,
14038 int flags,
14039 vm_map_t src_map,
14040 vm_map_offset_t memory_address,
14041 boolean_t copy,
14042 vm_prot_t *cur_protection,
14043 vm_prot_t *max_protection,
14044 vm_inherit_t inheritance)
14045 {
14046 kern_return_t result;
14047 vm_map_entry_t entry;
14048 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
14049 vm_map_entry_t new_entry;
14050 struct vm_map_header map_header;
14051 vm_map_offset_t offset_in_mapping;
14052
14053 if (target_map == VM_MAP_NULL)
14054 return KERN_INVALID_ARGUMENT;
14055
14056 switch (inheritance) {
14057 case VM_INHERIT_NONE:
14058 case VM_INHERIT_COPY:
14059 case VM_INHERIT_SHARE:
14060 if (size != 0 && src_map != VM_MAP_NULL)
14061 break;
14062 /*FALL THRU*/
14063 default:
14064 return KERN_INVALID_ARGUMENT;
14065 }
14066
14067 /*
14068 * If the user is requesting that we return the address of the
14069 * first byte of the data (rather than the base of the page),
14070 * then we use different rounding semantics: specifically,
14071 * we assume that (memory_address, size) describes a region
14072 * all of whose pages we must cover, rather than a base to be truncated
14073 * down and a size to be added to that base. So we figure out
14074 * the highest page that the requested region includes and make
14075 * sure that the size will cover it.
14076 *
14077 * The key example we're worried about is of the form:
14078 *
14079 * memory_address = 0x1ff0, size = 0x20
14080 *
14081 * With the old semantics, we round down the memory_address to 0x1000
14082 * and round up the size to 0x1000, resulting in our covering *only*
14083 * page 0x1000. With the new semantics, we'd realize that the region covers
14084 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
14085 * 0x1000 and page 0x2000 in the region we remap.
14086 */
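/*
 * Worked instance of the example above (illustrative only, assuming a
 * 4 KB page size, i.e. PAGE_MASK == 0xfff):
 *
 *	memory_address = 0x1ff0, size = 0x20
 *	vm_map_trunc_page(0x1ff0, PAGE_MASK)        = 0x1000
 *	offset_in_mapping = 0x1ff0 - 0x1000         = 0x0ff0
 *	size = vm_map_round_page(0x1ff0 + 0x20 - 0x1000, PAGE_MASK)
 *	     = vm_map_round_page(0x1010, PAGE_MASK) = 0x2000
 *
 * so the remapped region spans 0x1000-0x3000, covering both page 0x1000
 * and page 0x2000 as described above.
 */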
14087 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14088 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
14089 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
14090 } else {
14091 size = vm_map_round_page(size, PAGE_MASK);
14092 }
14093
14094 result = vm_map_remap_extract(src_map, memory_address,
14095 size, copy, &map_header,
14096 cur_protection,
14097 max_protection,
14098 inheritance,
14099 target_map->hdr.entries_pageable,
14100 src_map == target_map);
14101
14102 if (result != KERN_SUCCESS) {
14103 return result;
14104 }
14105
14106 /*
14107 * Allocate/check a range of free virtual address
14108 * space for the target
14109 */
14110 *address = vm_map_trunc_page(*address,
14111 VM_MAP_PAGE_MASK(target_map));
14112 vm_map_lock(target_map);
14113 result = vm_map_remap_range_allocate(target_map, address, size,
14114 mask, flags, &insp_entry);
14115
14116 for (entry = map_header.links.next;
14117 entry != (struct vm_map_entry *)&map_header.links;
14118 entry = new_entry) {
14119 new_entry = entry->vme_next;
14120 _vm_map_store_entry_unlink(&map_header, entry);
14121 if (result == KERN_SUCCESS) {
14122 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14123 /* no codesigning -> read-only access */
14124 assert(!entry->used_for_jit);
14125 entry->max_protection = VM_PROT_READ;
14126 entry->protection = VM_PROT_READ;
14127 entry->vme_resilient_codesign = TRUE;
14128 }
14129 entry->vme_start += *address;
14130 entry->vme_end += *address;
14131 assert(!entry->map_aligned);
14132 vm_map_store_entry_link(target_map, insp_entry, entry);
14133 insp_entry = entry;
14134 } else {
14135 if (!entry->is_sub_map) {
14136 vm_object_deallocate(VME_OBJECT(entry));
14137 } else {
14138 vm_map_deallocate(VME_SUBMAP(entry));
14139 }
14140 _vm_map_entry_dispose(&map_header, entry);
14141 }
14142 }
14143
14144 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14145 *cur_protection = VM_PROT_READ;
14146 *max_protection = VM_PROT_READ;
14147 }
14148
14149 if( target_map->disable_vmentry_reuse == TRUE) {
14150 assert(!target_map->is_nested_map);
14151 if( target_map->highest_entry_end < insp_entry->vme_end ){
14152 target_map->highest_entry_end = insp_entry->vme_end;
14153 }
14154 }
14155
14156 if (result == KERN_SUCCESS) {
14157 target_map->size += size;
14158 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
14159 }
14160 vm_map_unlock(target_map);
14161
14162 if (result == KERN_SUCCESS && target_map->wiring_required)
14163 result = vm_map_wire(target_map, *address,
14164 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
14165 TRUE);
14166
14167 /*
14168 * If requested, return the address of the data pointed to by the
14169 * request, rather than the base of the resulting page.
14170 */
14171 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14172 *address += offset_in_mapping;
14173 }
14174
14175 return result;
14176 }
14177
14178 /*
14179 * Routine: vm_map_remap_range_allocate
14180 *
14181 * Description:
14182 * Allocate a range in the specified virtual address map.
14183 * Returns the address and the map entry just before the allocated
14184 * range.
14185 *
14186 * Map must be locked.
14187 */
14188
14189 static kern_return_t
14190 vm_map_remap_range_allocate(
14191 vm_map_t map,
14192 vm_map_address_t *address, /* IN/OUT */
14193 vm_map_size_t size,
14194 vm_map_offset_t mask,
14195 int flags,
14196 vm_map_entry_t *map_entry) /* OUT */
14197 {
14198 vm_map_entry_t entry;
14199 vm_map_offset_t start;
14200 vm_map_offset_t end;
14201 kern_return_t kr;
14202 vm_map_entry_t hole_entry;
14203
14204 StartAgain: ;
14205
14206 start = *address;
14207
14208 if (flags & VM_FLAGS_ANYWHERE)
14209 {
14210 if (flags & VM_FLAGS_RANDOM_ADDR)
14211 {
14212 /*
14213 * Get a random start address.
14214 */
14215 kr = vm_map_random_address_for_size(map, address, size);
14216 if (kr != KERN_SUCCESS) {
14217 return(kr);
14218 }
14219 start = *address;
14220 }
14221
14222 /*
14223 * Calculate the first possible address.
14224 */
14225
14226 if (start < map->min_offset)
14227 start = map->min_offset;
14228 if (start > map->max_offset)
14229 return(KERN_NO_SPACE);
14230
14231 /*
14232 * Look for the first possible address;
14233 * if there's already something at this
14234 * address, we have to start after it.
14235 */
14236
14237 if( map->disable_vmentry_reuse == TRUE) {
14238 VM_MAP_HIGHEST_ENTRY(map, entry, start);
14239 } else {
14240
14241 if (map->holelistenabled) {
14242 hole_entry = (vm_map_entry_t)map->holes_list;
14243
14244 if (hole_entry == NULL) {
14245 /*
14246 * No more space in the map?
14247 */
14248 return(KERN_NO_SPACE);
14249 } else {
14250
14251 boolean_t found_hole = FALSE;
14252
14253 do {
14254 if (hole_entry->vme_start >= start) {
14255 start = hole_entry->vme_start;
14256 found_hole = TRUE;
14257 break;
14258 }
14259
14260 if (hole_entry->vme_end > start) {
14261 found_hole = TRUE;
14262 break;
14263 }
14264 hole_entry = hole_entry->vme_next;
14265
14266 } while (hole_entry != (vm_map_entry_t) map->holes_list);
14267
14268 if (found_hole == FALSE) {
14269 return (KERN_NO_SPACE);
14270 }
14271
14272 entry = hole_entry;
14273 }
14274 } else {
14275 assert(first_free_is_valid(map));
14276 if (start == map->min_offset) {
14277 if ((entry = map->first_free) != vm_map_to_entry(map))
14278 start = entry->vme_end;
14279 } else {
14280 vm_map_entry_t tmp_entry;
14281 if (vm_map_lookup_entry(map, start, &tmp_entry))
14282 start = tmp_entry->vme_end;
14283 entry = tmp_entry;
14284 }
14285 }
14286 start = vm_map_round_page(start,
14287 VM_MAP_PAGE_MASK(map));
14288 }
14289
14290 /*
14291 * In any case, the "entry" always precedes
14292 * the proposed new region throughout the
14293 * loop:
14294 */
14295
14296 while (TRUE) {
14297 vm_map_entry_t next;
14298
14299 /*
14300 * Find the end of the proposed new region.
14301 * Be sure we didn't go beyond the end, or
14302 * wrap around the address.
14303 */
14304
14305 end = ((start + mask) & ~mask);
14306 end = vm_map_round_page(end,
14307 VM_MAP_PAGE_MASK(map));
14308 if (end < start)
14309 return(KERN_NO_SPACE);
14310 start = end;
14311 end += size;
14312
14313 if ((end > map->max_offset) || (end < start)) {
14314 if (map->wait_for_space) {
14315 if (size <= (map->max_offset -
14316 map->min_offset)) {
14317 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
14318 vm_map_unlock(map);
14319 thread_block(THREAD_CONTINUE_NULL);
14320 vm_map_lock(map);
14321 goto StartAgain;
14322 }
14323 }
14324
14325 return(KERN_NO_SPACE);
14326 }
14327
14328 next = entry->vme_next;
14329
14330 if (map->holelistenabled) {
14331 if (entry->vme_end >= end)
14332 break;
14333 } else {
14334 /*
14335 * If there are no more entries, we must win.
14336 *
14337 * OR
14338 *
14339 * If there is another entry, it must be
14340 * after the end of the potential new region.
14341 */
14342
14343 if (next == vm_map_to_entry(map))
14344 break;
14345
14346 if (next->vme_start >= end)
14347 break;
14348 }
14349
14350 /*
14351 * Didn't fit -- move to the next entry.
14352 */
14353
14354 entry = next;
14355
14356 if (map->holelistenabled) {
14357 if (entry == (vm_map_entry_t) map->holes_list) {
14358 /*
14359 * Wrapped around
14360 */
14361 return(KERN_NO_SPACE);
14362 }
14363 start = entry->vme_start;
14364 } else {
14365 start = entry->vme_end;
14366 }
14367 }
14368
14369 if (map->holelistenabled) {
14370
14371 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
14372 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
14373 }
14374 }
14375
14376 *address = start;
14377
14378 } else {
14379 vm_map_entry_t temp_entry;
14380
14381 /*
14382 * Verify that:
14383 * the address doesn't itself violate
14384 * the mask requirement.
14385 */
14386
14387 if ((start & mask) != 0)
14388 return(KERN_NO_SPACE);
14389
14390
14391 /*
14392 * ... the address is within bounds
14393 */
14394
14395 end = start + size;
14396
14397 if ((start < map->min_offset) ||
14398 (end > map->max_offset) ||
14399 (start >= end)) {
14400 return(KERN_INVALID_ADDRESS);
14401 }
14402
14403 /*
14404 * If we're asked to overwrite whatever was mapped in that
14405 * range, first deallocate that range.
14406 */
14407 if (flags & VM_FLAGS_OVERWRITE) {
14408 vm_map_t zap_map;
14409
14410 /*
14411 * We use a "zap_map" to avoid having to unlock
14412 * the "map" in vm_map_delete(), which would compromise
14413 * the atomicity of the "deallocate" and then "remap"
14414 * combination.
14415 */
14416 zap_map = vm_map_create(PMAP_NULL,
14417 start,
14418 end,
14419 map->hdr.entries_pageable);
14420 if (zap_map == VM_MAP_NULL) {
14421 return KERN_RESOURCE_SHORTAGE;
14422 }
14423 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
14424 vm_map_disable_hole_optimization(zap_map);
14425
14426 kr = vm_map_delete(map, start, end,
14427 (VM_MAP_REMOVE_SAVE_ENTRIES |
14428 VM_MAP_REMOVE_NO_MAP_ALIGN),
14429 zap_map);
14430 if (kr == KERN_SUCCESS) {
14431 vm_map_destroy(zap_map,
14432 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14433 zap_map = VM_MAP_NULL;
14434 }
14435 }
14436
14437 /*
14438 * ... the starting address isn't allocated
14439 */
14440
14441 if (vm_map_lookup_entry(map, start, &temp_entry))
14442 return(KERN_NO_SPACE);
14443
14444 entry = temp_entry;
14445
14446 /*
14447 * ... the next region doesn't overlap the
14448 * end point.
14449 */
14450
14451 if ((entry->vme_next != vm_map_to_entry(map)) &&
14452 (entry->vme_next->vme_start < end))
14453 return(KERN_NO_SPACE);
14454 }
14455 *map_entry = entry;
14456 return(KERN_SUCCESS);
14457 }
14458
14459 /*
14460 * vm_map_switch:
14461 *
14462 * Set the address map for the current thread to the specified map
14463 */
14464
14465 vm_map_t
14466 vm_map_switch(
14467 vm_map_t map)
14468 {
14469 int mycpu;
14470 thread_t thread = current_thread();
14471 vm_map_t oldmap = thread->map;
14472
14473 mp_disable_preemption();
14474 mycpu = cpu_number();
14475
14476 /*
14477 * Deactivate the current map and activate the requested map
14478 */
14479 PMAP_SWITCH_USER(thread, map, mycpu);
14480
14481 mp_enable_preemption();
14482 return(oldmap);
14483 }
14484
14485
14486 /*
14487 * Routine: vm_map_write_user
14488 *
14489 * Description:
14490 * Copy out data from a kernel space into space in the
14491 * destination map. The space must already exist in the
14492 * destination map.
14493 * NOTE: This routine should only be called by threads
14494 * which can block on a page fault. i.e. kernel mode user
14495 * threads.
14496 *
14497 */
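/*
 * Example (illustrative sketch; task_map, kbuf and user_dst_addr are
 * hypothetical names): copy a small kernel buffer out to an
 * already-mapped user address in a task's map.
 *
 *	kr = vm_map_write_user(task_map, &kbuf, user_dst_addr, sizeof (kbuf));
 *
 * A KERN_INVALID_ADDRESS return means the copyout faulted; KERN_SUCCESS
 * otherwise.
 */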
14498 kern_return_t
14499 vm_map_write_user(
14500 vm_map_t map,
14501 void *src_p,
14502 vm_map_address_t dst_addr,
14503 vm_size_t size)
14504 {
14505 kern_return_t kr = KERN_SUCCESS;
14506
14507 if(current_map() == map) {
14508 if (copyout(src_p, dst_addr, size)) {
14509 kr = KERN_INVALID_ADDRESS;
14510 }
14511 } else {
14512 vm_map_t oldmap;
14513
14514 /* take on the identity of the target map while doing */
14515 /* the transfer */
14516
14517 vm_map_reference(map);
14518 oldmap = vm_map_switch(map);
14519 if (copyout(src_p, dst_addr, size)) {
14520 kr = KERN_INVALID_ADDRESS;
14521 }
14522 vm_map_switch(oldmap);
14523 vm_map_deallocate(map);
14524 }
14525 return kr;
14526 }
14527
14528 /*
14529 * Routine: vm_map_read_user
14530 *
14531 * Description:
14532 * Copy in data from a user space source map into the
14533 * kernel map. The space must already exist in the
14534 * kernel map.
14535 * NOTE: This routine should only be called by threads
14536 * which can block on a page fault. i.e. kernel mode user
14537 * threads.
14538 *
14539 */
14540 kern_return_t
14541 vm_map_read_user(
14542 vm_map_t map,
14543 vm_map_address_t src_addr,
14544 void *dst_p,
14545 vm_size_t size)
14546 {
14547 kern_return_t kr = KERN_SUCCESS;
14548
14549 if(current_map() == map) {
14550 if (copyin(src_addr, dst_p, size)) {
14551 kr = KERN_INVALID_ADDRESS;
14552 }
14553 } else {
14554 vm_map_t oldmap;
14555
14556 /* take on the identity of the target map while doing */
14557 /* the transfer */
14558
14559 vm_map_reference(map);
14560 oldmap = vm_map_switch(map);
14561 if (copyin(src_addr, dst_p, size)) {
14562 kr = KERN_INVALID_ADDRESS;
14563 }
14564 vm_map_switch(oldmap);
14565 vm_map_deallocate(map);
14566 }
14567 return kr;
14568 }
14569
14570
14571 /*
14572 * vm_map_check_protection:
14573 *
14574 * Assert that the target map allows the specified
14575 * privilege on the entire address region given.
14576 * The entire region must be allocated.
14577 */
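/*
 * Example (illustrative sketch): reject a request up front unless the
 * whole range is allocated and at least readable and writable.
 *
 *	if (!vm_map_check_protection(map, start, end,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 */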
14578 boolean_t
14579 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14580 vm_map_offset_t end, vm_prot_t protection)
14581 {
14582 vm_map_entry_t entry;
14583 vm_map_entry_t tmp_entry;
14584
14585 vm_map_lock(map);
14586
14587 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
14588 {
14589 vm_map_unlock(map);
14590 return (FALSE);
14591 }
14592
14593 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14594 vm_map_unlock(map);
14595 return(FALSE);
14596 }
14597
14598 entry = tmp_entry;
14599
14600 while (start < end) {
14601 if (entry == vm_map_to_entry(map)) {
14602 vm_map_unlock(map);
14603 return(FALSE);
14604 }
14605
14606 /*
14607 * No holes allowed!
14608 */
14609
14610 if (start < entry->vme_start) {
14611 vm_map_unlock(map);
14612 return(FALSE);
14613 }
14614
14615 /*
14616 * Check protection associated with entry.
14617 */
14618
14619 if ((entry->protection & protection) != protection) {
14620 vm_map_unlock(map);
14621 return(FALSE);
14622 }
14623
14624 /* go to next entry */
14625
14626 start = entry->vme_end;
14627 entry = entry->vme_next;
14628 }
14629 vm_map_unlock(map);
14630 return(TRUE);
14631 }
14632
14633 kern_return_t
14634 vm_map_purgable_control(
14635 vm_map_t map,
14636 vm_map_offset_t address,
14637 vm_purgable_t control,
14638 int *state)
14639 {
14640 vm_map_entry_t entry;
14641 vm_object_t object;
14642 kern_return_t kr;
14643 boolean_t was_nonvolatile;
14644
14645 /*
14646 * Vet all the input parameters and current type and state of the
14647 * underlying object. Return with an error if anything is amiss.
14648 */
14649 if (map == VM_MAP_NULL)
14650 return(KERN_INVALID_ARGUMENT);
14651
14652 if (control != VM_PURGABLE_SET_STATE &&
14653 control != VM_PURGABLE_GET_STATE &&
14654 control != VM_PURGABLE_PURGE_ALL)
14655 return(KERN_INVALID_ARGUMENT);
14656
14657 if (control == VM_PURGABLE_PURGE_ALL) {
14658 vm_purgeable_object_purge_all();
14659 return KERN_SUCCESS;
14660 }
14661
14662 if (control == VM_PURGABLE_SET_STATE &&
14663 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
14664 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
14665 return(KERN_INVALID_ARGUMENT);
14666
14667 vm_map_lock_read(map);
14668
14669 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14670
14671 /*
14672 * Must pass a valid non-submap address.
14673 */
14674 vm_map_unlock_read(map);
14675 return(KERN_INVALID_ADDRESS);
14676 }
14677
14678 if ((entry->protection & VM_PROT_WRITE) == 0) {
14679 /*
14680 * Can't apply purgable controls to something you can't write.
14681 */
14682 vm_map_unlock_read(map);
14683 return(KERN_PROTECTION_FAILURE);
14684 }
14685
14686 object = VME_OBJECT(entry);
14687 if (object == VM_OBJECT_NULL ||
14688 object->purgable == VM_PURGABLE_DENY) {
14689 /*
14690 * Object must already be present and be purgeable.
14691 */
14692 vm_map_unlock_read(map);
14693 return KERN_INVALID_ARGUMENT;
14694 }
14695
14696 vm_object_lock(object);
14697
14698 #if 00
14699 if (VME_OFFSET(entry) != 0 ||
14700 entry->vme_end - entry->vme_start != object->vo_size) {
14701 /*
14702 * Can only apply purgable controls to the whole (existing)
14703 * object at once.
14704 */
14705 vm_map_unlock_read(map);
14706 vm_object_unlock(object);
14707 return KERN_INVALID_ARGUMENT;
14708 }
14709 #endif
14710
14711 assert(!entry->is_sub_map);
14712 assert(!entry->use_pmap); /* purgeable has its own accounting */
14713
14714 vm_map_unlock_read(map);
14715
14716 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14717
14718 kr = vm_object_purgable_control(object, control, state);
14719
14720 if (was_nonvolatile &&
14721 object->purgable != VM_PURGABLE_NONVOLATILE &&
14722 map->pmap == kernel_pmap) {
14723 #if DEBUG
14724 object->vo_purgeable_volatilizer = kernel_task;
14725 #endif /* DEBUG */
14726 }
14727
14728 vm_object_unlock(object);
14729
14730 return kr;
14731 }
14732
14733 kern_return_t
14734 vm_map_page_query_internal(
14735 vm_map_t target_map,
14736 vm_map_offset_t offset,
14737 int *disposition,
14738 int *ref_count)
14739 {
14740 kern_return_t kr;
14741 vm_page_info_basic_data_t info;
14742 mach_msg_type_number_t count;
14743
14744 count = VM_PAGE_INFO_BASIC_COUNT;
14745 kr = vm_map_page_info(target_map,
14746 offset,
14747 VM_PAGE_INFO_BASIC,
14748 (vm_page_info_t) &info,
14749 &count);
14750 if (kr == KERN_SUCCESS) {
14751 *disposition = info.disposition;
14752 *ref_count = info.ref_count;
14753 } else {
14754 *disposition = 0;
14755 *ref_count = 0;
14756 }
14757
14758 return kr;
14759 }
14760
14761 kern_return_t
14762 vm_map_page_info(
14763 vm_map_t map,
14764 vm_map_offset_t offset,
14765 vm_page_info_flavor_t flavor,
14766 vm_page_info_t info,
14767 mach_msg_type_number_t *count)
14768 {
14769 vm_map_entry_t map_entry;
14770 vm_object_t object;
14771 vm_page_t m;
14772 kern_return_t retval = KERN_SUCCESS;
14773 boolean_t top_object;
14774 int disposition;
14775 int ref_count;
14776 vm_page_info_basic_t basic_info;
14777 int depth;
14778 vm_map_offset_t offset_in_page;
14779
14780 switch (flavor) {
14781 case VM_PAGE_INFO_BASIC:
14782 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
14783 /*
14784 * The "vm_page_info_basic_data" structure was not
14785 * properly padded, so allow the size to be off by
14786 * one to maintain backwards binary compatibility...
14787 */
14788 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14789 return KERN_INVALID_ARGUMENT;
14790 }
14791 break;
14792 default:
14793 return KERN_INVALID_ARGUMENT;
14794 }
14795
14796 disposition = 0;
14797 ref_count = 0;
14798 top_object = TRUE;
14799 depth = 0;
14800
14801 retval = KERN_SUCCESS;
14802 offset_in_page = offset & PAGE_MASK;
14803 offset = vm_map_trunc_page(offset, PAGE_MASK);
14804
14805 vm_map_lock_read(map);
14806
14807 /*
14808 * First, find the map entry covering "offset", going down
14809 * submaps if necessary.
14810 */
14811 for (;;) {
14812 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14813 vm_map_unlock_read(map);
14814 return KERN_INVALID_ADDRESS;
14815 }
14816 /* compute offset from this map entry's start */
14817 offset -= map_entry->vme_start;
14818 /* compute offset into this map entry's object (or submap) */
14819 offset += VME_OFFSET(map_entry);
14820
14821 if (map_entry->is_sub_map) {
14822 vm_map_t sub_map;
14823
14824 sub_map = VME_SUBMAP(map_entry);
14825 vm_map_lock_read(sub_map);
14826 vm_map_unlock_read(map);
14827
14828 map = sub_map;
14829
14830 ref_count = MAX(ref_count, map->ref_count);
14831 continue;
14832 }
14833 break;
14834 }
14835
14836 object = VME_OBJECT(map_entry);
14837 if (object == VM_OBJECT_NULL) {
14838 /* no object -> no page */
14839 vm_map_unlock_read(map);
14840 goto done;
14841 }
14842
14843 vm_object_lock(object);
14844 vm_map_unlock_read(map);
14845
14846 /*
14847 * Go down the VM object shadow chain until we find the page
14848 * we're looking for.
14849 */
14850 for (;;) {
14851 ref_count = MAX(ref_count, object->ref_count);
14852
14853 m = vm_page_lookup(object, offset);
14854
14855 if (m != VM_PAGE_NULL) {
14856 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
14857 break;
14858 } else {
14859 if (object->internal &&
14860 object->alive &&
14861 !object->terminating &&
14862 object->pager_ready) {
14863
14864 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14865 == VM_EXTERNAL_STATE_EXISTS) {
14866 /* the pager has that page */
14867 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14868 break;
14869 }
14870 }
14871
14872 if (object->shadow != VM_OBJECT_NULL) {
14873 vm_object_t shadow;
14874
14875 offset += object->vo_shadow_offset;
14876 shadow = object->shadow;
14877
14878 vm_object_lock(shadow);
14879 vm_object_unlock(object);
14880
14881 object = shadow;
14882 top_object = FALSE;
14883 depth++;
14884 } else {
14885 // if (!object->internal)
14886 // break;
14887 // retval = KERN_FAILURE;
14888 // goto done_with_object;
14889 break;
14890 }
14891 }
14892 }
14893 /* The ref_count is not strictly accurate: it measures the number */
14894 /* of entities holding a ref on the object; they may not be mapping */
14895 /* the object or may not be mapping the section holding the */
14896 /* target page, but it's still a ballpark number and, though an */
14897 /* over-count, it picks up the copy-on-write cases. */
14898
14899 /* We could also get a picture of page sharing from pmap_attributes */
14900 /* but this would under count as only faulted-in mappings would */
14901 /* show up. */
14902
14903 if (top_object == TRUE && object->shadow)
14904 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14905
14906 if (! object->internal)
14907 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
14908
14909 if (m == VM_PAGE_NULL)
14910 goto done_with_object;
14911
14912 if (m->fictitious) {
14913 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14914 goto done_with_object;
14915 }
14916 if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
14917 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
14918
14919 if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
14920 disposition |= VM_PAGE_QUERY_PAGE_REF;
14921
14922 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
14923 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
14924
14925 if (m->cs_validated)
14926 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
14927 if (m->cs_tainted)
14928 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
14929 if (m->cs_nx)
14930 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
14931
14932 done_with_object:
14933 vm_object_unlock(object);
14934 done:
14935
14936 switch (flavor) {
14937 case VM_PAGE_INFO_BASIC:
14938 basic_info = (vm_page_info_basic_t) info;
14939 basic_info->disposition = disposition;
14940 basic_info->ref_count = ref_count;
14941 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14942 VM_KERNEL_ADDRPERM(object);
14943 basic_info->offset =
14944 (memory_object_offset_t) offset + offset_in_page;
14945 basic_info->depth = depth;
14946 break;
14947 }
14948
14949 return retval;
14950 }
14951
14952 /*
14953 * vm_map_msync
14954 *
14955 * Synchronises the specified memory range with its backing store
14956 * image by either flushing or cleaning the contents to the appropriate
14957 * memory manager, engaging in a memory object synchronize dialog with
14958 * the manager. The client doesn't return until the manager issues an
14959 * m_o_s_completed message. MIG magically converts the user task parameter
14960 * to the task's address map.
14961 *
14962 * interpretation of sync_flags
14963 * VM_SYNC_INVALIDATE - discard pages, only return precious
14964 * pages to manager.
14965 *
14966 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14967 * - discard pages, write dirty or precious
14968 * pages back to memory manager.
14969 *
14970 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14971 * - write dirty or precious pages back to
14972 * the memory manager.
14973 *
14974 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14975 * is a hole in the region, and we would
14976 * have returned KERN_SUCCESS, return
14977 * KERN_INVALID_ADDRESS instead.
14978 *
14979 * NOTE
14980 * The memory object attributes have not yet been implemented; this
14981 * function will have to deal with the invalidate attribute.
14982 *
14983 * RETURNS
14984 * KERN_INVALID_TASK Bad task parameter
14985 * KERN_INVALID_ARGUMENT both sync and async were specified.
14986 * KERN_SUCCESS The usual.
14987 * KERN_INVALID_ADDRESS There was a hole in the region.
14988 */
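/*
 * Example (illustrative sketch): the rough equivalent of
 * msync(addr, len, MS_SYNC | MS_INVALIDATE) against a task's map, per
 * the flag interpretation above.
 *
 *	kr = vm_map_msync(map, (vm_map_address_t)addr, (vm_map_size_t)len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE);
 *
 * Adding VM_SYNC_CONTIGUOUS turns a hole in the range into a
 * KERN_INVALID_ADDRESS return instead of KERN_SUCCESS.
 */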
14989
14990 kern_return_t
14991 vm_map_msync(
14992 vm_map_t map,
14993 vm_map_address_t address,
14994 vm_map_size_t size,
14995 vm_sync_t sync_flags)
14996 {
14997 msync_req_t msr;
14998 msync_req_t new_msr;
14999 queue_chain_t req_q; /* queue of requests for this msync */
15000 vm_map_entry_t entry;
15001 vm_map_size_t amount_left;
15002 vm_object_offset_t offset;
15003 boolean_t do_sync_req;
15004 boolean_t had_hole = FALSE;
15005 memory_object_t pager;
15006 vm_map_offset_t pmap_offset;
15007
15008 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
15009 (sync_flags & VM_SYNC_SYNCHRONOUS))
15010 return(KERN_INVALID_ARGUMENT);
15011
15012 /*
15013 * align address and size on page boundaries
15014 */
15015 size = (vm_map_round_page(address + size,
15016 VM_MAP_PAGE_MASK(map)) -
15017 vm_map_trunc_page(address,
15018 VM_MAP_PAGE_MASK(map)));
15019 address = vm_map_trunc_page(address,
15020 VM_MAP_PAGE_MASK(map));
15021
15022 if (map == VM_MAP_NULL)
15023 return(KERN_INVALID_TASK);
15024
15025 if (size == 0)
15026 return(KERN_SUCCESS);
15027
15028 queue_init(&req_q);
15029 amount_left = size;
15030
15031 while (amount_left > 0) {
15032 vm_object_size_t flush_size;
15033 vm_object_t object;
15034
15035 vm_map_lock(map);
15036 if (!vm_map_lookup_entry(map,
15037 address,
15038 &entry)) {
15039
15040 vm_map_size_t skip;
15041
15042 /*
15043 * hole in the address map.
15044 */
15045 had_hole = TRUE;
15046
15047 if (sync_flags & VM_SYNC_KILLPAGES) {
15048 /*
15049 * For VM_SYNC_KILLPAGES, there should be
15050 * no holes in the range, since we couldn't
15051 * prevent someone else from allocating in
15052 * that hole and we wouldn't want to "kill"
15053 * their pages.
15054 */
15055 vm_map_unlock(map);
15056 break;
15057 }
15058
15059 /*
15060 * Check for empty map.
15061 */
15062 if (entry == vm_map_to_entry(map) &&
15063 entry->vme_next == entry) {
15064 vm_map_unlock(map);
15065 break;
15066 }
15067 /*
15068 * Check that we don't wrap and that
15069 * we have at least one real map entry.
15070 */
15071 if ((map->hdr.nentries == 0) ||
15072 (entry->vme_next->vme_start < address)) {
15073 vm_map_unlock(map);
15074 break;
15075 }
15076 /*
15077 * Move up to the next entry if needed
15078 */
15079 skip = (entry->vme_next->vme_start - address);
15080 if (skip >= amount_left)
15081 amount_left = 0;
15082 else
15083 amount_left -= skip;
15084 address = entry->vme_next->vme_start;
15085 vm_map_unlock(map);
15086 continue;
15087 }
15088
15089 offset = address - entry->vme_start;
15090 pmap_offset = address;
15091
15092 /*
15093 * do we have more to flush than is contained in this
15094 * entry ?
15095 */
15096 if (amount_left + entry->vme_start + offset > entry->vme_end) {
15097 flush_size = entry->vme_end -
15098 (entry->vme_start + offset);
15099 } else {
15100 flush_size = amount_left;
15101 }
15102 amount_left -= flush_size;
15103 address += flush_size;
15104
15105 if (entry->is_sub_map == TRUE) {
15106 vm_map_t local_map;
15107 vm_map_offset_t local_offset;
15108
15109 local_map = VME_SUBMAP(entry);
15110 local_offset = VME_OFFSET(entry);
15111 vm_map_unlock(map);
15112 if (vm_map_msync(
15113 local_map,
15114 local_offset,
15115 flush_size,
15116 sync_flags) == KERN_INVALID_ADDRESS) {
15117 had_hole = TRUE;
15118 }
15119 continue;
15120 }
15121 object = VME_OBJECT(entry);
15122
15123 /*
15124 * We can't sync this object if the object has not been
15125 * created yet
15126 */
15127 if (object == VM_OBJECT_NULL) {
15128 vm_map_unlock(map);
15129 continue;
15130 }
15131 offset += VME_OFFSET(entry);
15132
15133 vm_object_lock(object);
15134
15135 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
15136 int kill_pages = 0;
15137 boolean_t reusable_pages = FALSE;
15138
15139 if (sync_flags & VM_SYNC_KILLPAGES) {
15140 if (((object->ref_count == 1) ||
15141 ((object->copy_strategy !=
15142 MEMORY_OBJECT_COPY_SYMMETRIC) &&
15143 (object->copy == VM_OBJECT_NULL))) &&
15144 (object->shadow == VM_OBJECT_NULL)) {
15145 if (object->ref_count != 1) {
15146 vm_page_stats_reusable.free_shared++;
15147 }
15148 kill_pages = 1;
15149 } else {
15150 kill_pages = -1;
15151 }
15152 }
15153 if (kill_pages != -1)
15154 vm_object_deactivate_pages(
15155 object,
15156 offset,
15157 (vm_object_size_t) flush_size,
15158 kill_pages,
15159 reusable_pages,
15160 map->pmap,
15161 pmap_offset);
15162 vm_object_unlock(object);
15163 vm_map_unlock(map);
15164 continue;
15165 }
15166 /*
15167 * We can't sync this object if there isn't a pager.
15168 * Don't bother to sync internal objects, since there can't
15169 * be any "permanent" storage for these objects anyway.
15170 */
15171 if ((object->pager == MEMORY_OBJECT_NULL) ||
15172 (object->internal) || (object->private)) {
15173 vm_object_unlock(object);
15174 vm_map_unlock(map);
15175 continue;
15176 }
15177 /*
15178 * keep reference on the object until syncing is done
15179 */
15180 vm_object_reference_locked(object);
15181 vm_object_unlock(object);
15182
15183 vm_map_unlock(map);
15184
15185 do_sync_req = vm_object_sync(object,
15186 offset,
15187 flush_size,
15188 sync_flags & VM_SYNC_INVALIDATE,
15189 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
15190 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
15191 sync_flags & VM_SYNC_SYNCHRONOUS);
15192 /*
15193 * only send an m_o_s if we returned pages or if the entry
15194 * is writable (i.e. dirty pages may have already been sent back)
15195 */
15196 if (!do_sync_req) {
15197 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
15198 /*
15199 * clear out the clustering and read-ahead hints
15200 */
15201 vm_object_lock(object);
15202
15203 object->pages_created = 0;
15204 object->pages_used = 0;
15205 object->sequential = 0;
15206 object->last_alloc = 0;
15207
15208 vm_object_unlock(object);
15209 }
15210 vm_object_deallocate(object);
15211 continue;
15212 }
15213 msync_req_alloc(new_msr);
15214
15215 vm_object_lock(object);
15216 offset += object->paging_offset;
15217
15218 new_msr->offset = offset;
15219 new_msr->length = flush_size;
15220 new_msr->object = object;
15221 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
15222 re_iterate:
15223
15224 /*
15225 * We can't sync this object if there isn't a pager. The
15226 * pager can disappear anytime we're not holding the object
15227 * lock. So this has to be checked anytime we goto re_iterate.
15228 */
15229
15230 pager = object->pager;
15231
15232 if (pager == MEMORY_OBJECT_NULL) {
15233 vm_object_unlock(object);
15234 vm_object_deallocate(object);
15235 msync_req_free(new_msr);
15236 new_msr = NULL;
15237 continue;
15238 }
15239
15240 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
15241 /*
15242 * need to check for overlapping entry, if found, wait
15243 * on overlapping msr to be done, then reiterate
15244 */
15245 msr_lock(msr);
15246 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
15247 ((offset >= msr->offset &&
15248 offset < (msr->offset + msr->length)) ||
15249 (msr->offset >= offset &&
15250 msr->offset < (offset + flush_size))))
15251 {
15252 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
15253 msr_unlock(msr);
15254 vm_object_unlock(object);
15255 thread_block(THREAD_CONTINUE_NULL);
15256 vm_object_lock(object);
15257 goto re_iterate;
15258 }
15259 msr_unlock(msr);
15260 }/* queue_iterate */
15261
15262 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
15263
15264 vm_object_paging_begin(object);
15265 vm_object_unlock(object);
15266
15267 queue_enter(&req_q, new_msr, msync_req_t, req_q);
15268
15269 (void) memory_object_synchronize(
15270 pager,
15271 offset,
15272 flush_size,
15273 sync_flags & ~VM_SYNC_CONTIGUOUS);
15274
15275 vm_object_lock(object);
15276 vm_object_paging_end(object);
15277 vm_object_unlock(object);
15278 }/* while */
15279
15280 /*
15281 * wait for memory_object_synchronize_completed messages from pager(s)
15282 */
15283
15284 while (!queue_empty(&req_q)) {
15285 msr = (msync_req_t)queue_first(&req_q);
15286 msr_lock(msr);
15287 while(msr->flag != VM_MSYNC_DONE) {
15288 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
15289 msr_unlock(msr);
15290 thread_block(THREAD_CONTINUE_NULL);
15291 msr_lock(msr);
15292 }/* while */
15293 queue_remove(&req_q, msr, msync_req_t, req_q);
15294 msr_unlock(msr);
15295 vm_object_deallocate(msr->object);
15296 msync_req_free(msr);
15297 }/* queue_iterate */
15298
15299 /* for proper msync() behaviour */
15300 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
15301 return(KERN_INVALID_ADDRESS);
15302
15303 return(KERN_SUCCESS);
15304 }/* vm_msync */
15305
15306 /*
15307 * Routine: convert_port_entry_to_map
15308 * Purpose:
15309 * Convert from a port specifying an entry or a task
15310 * to a map. Doesn't consume the port ref; produces a map ref,
15311 * which may be null. Unlike convert_port_to_map, the
15312 * port may be backed by either a task or a named entry.
15313 * Conditions:
15314 * Nothing locked.
15315 */
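/*
 * Example (illustrative sketch): a typical caller converts the port,
 * checks for VM_MAP_NULL, and drops the map reference it was handed
 * once it is done with the map.
 *
 *	map = convert_port_entry_to_map(port);
 *	if (map == VM_MAP_NULL)
 *		return KERN_INVALID_ARGUMENT;
 *	... operate on map ...
 *	vm_map_deallocate(map);
 */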
15316
15317
15318 vm_map_t
15319 convert_port_entry_to_map(
15320 ipc_port_t port)
15321 {
15322 vm_map_t map;
15323 vm_named_entry_t named_entry;
15324 uint32_t try_failed_count = 0;
15325
15326 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15327 while(TRUE) {
15328 ip_lock(port);
15329 if(ip_active(port) && (ip_kotype(port)
15330 == IKOT_NAMED_ENTRY)) {
15331 named_entry =
15332 (vm_named_entry_t)port->ip_kobject;
15333 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
15334 ip_unlock(port);
15335
15336 try_failed_count++;
15337 mutex_pause(try_failed_count);
15338 continue;
15339 }
15340 named_entry->ref_count++;
15341 lck_mtx_unlock(&(named_entry)->Lock);
15342 ip_unlock(port);
15343 if ((named_entry->is_sub_map) &&
15344 (named_entry->protection
15345 & VM_PROT_WRITE)) {
15346 map = named_entry->backing.map;
15347 } else {
15348 mach_destroy_memory_entry(port);
15349 return VM_MAP_NULL;
15350 }
15351 vm_map_reference_swap(map);
15352 mach_destroy_memory_entry(port);
15353 break;
15354 }
15355 else
15356 return VM_MAP_NULL;
15357 }
15358 }
15359 else
15360 map = convert_port_to_map(port);
15361
15362 return map;
15363 }
15364
15365 /*
15366 * Routine: convert_port_entry_to_object
15367 * Purpose:
15368 * Convert from a port specifying a named entry to an
15369 * object. Doesn't consume the port ref; produces an object ref,
15370 * which may be null.
15371 * Conditions:
15372 * Nothing locked.
15373 */
15374
15375
15376 vm_object_t
15377 convert_port_entry_to_object(
15378 ipc_port_t port)
15379 {
15380 vm_object_t object = VM_OBJECT_NULL;
15381 vm_named_entry_t named_entry;
15382 uint32_t try_failed_count = 0;
15383
15384 if (IP_VALID(port) &&
15385 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15386 try_again:
15387 ip_lock(port);
15388 if (ip_active(port) &&
15389 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15390 named_entry = (vm_named_entry_t)port->ip_kobject;
15391 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
15392 ip_unlock(port);
15393 try_failed_count++;
15394 mutex_pause(try_failed_count);
15395 goto try_again;
15396 }
15397 named_entry->ref_count++;
15398 lck_mtx_unlock(&(named_entry)->Lock);
15399 ip_unlock(port);
15400 if (!(named_entry->is_sub_map) &&
15401 !(named_entry->is_pager) &&
15402 !(named_entry->is_copy) &&
15403 (named_entry->protection & VM_PROT_WRITE)) {
15404 object = named_entry->backing.object;
15405 vm_object_reference(object);
15406 }
15407 mach_destroy_memory_entry(port);
15408 }
15409 }
15410
15411 return object;
15412 }
15413
15414 /*
15415 * Export routines to other components for the things we access locally through
15416 * macros.
15417 */
15418 #undef current_map
15419 vm_map_t
15420 current_map(void)
15421 {
15422 return (current_map_fast());
15423 }
15424
15425 /*
15426 * vm_map_reference:
15427 *
15428 * Most code internal to osfmk will go through a
15429 * macro defining this. This is always here for the
15430 * use of other kernel components.
15431 */
15432 #undef vm_map_reference
15433 void
15434 vm_map_reference(
15435 vm_map_t map)
15436 {
15437 if (map == VM_MAP_NULL)
15438 return;
15439
15440 lck_mtx_lock(&map->s_lock);
15441 #if TASK_SWAPPER
15442 assert(map->res_count > 0);
15443 assert(map->ref_count >= map->res_count);
15444 map->res_count++;
15445 #endif
15446 map->ref_count++;
15447 lck_mtx_unlock(&map->s_lock);
15448 }
15449
15450 /*
15451 * vm_map_deallocate:
15452 *
15453 * Removes a reference from the specified map,
15454 * destroying it if no references remain.
15455 * The map should not be locked.
15456 */
15457 void
15458 vm_map_deallocate(
15459 vm_map_t map)
15460 {
15461 unsigned int ref;
15462
15463 if (map == VM_MAP_NULL)
15464 return;
15465
15466 lck_mtx_lock(&map->s_lock);
15467 ref = --map->ref_count;
15468 if (ref > 0) {
15469 vm_map_res_deallocate(map);
15470 lck_mtx_unlock(&map->s_lock);
15471 return;
15472 }
15473 assert(map->ref_count == 0);
15474 lck_mtx_unlock(&map->s_lock);
15475
15476 #if TASK_SWAPPER
15477 /*
15478 * The map residence count isn't decremented here because
15479 * the vm_map_delete below will traverse the entire map,
15480 * deleting entries, and the residence counts on objects
15481 * and sharing maps will go away then.
15482 */
15483 #endif
15484
15485 vm_map_destroy(map, VM_MAP_NO_FLAGS);
15486 }
15487
15488
15489 void
15490 vm_map_disable_NX(vm_map_t map)
15491 {
15492 if (map == NULL)
15493 return;
15494 if (map->pmap == NULL)
15495 return;
15496
15497 pmap_disable_NX(map->pmap);
15498 }
15499
15500 void
15501 vm_map_disallow_data_exec(vm_map_t map)
15502 {
15503 if (map == NULL)
15504 return;
15505
15506 map->map_disallow_data_exec = TRUE;
15507 }
15508
15509 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15510 * more descriptive.
15511 */
15512 void
15513 vm_map_set_32bit(vm_map_t map)
15514 {
15515 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15516 }
15517
15518
15519 void
15520 vm_map_set_64bit(vm_map_t map)
15521 {
15522 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15523 }
15524
15525 vm_map_offset_t
15526 vm_compute_max_offset(boolean_t is64)
15527 {
15528 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15529 }
15530
15531 uint64_t
15532 vm_map_get_max_aslr_slide_pages(vm_map_t map)
15533 {
15534 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15535 }
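/*
 * For reference (illustrative arithmetic, assuming 4 KB pages): this
 * allows a maximum ASLR slide of 1 << 16 = 65536 pages (256 MB) for a
 * 64-bit map and 1 << 8 = 256 pages (1 MB) for a 32-bit map.
 */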
15536
15537 boolean_t
15538 vm_map_is_64bit(
15539 vm_map_t map)
15540 {
15541 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15542 }
15543
15544 boolean_t
15545 vm_map_has_hard_pagezero(
15546 vm_map_t map,
15547 vm_map_offset_t pagezero_size)
15548 {
15549 /*
15550 * XXX FBDP
15551 * We should lock the VM map (for read) here but we can get away
15552 * with it for now because there can't really be any race condition:
15553 * the VM map's min_offset is changed only when the VM map is created
15554 * and when the zero page is established (when the binary gets loaded),
15555 * and this routine gets called only when the task terminates and the
15556 * VM map is being torn down, and when a new map is created via
15557 * load_machfile()/execve().
15558 */
15559 return (map->min_offset >= pagezero_size);
15560 }
15561
15562 /*
15563 * Raise a VM map's maximum offset.
15564 */
15565 kern_return_t
15566 vm_map_raise_max_offset(
15567 vm_map_t map,
15568 vm_map_offset_t new_max_offset)
15569 {
15570 kern_return_t ret;
15571
15572 vm_map_lock(map);
15573 ret = KERN_INVALID_ADDRESS;
15574
15575 if (new_max_offset >= map->max_offset) {
15576 if (!vm_map_is_64bit(map)) {
15577 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15578 map->max_offset = new_max_offset;
15579 ret = KERN_SUCCESS;
15580 }
15581 } else {
15582 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15583 map->max_offset = new_max_offset;
15584 ret = KERN_SUCCESS;
15585 }
15586 }
15587 }
15588
15589 vm_map_unlock(map);
15590 return ret;
15591 }
15592
15593
15594 /*
15595 * Raise a VM map's minimum offset.
15596 * To strictly enforce "page zero" reservation.
15597 */
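/*
 * Example (illustrative sketch): reserve a hard page zero by raising the
 * minimum offset to one page, so no mapping can ever be created at
 * virtual address 0.
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)PAGE_SIZE);
 *
 * KERN_NO_SPACE means something is already mapped below the new minimum;
 * KERN_INVALID_ADDRESS means the new minimum would move min_offset
 * backwards or run into max_offset.
 */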
15598 kern_return_t
15599 vm_map_raise_min_offset(
15600 vm_map_t map,
15601 vm_map_offset_t new_min_offset)
15602 {
15603 vm_map_entry_t first_entry;
15604
15605 new_min_offset = vm_map_round_page(new_min_offset,
15606 VM_MAP_PAGE_MASK(map));
15607
15608 vm_map_lock(map);
15609
15610 if (new_min_offset < map->min_offset) {
15611 /*
15612 * Can't move min_offset backwards, as that would expose
15613 * a part of the address space that was previously, and for
15614 * possibly good reasons, inaccessible.
15615 */
15616 vm_map_unlock(map);
15617 return KERN_INVALID_ADDRESS;
15618 }
15619 if (new_min_offset >= map->max_offset) {
15620 /* can't go beyond the end of the address space */
15621 vm_map_unlock(map);
15622 return KERN_INVALID_ADDRESS;
15623 }
15624
15625 first_entry = vm_map_first_entry(map);
15626 if (first_entry != vm_map_to_entry(map) &&
15627 first_entry->vme_start < new_min_offset) {
15628 /*
15629 * Some memory was already allocated below the new
15630 * minimum offset. It's too late to change it now...
15631 */
15632 vm_map_unlock(map);
15633 return KERN_NO_SPACE;
15634 }
15635
15636 map->min_offset = new_min_offset;
15637
15638 assert(map->holes_list);
15639 map->holes_list->start = new_min_offset;
15640 assert(new_min_offset < map->holes_list->end);
15641
15642 vm_map_unlock(map);
15643
15644 return KERN_SUCCESS;
15645 }
15646
15647 /*
15648 * Set the limit on the maximum amount of user wired memory allowed for this map.
15649 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15650 * the kernel. The limit is checked on the Mach VM side, so we keep a copy
15651 * here to avoid reaching over to the BSD data structures.
15652 */
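/*
 * Example (illustrative sketch; the rlimit plumbing shown is
 * hypothetical): the BSD side propagating its RLIMIT_MEMLOCK value
 * into the map.
 *
 *	vm_map_set_user_wire_limit(current_map(), (vm_size_t)lim.rlim_cur);
 */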
15653
15654 void
15655 vm_map_set_user_wire_limit(vm_map_t map,
15656 vm_size_t limit)
15657 {
15658 map->user_wire_limit = limit;
15659 }
15660
15661
15662 void vm_map_switch_protect(vm_map_t map,
15663 boolean_t val)
15664 {
15665 vm_map_lock(map);
15666 map->switch_protect=val;
15667 vm_map_unlock(map);
15668 }
15669
15670 /*
15671 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15672 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15673 * bump both counters.
15674 */
15675 void
15676 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15677 {
15678 pmap_t pmap = vm_map_pmap(map);
15679
15680 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15681 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15682 }
15683
15684 void
15685 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15686 {
15687 pmap_t pmap = vm_map_pmap(map);
15688
15689 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15690 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15691 }
15692
15693 /* Add (generate) code signature for memory range */
15694 #if CONFIG_DYNAMIC_CODE_SIGNING
15695 kern_return_t vm_map_sign(vm_map_t map,
15696 vm_map_offset_t start,
15697 vm_map_offset_t end)
15698 {
15699 vm_map_entry_t entry;
15700 vm_page_t m;
15701 vm_object_t object;
15702
15703 /*
15704 * Vet all the input parameters and current type and state of the
15705 * underlying object. Return with an error if anything is amiss.
15706 */
15707 if (map == VM_MAP_NULL)
15708 return(KERN_INVALID_ARGUMENT);
15709
15710 vm_map_lock_read(map);
15711
15712 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15713 /*
15714 * Must pass a valid non-submap address.
15715 */
15716 vm_map_unlock_read(map);
15717 return(KERN_INVALID_ADDRESS);
15718 }
15719
15720 if((entry->vme_start > start) || (entry->vme_end < end)) {
15721 /*
15722 * Map entry doesn't cover the requested range. Not handling
15723 * this situation currently.
15724 */
15725 vm_map_unlock_read(map);
15726 return(KERN_INVALID_ARGUMENT);
15727 }
15728
15729 object = VME_OBJECT(entry);
15730 if (object == VM_OBJECT_NULL) {
15731 /*
15732 * Object must already be present or we can't sign.
15733 */
15734 vm_map_unlock_read(map);
15735 return KERN_INVALID_ARGUMENT;
15736 }
15737
15738 vm_object_lock(object);
15739 vm_map_unlock_read(map);
15740
15741 while(start < end) {
15742 uint32_t refmod;
15743
15744 m = vm_page_lookup(object,
15745 start - entry->vme_start + VME_OFFSET(entry));
15746 if (m==VM_PAGE_NULL) {
15747 /* should we try to fault a page here? we can probably
15748 * demand it exists and is locked for this request */
15749 vm_object_unlock(object);
15750 return KERN_FAILURE;
15751 }
15752 /* deal with special page status */
15753 if (m->busy ||
15754 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15755 vm_object_unlock(object);
15756 return KERN_FAILURE;
15757 }
15758
15759 /* Page is OK... now "validate" it */
15760 /* This is the place where we'll call out to create a code
15761 * directory, later */
15762 m->cs_validated = TRUE;
15763
15764 /* The page is now "clean" for codesigning purposes. That means
15765 * we don't consider it as modified (wpmapped) anymore. But
15766 * we'll disconnect the page so we note any future modification
15767 * attempts. */
15768 m->wpmapped = FALSE;
15769 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
15770
15771 /* Pull the dirty status from the pmap, since we cleared the
15772 * wpmapped bit */
15773 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
15774 SET_PAGE_DIRTY(m, FALSE);
15775 }
15776
15777 /* On to the next page */
15778 start += PAGE_SIZE;
15779 }
15780 vm_object_unlock(object);
15781
15782 return KERN_SUCCESS;
15783 }
15784 #endif
15785
15786 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15787 {
15788 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15789 vm_map_entry_t next_entry;
15790 kern_return_t kr = KERN_SUCCESS;
15791 vm_map_t zap_map;
15792
15793 vm_map_lock(map);
15794
15795 /*
15796 * We use a "zap_map" to avoid having to unlock
15797 * the "map" in vm_map_delete().
15798 */
15799 zap_map = vm_map_create(PMAP_NULL,
15800 map->min_offset,
15801 map->max_offset,
15802 map->hdr.entries_pageable);
15803
15804 if (zap_map == VM_MAP_NULL) {
15805 return KERN_RESOURCE_SHORTAGE;
15806 }
15807
15808 vm_map_set_page_shift(zap_map,
15809 VM_MAP_PAGE_SHIFT(map));
15810 vm_map_disable_hole_optimization(zap_map);
15811
15812 for (entry = vm_map_first_entry(map);
15813 entry != vm_map_to_entry(map);
15814 entry = next_entry) {
15815 next_entry = entry->vme_next;
15816
15817 if (VME_OBJECT(entry) &&
15818 !entry->is_sub_map &&
15819 (VME_OBJECT(entry)->internal == TRUE) &&
15820 (VME_OBJECT(entry)->ref_count == 1)) {
15821
15822 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15823 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
15824
15825 (void)vm_map_delete(map,
15826 entry->vme_start,
15827 entry->vme_end,
15828 VM_MAP_REMOVE_SAVE_ENTRIES,
15829 zap_map);
15830 }
15831 }
15832
15833 vm_map_unlock(map);
15834
15835 /*
15836 * Get rid of the "zap_maps" and all the map entries that
15837 * they may still contain.
15838 */
15839 if (zap_map != VM_MAP_NULL) {
15840 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15841 zap_map = VM_MAP_NULL;
15842 }
15843
15844 return kr;
15845 }
15846
15847
15848 #if DEVELOPMENT || DEBUG
15849
15850 int
15851 vm_map_disconnect_page_mappings(
15852 vm_map_t map,
15853 boolean_t do_unnest)
15854 {
15855 vm_map_entry_t entry;
15856 int page_count = 0;
15857
15858 if (do_unnest == TRUE) {
15859 #ifndef NO_NESTED_PMAP
15860 vm_map_lock(map);
15861
15862 for (entry = vm_map_first_entry(map);
15863 entry != vm_map_to_entry(map);
15864 entry = entry->vme_next) {
15865
15866 if (entry->is_sub_map && entry->use_pmap) {
15867 /*
15868 * Make sure the range between the start of this entry and
15869 * the end of this entry is no longer nested, so that
15870 * we will only remove mappings from the pmap in use by
15871 * this task
15872 */
15873 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
15874 }
15875 }
15876 vm_map_unlock(map);
15877 #endif
15878 }
15879 vm_map_lock_read(map);
15880
15881 page_count = map->pmap->stats.resident_count;
15882
15883 for (entry = vm_map_first_entry(map);
15884 entry != vm_map_to_entry(map);
15885 entry = entry->vme_next) {
15886
15887 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
15888 (VME_OBJECT(entry)->phys_contiguous))) {
15889 continue;
15890 }
15891 if (entry->is_sub_map)
15892 assert(!entry->use_pmap);
15893
15894 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
15895 }
15896 vm_map_unlock_read(map);
15897
15898 return page_count;
15899 }
15900
15901 #endif
15902
15903
15904 #if CONFIG_FREEZE
15905
15906
15907 int c_freezer_swapout_count;
15908 int c_freezer_compression_count = 0;
15909 AbsoluteTime c_freezer_last_yield_ts = 0;
15910
15911 kern_return_t vm_map_freeze(
15912 vm_map_t map,
15913 unsigned int *purgeable_count,
15914 unsigned int *wired_count,
15915 unsigned int *clean_count,
15916 unsigned int *dirty_count,
15917 __unused unsigned int dirty_budget,
15918 boolean_t *has_shared)
15919 {
15920 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15921 kern_return_t kr = KERN_SUCCESS;
15922
15923 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15924 *has_shared = FALSE;
15925
15926 /*
15927 * We need the exclusive lock here so that we can
15928 * block any page faults or lookups while we are
15929 * in the middle of freezing this vm map.
15930 */
15931 vm_map_lock(map);
15932
15933 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
15934
15935 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15936 kr = KERN_NO_SPACE;
15937 goto done;
15938 }
15939
15940 c_freezer_compression_count = 0;
15941 clock_get_uptime(&c_freezer_last_yield_ts);
15942
15943 for (entry2 = vm_map_first_entry(map);
15944 entry2 != vm_map_to_entry(map);
15945 entry2 = entry2->vme_next) {
15946
15947 vm_object_t src_object = VME_OBJECT(entry2);
15948
15949 if (src_object &&
15950 !entry2->is_sub_map &&
15951 !src_object->phys_contiguous) {
15952 /* If eligible, scan the entry and push its eligible pages to the compressor */
15953
15954 if (src_object->internal == TRUE) {
15955
15956 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
15957 /*
15958 * Pages belonging to this object could be swapped to disk.
15959 * Make sure it's not a shared object because we could end
15960 * up just bringing it back in again.
15961 */
15962 if (src_object->ref_count > 1) {
15963 continue;
15964 }
15965 }
15966 vm_object_compressed_freezer_pageout(src_object);
15967
15968 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15969 kr = KERN_NO_SPACE;
15970 break;
15971 }
15972 }
15973 }
15974 }
15975 done:
15976 vm_map_unlock(map);
15977
15978 vm_object_compressed_freezer_done();
15979
15980 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
15981 /*
15982 * reset the counter tracking the # of swapped c_segs
15983 * because we are now done with this freeze session and task.
15984 */
15985 c_freezer_swapout_count = 0;
15986 }
15987 return kr;
15988 }
15989
15990 #endif /* CONFIG_FREEZE */
15991
15992 /*
15993 * vm_map_entry_should_cow_for_true_share:
15994 *
15995 * Determines if the map entry should be clipped and setup for copy-on-write
15996 * to avoid applying "true_share" to a large VM object when only a subset is
15997 * targeted.
15998 *
15999 * For now, we target only the map entries created for the Objective C
16000 * Garbage Collector or "MALLOC_SMALL" heaps, which initially have:
16001 * - alias == VM_MEMORY_MALLOC or VM_MEMORY_MALLOC_SMALL
16002 * - wired_count == 0
16003 * - !needs_copy
16004 * and a VM object with:
16005 * - internal
16006 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
16007 * - !true_share
16008 * - vo_size == ANON_CHUNK_SIZE (or 2048 * 4096 for MALLOC_SMALL)
16009 *
16010 * Only non-kernel map entries.
16011 */
16012 boolean_t
16013 vm_map_entry_should_cow_for_true_share(
16014 vm_map_entry_t entry)
16015 {
16016 vm_object_t object;
16017
16018 if (entry->is_sub_map) {
16019 /* entry does not point at a VM object */
16020 return FALSE;
16021 }
16022
16023 if (entry->needs_copy) {
16024 /* already set for copy_on_write: done! */
16025 return FALSE;
16026 }
16027
16028 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
16029 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
16030 /* not a malloc heap or Obj-C Garbage Collector heap */
16031 return FALSE;
16032 }
16033
16034 if (entry->wired_count) {
16035 /* wired: can't change the map entry... */
16036 vm_counters.should_cow_but_wired++;
16037 return FALSE;
16038 }
16039
16040 object = VME_OBJECT(entry);
16041
16042 if (object == VM_OBJECT_NULL) {
16043 /* no object yet... */
16044 return FALSE;
16045 }
16046
16047 if (!object->internal) {
16048 /* not an internal object */
16049 return FALSE;
16050 }
16051
16052 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16053 /* not the default copy strategy */
16054 return FALSE;
16055 }
16056
16057 if (object->true_share) {
16058 /* already true_share: too late to avoid it */
16059 return FALSE;
16060 }
16061
16062 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
16063 object->vo_size != ANON_CHUNK_SIZE) {
16064 /* ... not an object created for the ObjC Garbage Collector */
16065 return FALSE;
16066 }
16067
16068 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
16069 object->vo_size != 2048 * 4096) {
16070 /* ... not a "MALLOC_SMALL" heap */
16071 return FALSE;
16072 }
16073
16074 /*
16075 * All the criteria match: we have a large object being targeted for "true_share".
16076 * To limit the adverse side-effects linked with "true_share", tell the caller to
16077 * try and avoid setting up the entire object for "true_share" by clipping the
16078 * targeted range and setting it up for copy-on-write.
16079 */
16080 return TRUE;
16081 }
16082
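/*
 * Thin, exported wrappers around the per-map page geometry macros
 * (VM_MAP_ROUND_PAGE, VM_MAP_TRUNC_PAGE, VM_MAP_PAGE_SHIFT/SIZE/MASK),
 * plus a helper to test page alignment against a mask, presumably for
 * callers that only see the function declarations and not the macro
 * definitions.  Note that vm_map_set_page_shift() only succeeds while
 * the map is still empty (no entries).
 */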
16083 vm_map_offset_t
16084 vm_map_round_page_mask(
16085 vm_map_offset_t offset,
16086 vm_map_offset_t mask)
16087 {
16088 return VM_MAP_ROUND_PAGE(offset, mask);
16089 }
16090
16091 vm_map_offset_t
16092 vm_map_trunc_page_mask(
16093 vm_map_offset_t offset,
16094 vm_map_offset_t mask)
16095 {
16096 return VM_MAP_TRUNC_PAGE(offset, mask);
16097 }
16098
16099 boolean_t
16100 vm_map_page_aligned(
16101 vm_map_offset_t offset,
16102 vm_map_offset_t mask)
16103 {
16104 return ((offset) & mask) == 0;
16105 }
16106
16107 int
16108 vm_map_page_shift(
16109 vm_map_t map)
16110 {
16111 return VM_MAP_PAGE_SHIFT(map);
16112 }
16113
16114 int
16115 vm_map_page_size(
16116 vm_map_t map)
16117 {
16118 return VM_MAP_PAGE_SIZE(map);
16119 }
16120
16121 vm_map_offset_t
16122 vm_map_page_mask(
16123 vm_map_t map)
16124 {
16125 return VM_MAP_PAGE_MASK(map);
16126 }
16127
16128 kern_return_t
16129 vm_map_set_page_shift(
16130 vm_map_t map,
16131 int pageshift)
16132 {
16133 if (map->hdr.nentries != 0) {
16134 /* too late to change page size */
16135 return KERN_FAILURE;
16136 }
16137
16138 map->hdr.page_shift = pageshift;
16139
16140 return KERN_SUCCESS;
16141 }
16142
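/*
 * vm_map_query_volatile:
 *
 * Called with the map locked by the caller.  Tallies, across all
 * writable entries backed by volatile or empty purgeable objects, the
 * virtual size, the resident and compressed page counts of the backing
 * objects (each object counted once, at the entry that maps it from
 * offset 0), and the resident/compressed footprint reported by
 * pmap_query_resident() for those ranges.  All results are returned
 * in bytes.
 */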
16143 kern_return_t
16144 vm_map_query_volatile(
16145 vm_map_t map,
16146 mach_vm_size_t *volatile_virtual_size_p,
16147 mach_vm_size_t *volatile_resident_size_p,
16148 mach_vm_size_t *volatile_compressed_size_p,
16149 mach_vm_size_t *volatile_pmap_size_p,
16150 mach_vm_size_t *volatile_compressed_pmap_size_p)
16151 {
16152 mach_vm_size_t volatile_virtual_size;
16153 mach_vm_size_t volatile_resident_count;
16154 mach_vm_size_t volatile_compressed_count;
16155 mach_vm_size_t volatile_pmap_count;
16156 mach_vm_size_t volatile_compressed_pmap_count;
16157 mach_vm_size_t resident_count;
16158 vm_map_entry_t entry;
16159 vm_object_t object;
16160
16161 /* map should be locked by caller */
16162
16163 volatile_virtual_size = 0;
16164 volatile_resident_count = 0;
16165 volatile_compressed_count = 0;
16166 volatile_pmap_count = 0;
16167 volatile_compressed_pmap_count = 0;
16168
16169 for (entry = vm_map_first_entry(map);
16170 entry != vm_map_to_entry(map);
16171 entry = entry->vme_next) {
16172 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
16173
16174 if (entry->is_sub_map) {
16175 continue;
16176 }
16177 if (! (entry->protection & VM_PROT_WRITE)) {
16178 continue;
16179 }
16180 object = VME_OBJECT(entry);
16181 if (object == VM_OBJECT_NULL) {
16182 continue;
16183 }
16184 if (object->purgable != VM_PURGABLE_VOLATILE &&
16185 object->purgable != VM_PURGABLE_EMPTY) {
16186 continue;
16187 }
16188 if (VME_OFFSET(entry)) {
16189 /*
16190 * If the map entry has been split and the object now
16191 * appears several times in the VM map, we don't want
16192 * to count the object's resident_page_count more than
16193 * once. We count it only for the first one, starting
16194 * at offset 0 and ignore the other VM map entries.
16195 */
16196 continue;
16197 }
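/*
 * Note: VME_OFFSET(entry) is 0 here (entries mapping the object at a
 * non-zero offset were skipped above), so the adjustment below never
 * actually changes resident_count.
 */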
16198 resident_count = object->resident_page_count;
16199 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
16200 resident_count = 0;
16201 } else {
16202 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
16203 }
16204
16205 volatile_virtual_size += entry->vme_end - entry->vme_start;
16206 volatile_resident_count += resident_count;
16207 if (object->pager) {
16208 volatile_compressed_count +=
16209 vm_compressor_pager_get_count(object->pager);
16210 }
16211 pmap_compressed_bytes = 0;
16212 pmap_resident_bytes =
16213 pmap_query_resident(map->pmap,
16214 entry->vme_start,
16215 entry->vme_end,
16216 &pmap_compressed_bytes);
16217 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
16218 volatile_compressed_pmap_count += (pmap_compressed_bytes
16219 / PAGE_SIZE);
16220 }
16221
16222 /* map is still locked on return */
16223
16224 *volatile_virtual_size_p = volatile_virtual_size;
16225 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
16226 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
16227 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
16228 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
16229
16230 return KERN_SUCCESS;
16231 }
16232
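/*
 * vm_map_sizes:
 *
 * Reports, under the map's read lock, the map's total virtual size
 * (max_offset - min_offset), the total amount of unmapped space between
 * (and after) its entries, and the largest single unmapped gap.  A NULL
 * map reports zero for all three values.
 */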
16233 void
16234 vm_map_sizes(vm_map_t map,
16235 vm_map_size_t * psize,
16236 vm_map_size_t * pfree,
16237 vm_map_size_t * plargest_free)
16238 {
16239 vm_map_entry_t entry;
16240 vm_map_offset_t prev;
16241 vm_map_size_t free, total_free, largest_free;
16242 boolean_t end;
16243
16244 if (!map)
16245 {
16246 *psize = *pfree = *plargest_free = 0;
16247 return;
16248 }
16249 total_free = largest_free = 0;
16250
16251 vm_map_lock_read(map);
16252 if (psize) *psize = map->max_offset - map->min_offset;
16253
16254 prev = map->min_offset;
16255 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16256 {
16257 end = (entry == vm_map_to_entry(map));
16258
16259 if (end) free = entry->vme_end - prev;
16260 else free = entry->vme_start - prev;
16261
16262 total_free += free;
16263 if (free > largest_free) largest_free = free;
16264
16265 if (end) break;
16266 prev = entry->vme_end;
16267 }
16268 vm_map_unlock_read(map);
16269 if (pfree) *pfree = total_free;
16270 if (plargest_free) *plargest_free = largest_free;
16271 }
16272
16273 #if VM_SCAN_FOR_SHADOW_CHAIN
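/*
 * vm_map_shadow_max:
 *
 * Statistics helper built only when VM_SCAN_FOR_SHADOW_CHAIN is set:
 * walks every object mapped by the map, follows each shadow chain under
 * shared object locks, and returns the length of the longest chain
 * found (0 for a NULL map).
 */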
16274 int vm_map_shadow_max(vm_map_t map);
16275 int vm_map_shadow_max(
16276 vm_map_t map)
16277 {
16278 int shadows, shadows_max;
16279 vm_map_entry_t entry;
16280 vm_object_t object, next_object;
16281
16282 if (map == NULL)
16283 return 0;
16284
16285 shadows_max = 0;
16286
16287 vm_map_lock_read(map);
16288
16289 for (entry = vm_map_first_entry(map);
16290 entry != vm_map_to_entry(map);
16291 entry = entry->vme_next) {
16292 if (entry->is_sub_map) {
16293 continue;
16294 }
16295 object = VME_OBJECT(entry);
16296 if (object == NULL) {
16297 continue;
16298 }
16299 vm_object_lock_shared(object);
16300 for (shadows = 0;
16301 object->shadow != NULL;
16302 shadows++, object = next_object) {
16303 next_object = object->shadow;
16304 vm_object_lock_shared(next_object);
16305 vm_object_unlock(object);
16306 }
16307 vm_object_unlock(object);
16308 if (shadows > shadows_max) {
16309 shadows_max = shadows;
16310 }
16311 }
16312
16313 vm_map_unlock_read(map);
16314
16315 return shadows_max;
16316 }
16317 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
16318
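/*
 * vm_commit_pagezero_status:
 *
 * Advises the pmap layer of this map's lowest usable address via
 * pmap_advise_pagezero_range(), presumably so that the range below
 * min_offset (e.g. __PAGEZERO) can be handled specially.
 */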
16319 void
vm_commit_pagezero_status(vm_map_t lmap)
{
16320 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
16321 }