/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory mapping module.
 */
#include <task_swapper.h>
#include <mach_assert.h>

#include <vm/vm_options.h>

#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>

#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/counters.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/xpr.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_protos.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

#if VM_MAP_DEBUG_APPLE_PROTECT
int vm_map_debug_apple_protect = 0;
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
int vm_map_debug_fourk = 0;
#endif /* VM_MAP_DEBUG_FOURK */

extern u_int32_t random(void);	/* from <libkern/libkern.h> */
/* Internal prototypes
 */

static void vm_map_simplify_range(
	vm_map_offset_t		start,
	vm_map_offset_t		end);	/* forward */

static boolean_t	vm_map_range_check(
	vm_map_offset_t		start,
	vm_map_entry_t		*entry);

static vm_map_entry_t	_vm_map_entry_create(
	struct vm_map_header	*map_header, boolean_t map_locked);

static void		_vm_map_entry_dispose(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry);

static void		vm_map_pmap_enter(
	vm_map_offset_t		addr,
	vm_map_offset_t		end_addr,
	vm_object_offset_t	offset,
	vm_prot_t		protection);

static void		_vm_map_clip_end(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		end);

static void		_vm_map_clip_start(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		start);

static void		vm_map_entry_delete(
	vm_map_entry_t		entry);

static kern_return_t	vm_map_delete(
	vm_map_offset_t		start,

static kern_return_t	vm_map_copy_overwrite_unaligned(
	vm_map_entry_t		entry,
	vm_map_address_t	start,
	boolean_t		discard_on_success);

static kern_return_t	vm_map_copy_overwrite_aligned(
	vm_map_entry_t		tmp_entry,
	vm_map_offset_t		start,

static kern_return_t	vm_map_copyin_kernel_buffer(
	vm_map_address_t	src_addr,
	boolean_t		src_destroy,
	vm_map_copy_t		*copy_result);	/* OUT */

static kern_return_t	vm_map_copyout_kernel_buffer(
	vm_map_address_t	*addr,	/* IN/OUT */
	vm_map_size_t		copy_size,
	boolean_t		consume_on_success);

static void		vm_map_fork_share(
	vm_map_entry_t		old_entry,

static boolean_t	vm_map_fork_copy(
	vm_map_entry_t		*old_entry_p,
	int			vm_map_copyin_flags);

void		vm_map_region_top_walk(
	vm_map_entry_t		entry,
	vm_region_top_info_t	top);

void		vm_map_region_walk(
	vm_map_entry_t			entry,
	vm_object_offset_t		offset,
	vm_object_size_t		range,
	vm_region_extended_info_t	extended,
	boolean_t			look_for_pages,
	mach_msg_type_number_t		count);

static kern_return_t	vm_map_wire_nested(
	vm_map_offset_t		start,
	vm_prot_t		caller_prot,
	vm_map_offset_t		pmap_addr,
	ppnum_t			*physpage_p);

static kern_return_t	vm_map_unwire_nested(
	vm_map_offset_t		start,
	vm_map_offset_t		pmap_addr);

static kern_return_t	vm_map_overwrite_submap_recurse(
	vm_map_offset_t		dst_addr,
	vm_map_size_t		dst_size);

static kern_return_t	vm_map_copy_overwrite_nested(
	vm_map_offset_t		dst_addr,
	boolean_t		interruptible,
	boolean_t		discard_on_success);

static kern_return_t	vm_map_remap_extract(
	vm_map_offset_t		addr,
	struct vm_map_header	*map_header,
	vm_prot_t		*cur_protection,
	vm_prot_t		*max_protection,
	vm_inherit_t		inheritance,

static kern_return_t	vm_map_remap_range_allocate(
	vm_map_address_t	*address,
	vm_map_offset_t		mask,
	vm_map_entry_t		*map_entry);

static void		vm_map_region_look_for_page(
	vm_object_offset_t		offset,
	vm_region_extended_info_t	extended,
	mach_msg_type_number_t		count);

static int		vm_map_region_count_obj_refs(
	vm_map_entry_t		entry,

static kern_return_t	vm_map_willneed(
	vm_map_offset_t		start,
	vm_map_offset_t		end);

static kern_return_t	vm_map_reuse_pages(
	vm_map_offset_t		start,
	vm_map_offset_t		end);

static kern_return_t	vm_map_reusable_pages(
	vm_map_offset_t		start,
	vm_map_offset_t		end);

static kern_return_t	vm_map_can_reuse(
	vm_map_offset_t		start,
	vm_map_offset_t		end);

static kern_return_t	vm_map_pageout(
	vm_map_offset_t		start,
	vm_map_offset_t		end);
#endif /* MACH_ASSERT */
/*
 *	Macros to copy a vm_map_entry.  We must be careful to correctly
 *	manage the wired page count.  vm_map_entry_copy() creates a new
 *	map entry to the same memory - the wired count in the new entry
 *	must be set to zero.  vm_map_entry_copy_full() creates a new
 *	entry that is identical to the old entry.  This preserves the
 *	wire count; it's used for map splitting and zone changing in
 *	vm_map_copyout.
 */
#define vm_map_entry_copy(NEW,OLD)				\
	boolean_t _vmec_reserved = (NEW)->from_reserved_zone;	\
	(NEW)->is_shared = FALSE;				\
	(NEW)->needs_wakeup = FALSE;				\
	(NEW)->in_transition = FALSE;				\
	(NEW)->wired_count = 0;					\
	(NEW)->user_wired_count = 0;				\
	(NEW)->permanent = FALSE;				\
	(NEW)->used_for_jit = FALSE;				\
	(NEW)->from_reserved_zone = _vmec_reserved;		\
	(NEW)->iokit_acct = FALSE;				\
	(NEW)->vme_resilient_codesign = FALSE;			\
	(NEW)->vme_resilient_media = FALSE;			\
	(NEW)->vme_atomic = FALSE;

#define vm_map_entry_copy_full(NEW,OLD)				\
	boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;	\
	(NEW)->from_reserved_zone = _vmecf_reserved;
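/*
 * Example (illustrative sketch, not part of the original source): the
 * difference between the two copy macros above.  "dup" and "template" are
 * hypothetical entry pointers used only for this example.
 *
 *	vm_map_entry_copy(dup, template);	// dup refers to the same memory,
 *						// but dup->wired_count and
 *						// dup->user_wired_count are 0
 *	vm_map_entry_copy_full(dup, template);	// field-for-field copy: the
 *						// wire counts are preserved
 */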
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis.  Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it.  As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */
extern int allow_data_exec, allow_stack_exec;

override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */

	if (map->pmap == kernel_pmap) return FALSE;

	/*
	 * Determine if the app is running in 32 or 64 bit mode.
	 */

	if (vm_map_is_64bit(map))
		current_abi = VM_ABI_64;
	else
		current_abi = VM_ABI_32;

	/*
	 * Determine if we should allow the execution based on whether it's a
	 * stack or data area and the current architecture.
	 */

	if (user_tag == VM_MEMORY_STACK)
		return allow_stack_exec & current_abi;

	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
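/*
 * Example (illustrative sketch, not part of the original source): how a
 * fault path might consult override_nx() before allowing execution from a
 * page that lacks VM_PROT_EXECUTE.  "fault_prot" and "entry_prot" are
 * hypothetical names; override_nx(), VME_ALIAS() and the protection bits
 * come from this file's context.
 *
 *	if ((fault_prot & VM_PROT_EXECUTE) &&
 *	    !(entry_prot & VM_PROT_EXECUTE) &&
 *	    !override_nx(map, VME_ALIAS(entry)))
 *		return KERN_PROTECTION_FAILURE;	// deny the execute fault
 */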
/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	Sharing maps have been deleted from this version of Mach.
 *	All shared objects are now mapped directly into the respective
 *	maps.  This requires a change in the copy on write strategy;
 *	the asymmetric (delayed) strategy is used for shared temporary
 *	objects instead of the symmetric (shadow) strategy.  All maps
 *	are now "top level" maps (either task map, kernel map or submap
 *	of the kernel map).
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	entries that used to be a single entry.
 *
 *	The symmetric (shadow) copy strategy implements virtual copy
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map when this strategy
 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
 *	strategy allows multiple maps to have writeable references to
 *	the same region of a vm object, and hence cannot delay creating
 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 *	Copying of permanent objects is completely different; see
 *	vm_object_copy_strategically() in vm_object.c.
 */
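/*
 * Example (illustrative sketch, not part of the original source): the
 * "clipping" described above is what vm_map_clip_start()/vm_map_clip_end()
 * do before an operation is applied to an arbitrary [start, end) range.
 * The variable names are hypothetical; the call pattern mirrors the one
 * used later in this file (e.g. in vm_map_apple_protected()).
 *
 *	if (vm_map_lookup_entry(map, start, &entry)) {
 *		vm_map_clip_start(map, entry, start);	// split off [vme_start, start)
 *		vm_map_clip_end(map, entry, end);	// split off [end, vme_end)
 *		// "entry" now covers exactly [start, end) and can be
 *		// modified without touching neighboring mappings
 *	}
 */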
static zone_t	vm_map_zone;			/* zone for vm_map structures */
static zone_t	vm_map_entry_zone;		/* zone for vm_map_entry structures */
zone_t		vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking
						 * allocations */
static zone_t	vm_map_copy_zone;		/* zone for vm_map_copy structures */
zone_t		vm_map_holes_zone;		/* zone for vm map holes (vm_map_links) structures */

/*
 *	Placeholder object for submap operations.  This object is dropped
 *	into the range by a call to vm_map_find, and removed when
 *	vm_map_submap creates the submap.
 */

vm_object_t	vm_submap_object;

static void		*map_data;
static vm_size_t	map_data_size;
static void		*kentry_data;
static vm_size_t	kentry_data_size;
static void		*map_holes_data;
static vm_size_t	map_holes_data_size;
#define NO_COALESCE_LIMIT  ((1024 * 128) - 1)

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;

vm_map_set_cache_attr(
	vm_map_entry_t	map_entry;

	kern_return_t	kr = KERN_SUCCESS;

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, va, &map_entry) ||
	    map_entry->is_sub_map) {
		/*
		 * that memory is not properly mapped
		 */
		kr = KERN_INVALID_ARGUMENT;

	object = VME_OBJECT(map_entry);

	if (object == VM_OBJECT_NULL) {
		/*
		 * there should be a VM object here at this point
		 */
		kr = KERN_INVALID_ARGUMENT;

	vm_object_lock(object);
	object->set_cache_attr = TRUE;
	vm_object_unlock(object);

	vm_map_set_cache_attr_count++;

	vm_map_unlock_read(map);
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected.  The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
vm_map_apple_protected(
	vm_map_offset_t		start,
	vm_object_offset_t	crypto_backing_offset,
	struct pager_crypt_info	*crypt_info)

	boolean_t		map_locked;
	vm_map_entry_t		map_entry;
	struct vm_map_entry	tmp_entry;
	memory_object_t		unprotected_mem_obj;
	vm_object_t		protected_object;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		start_aligned, end_aligned;
	vm_object_offset_t	crypto_start, crypto_end;

	unprotected_mem_obj = MEMORY_OBJECT_NULL;

	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

	assert(start_aligned == start);
	assert(end_aligned == end);

	map_addr = start_aligned;
	for (map_addr = start_aligned;
	     map_addr = tmp_entry.vme_end) {

		/* lookup the protected VM object */
		if (!vm_map_lookup_entry(map,
		    map_entry->is_sub_map ||
		    VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
		    !(map_entry->protection & VM_PROT_EXECUTE)) {
			/* that memory is not properly mapped */
			kr = KERN_INVALID_ARGUMENT;

		/* get the protected object to be decrypted */
		protected_object = VME_OBJECT(map_entry);
		if (protected_object == VM_OBJECT_NULL) {
			/* there should be a VM object here at this point */
			kr = KERN_INVALID_ARGUMENT;

		/* ensure protected object stays alive while map is unlocked */
		vm_object_reference(protected_object);

		/* limit the map entry to the area we want to cover */
		vm_map_clip_start(map, map_entry, start_aligned);
		vm_map_clip_end(map, map_entry, end_aligned);

		tmp_entry = *map_entry;
		map_entry = VM_MAP_ENTRY_NULL;	/* not valid after unlocking map */

		/*
		 * This map entry might be only partially encrypted
		 * (if not fully "page-aligned").
		 */
		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
		if (tmp_entry.vme_start < start) {
			if (tmp_entry.vme_start != start_aligned) {
				kr = KERN_INVALID_ADDRESS;
			crypto_start += (start - tmp_entry.vme_start);
		if (tmp_entry.vme_end > end) {
			if (tmp_entry.vme_end != end_aligned) {
				kr = KERN_INVALID_ADDRESS;
			crypto_end -= (tmp_entry.vme_end - end);

		/*
		 * This "extra backing offset" is needed to get the decryption
		 * routine to use the right key.  It adjusts for the possibly
		 * relative offset of an interposed "4K" pager...
		 */
		if (crypto_backing_offset == (vm_object_offset_t) -1) {
			crypto_backing_offset = VME_OFFSET(&tmp_entry);

		/*
		 * Lookup (and create if necessary) the protected memory object
		 * matching that VM object.
		 * If successful, this also grabs a reference on the memory object,
		 * to guarantee that it doesn't go away before we get a chance to map
		 */
		unprotected_mem_obj = apple_protect_pager_setup(
			VME_OFFSET(&tmp_entry),
			crypto_backing_offset,

		/* release extra ref on protected object */
		vm_object_deallocate(protected_object);

		if (unprotected_mem_obj == NULL) {

		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;

		/* map this memory object in place of the current one */
		map_addr = tmp_entry.vme_start;
		kr = vm_map_enter_mem_object(map,
					     tmp_entry.vme_start),
					     (mach_vm_offset_t) 0,
					     (ipc_port_t) unprotected_mem_obj,
					     tmp_entry.protection,
					     tmp_entry.max_protection,
					     tmp_entry.inheritance);
		assert(kr == KERN_SUCCESS);
		assert(map_addr == tmp_entry.vme_start);

#if VM_MAP_DEBUG_APPLE_PROTECT
		if (vm_map_debug_apple_protect) {
			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
			       " backing:[object:%p,offset:0x%llx,"
			       "crypto_backing_offset:0x%llx,"
			       "crypto_start:0x%llx,crypto_end:0x%llx]\n",
			       (uint64_t) (map_addr + (tmp_entry.vme_end -
						       tmp_entry.vme_start)),
			       VME_OFFSET(&tmp_entry),
			       crypto_backing_offset,
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

		/*
		 * Release the reference obtained by
		 * apple_protect_pager_setup().
		 * The mapping (if it succeeded) is now holding a reference on
		 */
		memory_object_deallocate(unprotected_mem_obj);
		unprotected_mem_obj = MEMORY_OBJECT_NULL;

		/* continue with next map entry */
		crypto_backing_offset += (tmp_entry.vme_end -
					  tmp_entry.vme_start);
		crypto_backing_offset -= crypto_start;

#endif	/* CONFIG_CODE_DECRYPTION */
lck_grp_t		vm_map_lck_grp;
lck_grp_attr_t		vm_map_lck_grp_attr;
lck_attr_t		vm_map_lck_attr;
lck_attr_t		vm_map_lck_rw_attr;

/*
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:		used to allocate maps.
 *	vm_map_entry_zone:	used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 */
	vm_size_t entry_zone_alloc_size;
	const char *mez_name = "VM map entries";

	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);

#if defined(__LP64__)
	entry_zone_alloc_size = PAGE_SIZE * 5;
#else
	entry_zone_alloc_size = PAGE_SIZE * 6;
#endif
	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
				  1024*1024, entry_zone_alloc_size,
	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);

	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
					   kentry_data_size * 64, kentry_data_size,
					   "Reserved VM map entries");
	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);

	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
				 16*1024, PAGE_SIZE, "VM map copies");
	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);

	vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
				  16*1024, PAGE_SIZE, "VM map holes");
	zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);

	/*
	 *	Cram the map and kentry zones with initial data.
	 *	Set reserved_zone non-collectible to aid zone_gc().
	 */
	zone_change(vm_map_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_zone, Z_FOREIGN, TRUE);

	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);

	zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
	zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
	zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
	zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);

	/*
	 * Add the stolen memory to zones, adjust zone size and stolen counts.
	 */
	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
	zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
	zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
	VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));

	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
	lck_attr_setdefault(&vm_map_lck_attr);

	lck_attr_setdefault(&vm_map_lck_rw_attr);
	lck_attr_cleardebug(&vm_map_lck_rw_attr);

#if VM_MAP_DEBUG_APPLE_PROTECT
	PE_parse_boot_argn("vm_map_debug_apple_protect",
			   &vm_map_debug_apple_protect,
			   sizeof(vm_map_debug_apple_protect));
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
	PE_parse_boot_argn("vm_map_debug_fourk",
			   sizeof(vm_map_debug_fourk));
#endif /* VM_MAP_DEBUG_FOURK */
	uint32_t	kentry_initial_pages;

	map_data_size = round_page(10 * sizeof(struct _vm_map));
	map_data = pmap_steal_memory(map_data_size);

	/*
	 * kentry_initial_pages corresponds to the number of kernel map entries
	 * required during bootstrap until the asynchronous replenishment
	 * scheme is activated and/or entries are available from the general
	 */
#if defined(__LP64__)
	kentry_initial_pages = 10;
#else
	kentry_initial_pages = 6;
#endif

	/* If using the guard allocator, reserve more memory for the kernel
	 * reserved map entry pool.
	 */
	if (gzalloc_enabled())
		kentry_initial_pages *= 1024;

	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
	kentry_data = pmap_steal_memory(kentry_data_size);

	map_holes_data_size = kentry_data_size;
	map_holes_data = pmap_steal_memory(map_holes_data_size);
vm_kernel_reserved_entry_init(void) {
	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
	zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
vm_map_disable_hole_optimization(vm_map_t map)

	vm_map_entry_t	head_entry, hole_entry, next_hole_entry;

	if (map->holelistenabled) {

		head_entry = hole_entry = (vm_map_entry_t) map->holes_list;

		while (hole_entry != NULL) {

			next_hole_entry = hole_entry->vme_next;

			hole_entry->vme_next = NULL;
			hole_entry->vme_prev = NULL;
			zfree(vm_map_holes_zone, hole_entry);

			if (next_hole_entry == head_entry) {

				hole_entry = next_hole_entry;

		map->holes_list = NULL;
		map->holelistenabled = FALSE;

		map->first_free = vm_map_first_entry(map);
		SAVE_HINT_HOLE_WRITE(map, NULL);
vm_kernel_map_is_kernel(vm_map_t map) {
	return (map->pmap == kernel_pmap);
/*
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */

boolean_t vm_map_supports_hole_optimization = TRUE;

	static int	color_seed = 0;
	struct vm_map_links	*hole_entry = NULL;

	result = (vm_map_t) zalloc(vm_map_zone);
	if (result == VM_MAP_NULL)
		panic("vm_map_create");

	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result)  = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	result->hdr.entries_pageable = pageable;

	vm_map_store_init( &(result->hdr) );

	result->hdr.page_shift = PAGE_SHIFT;

	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
	result->user_wire_size  = 0;
	result->ref_count = 1;
	result->res_count = 1;
	result->sw_state = MAP_SW_IN;
#endif	/* TASK_SWAPPER */
	result->min_offset = min;
	result->max_offset = max;
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped_in_other_pmaps = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->is_nested_map = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->color_rr = (color_seed++) & vm_color_mask;
	result->jit_entry_exists = FALSE;

	if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
		hole_entry = zalloc(vm_map_holes_zone);

		hole_entry->start = min;
		hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
		result->holes_list = result->hole_hint = hole_entry;
		hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
		result->holelistenabled = TRUE;
	} else {
		result->holelistenabled = FALSE;
	}

	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
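/*
 * Example (illustrative sketch, not part of the original source): creating a
 * fresh map.  The argument order shown here (pmap, min, max, pageable) is an
 * assumption inferred from how the parameters are used in the body above,
 * not a quote of the declaration; "pmap" is a hypothetical physical map.
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_create(pmap,		// physical map to back it
 *				VM_MIN_ADDRESS,	// lower bound (min)
 *				VM_MAX_ADDRESS,	// upper bound (max)
 *				TRUE);		// entries_pageable
 */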
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define	vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)

#define	vm_map_copy_entry_create(copy, map_locked)	\
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)

unsigned reserved_zalloc_count, nonreserved_zalloc_count;
static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header	*map_header, boolean_t	__unused map_locked)

	vm_map_entry_t	entry;

	zone = vm_map_entry_zone;

	assert(map_header->entries_pageable ? !map_locked : TRUE);

	if (map_header->entries_pageable) {
		entry = (vm_map_entry_t) zalloc(zone);

		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);

		if (entry == VM_MAP_ENTRY_NULL) {
			zone = vm_map_entry_reserved_zone;
			entry = (vm_map_entry_t) zalloc(zone);
			OSAddAtomic(1, &reserved_zalloc_count);

			OSAddAtomic(1, &nonreserved_zalloc_count);

	if (entry == VM_MAP_ENTRY_NULL)
		panic("vm_map_entry_create");
	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);

	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if	MAP_ENTRY_CREATION_DEBUG
	entry->vme_creation_maphdr = map_header;
	backtrace(&entry->vme_creation_bt[0],
		  (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to ensure correctness
 */
#define	vm_map_entry_dispose(map, entry)		\
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define	vm_map_copy_entry_dispose(map, entry)		\
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
_vm_map_entry_dispose(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry)

	if (map_header->entries_pageable || !(entry->from_reserved_zone))
		zone = vm_map_entry_zone;
	else
		zone = vm_map_entry_reserved_zone;

	if (!map_header->entries_pageable) {
		if (zone == vm_map_entry_zone)
			OSAddAtomic(-1, &nonreserved_zalloc_count);
		else
			OSAddAtomic(-1, &reserved_zalloc_count);

static boolean_t first_free_check = FALSE;

first_free_is_valid(

	if (!first_free_check)

	return( first_free_is_valid_store( map ));

#endif /* MACH_ASSERT */
#define vm_map_copy_entry_link(copy, after_where, entry)		\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)				\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
#if	MACH_ASSERT && TASK_SWAPPER
/*
 *	vm_map_res_reference:
 *
 *	Adds another valid residence count to the given map.
 *
 *	Map is locked so this function can be called from
 */
void vm_map_res_reference(vm_map_t map)

	/* assert map is locked */
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	if (map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		lck_mtx_lock(&map->s_lock);

/*
 *	vm_map_reference_swap:
 *
 *	Adds valid reference and residence counts to the given map.
 *
 *	The map may not be in memory (i.e. zero residence count).
 */
void vm_map_reference_swap(vm_map_t map)

	assert(map != VM_MAP_NULL);
	lck_mtx_lock(&map->s_lock);
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	vm_map_res_reference(map);
	lck_mtx_unlock(&map->s_lock);

/*
 *	vm_map_res_deallocate:
 *
 *	Decrement residence count on a map; possibly causing swapout.
 *
 *	The map must be in memory (i.e. non-zero residence count).
 *
 *	The map is locked, so this function is callable from vm_map_deallocate.
 */
void vm_map_res_deallocate(vm_map_t map)

	assert(map->res_count > 0);
	if (--map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_swapout(map);
		lck_mtx_lock(&map->s_lock);
	assert(map->ref_count >= map->res_count);
#endif	/* MACH_ASSERT && TASK_SWAPPER */
/*
 *	Actually destroy a map.
 */

	/* final cleanup: no need to unnest shared region */
	flags |= VM_MAP_REMOVE_NO_UNNESTING;

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
			     flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, etc...) */
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
			     flags, VM_MAP_NULL);

	vm_map_disable_hole_optimization(map);

	assert(map->hdr.nentries == 0);

	pmap_destroy(map->pmap);

	if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
		/*
		 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
		 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
		 * structure or kalloc'ed via lck_mtx_init.
		 * An example is s_lock_ext within struct _vm_map.
		 *
		 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic.  We
		 * can add another tag to detect embedded vs alloc'ed indirect external
		 * mutexes but that'll be additional checks in the lock path and require
		 * updating dependencies for the old vs new tag.
		 *
		 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
		 * just when lock debugging is ON, we choose to forego explicitly destroying
		 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
		 * count on vm_map_lck_grp, which has no serious side-effect.
		 */

	lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
	lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);

	zfree(vm_map_zone, map);
/*
 *	vm_map_swapin/vm_map_swapout
 *
 *	Swap a map in and out, either referencing or releasing its resources.
 *	These functions are internal use only; however, they must be exported
 *	because they may be called from macros, which are exported.
 *
 *	In the case of swapout, there could be races on the residence count,
 *	so if the residence count is up, we return, assuming that a
 *	vm_map_deallocate() call in the near future will bring us back.
 *
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 *	Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic.  vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen.  If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate.  If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *
 *	Because vm_map_swapin() is potentially an expensive operation, it
 *	should be used with caution.
 *
 *	1) A map with a residence count of zero is either swapped, or
 *	2) A map with a non-zero residence count is either resident,
 *	   or being swapped in.
 */

int vm_map_swap_enable = 1;
void vm_map_swapin (vm_map_t map)

	vm_map_entry_t entry;

	if (!vm_map_swap_enable)	/* debug */

	/*
	 * First deal with various races.
	 */
	if (map->sw_state == MAP_SW_IN)
		/*
		 * we raced with swapout and won.  Returning will incr.
		 * the res_count, turning the swapout into a nop.
		 */

	/*
	 * The residence count must be zero.  If we raced with another
	 * swapin, the state would have been IN; if we raced with a
	 * swapout (after another competing swapin), we must have lost
	 * the race to get here (see above comment), in which case
	 * res_count is still 0.
	 */
	assert(map->res_count == 0);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_OUT);

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_reference upon it.
	 * If the entry is an object, we call vm_object_res_reference
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_reference.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = VME_SUBMAP(entry);
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_reference(lmap);
				lck_mtx_unlock(&lmap->s_lock);

				vm_object_t object = VME_OBJECT(entry);
				vm_object_lock(object);
				/*
				 * This call may iterate through the
				 */
				vm_object_res_reference(object);
				vm_object_unlock(object);

		entry = entry->vme_next;

	assert(map->sw_state == MAP_SW_OUT);
	map->sw_state = MAP_SW_IN;
void vm_map_swapout(vm_map_t map)

	vm_map_entry_t entry;

	/*
	 * First deal with various races.
	 * If we raced with a swapin and lost, the residence count
	 * will have been incremented to 1, and we simply return.
	 */
	lck_mtx_lock(&map->s_lock);
	if (map->res_count != 0) {
		lck_mtx_unlock(&map->s_lock);
	lck_mtx_unlock(&map->s_lock);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_IN);

	if (!vm_map_swap_enable)

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_deallocate upon it.
	 * If the entry is an object, we call vm_object_res_deallocate
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_deallocate.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = VME_SUBMAP(entry);
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_deallocate(lmap);
				lck_mtx_unlock(&lmap->s_lock);

				vm_object_t object = VME_OBJECT(entry);
				vm_object_lock(object);
				/*
				 * This call may take a long time,
				 * since it could actively push
				 * out pages (if we implement it
				 */
				vm_object_res_deallocate(object);
				vm_object_unlock(object);

		entry = entry->vme_next;

	assert(map->sw_state == MAP_SW_IN);
	map->sw_state = MAP_SW_OUT;

#endif	/* TASK_SWAPPER */
/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Calls into the vm map store layer to find the map
 *	entry containing (or immediately preceding) the
 *	specified address in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
vm_map_lookup_entry(
	vm_map_offset_t		address,
	vm_map_entry_t		*entry)		/* OUT */

	return ( vm_map_store_lookup_entry( map, address, entry ));
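/*
 * Example (illustrative sketch, not part of the original source): the usual
 * lookup pattern, as used elsewhere in this file (e.g. vm_map_set_cache_attr()).
 * The variable names are hypothetical.
 *
 *	vm_map_entry_t	entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		// "entry" contains addr: entry->vme_start <= addr < entry->vme_end
 *	} else {
 *		// addr is unmapped; "entry" is the entry preceding the hole
 *	}
 *	vm_map_unlock_read(map);
 */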
/*
 *	Routine:	vm_map_find_space
 *		Allocate a range in the specified virtual address map,
 *		returning the entry allocated for that range.
 *		Used by kmem_alloc, etc.
 *
 *		The map must NOT be locked.  It will be returned locked
 *		on KERN_SUCCESS, unlocked on failure.
 *
 *		If an entry is allocated, the object/offset fields
 *		are initialized to zero.
 */
	vm_map_offset_t		*address,	/* OUT */
	vm_map_offset_t		mask,
	vm_map_entry_t		*o_entry)	/* OUT */

	vm_map_entry_t	entry, new_entry;
	vm_map_offset_t	start;
	vm_map_offset_t	end;
	vm_map_entry_t	hole_entry;

		return KERN_INVALID_ARGUMENT;

	if (flags & VM_FLAGS_GUARD_AFTER) {
		/* account for the back guard page in the size */
		size += VM_MAP_PAGE_SIZE(map);

	new_entry = vm_map_entry_create(map, FALSE);

	/*
	 *	Look for the first possible address; if there's already
	 *	something at this address, we have to start after it.
	 */

	if( map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(map, entry, start);

		if (map->holelistenabled) {
			hole_entry = (vm_map_entry_t)map->holes_list;

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_entry_dispose(map, new_entry);
				return(KERN_NO_SPACE);

			start = entry->vme_start;

			assert(first_free_is_valid(map));
			if ((entry = map->first_free) == vm_map_to_entry(map))
				start = map->min_offset;
			else
				start = entry->vme_end;

	/*
	 *	In any case, the "entry" always precedes
	 *	the proposed new region throughout the loop:
	 */

		vm_map_entry_t	next;

		/*
		 *	Find the end of the proposed new region.
		 *	Be sure we didn't go beyond the end, or
		 *	wrap around the address.
		 */

		if (flags & VM_FLAGS_GUARD_BEFORE) {
			/* reserve space for the front guard page */
			start += VM_MAP_PAGE_SIZE(map);
		end = ((start + mask) & ~mask);

			vm_map_entry_dispose(map, new_entry);
			return(KERN_NO_SPACE);

		if ((end > map->max_offset) || (end < start)) {
			vm_map_entry_dispose(map, new_entry);
			return(KERN_NO_SPACE);

		next = entry->vme_next;

		if (map->holelistenabled) {
			if (entry->vme_end >= end)

			/*
			 *	If there are no more entries, we must win.
			 */

			/*
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(map))

			if (next->vme_start >= end)

		/*
		 *	Didn't fit -- move to the next entry.
		 */

		if (map->holelistenabled) {
			if (entry == (vm_map_entry_t) map->holes_list) {
				vm_map_entry_dispose(map, new_entry);
				return(KERN_NO_SPACE);
			start = entry->vme_start;

			start = entry->vme_end;

	if (map->holelistenabled) {
		if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);

	/*
	 *	"start" and "end" should define the endpoints of the
	 *		available new range, and
	 *	"entry" should refer to the region before the new
	 *
	 *	the map should be locked.
	 */

	if (flags & VM_FLAGS_GUARD_BEFORE) {
		/* go back for the front guard page */
		start -= VM_MAP_PAGE_SIZE(map);

	assert(start < end);
	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
				   VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
				   VM_MAP_PAGE_MASK(map)));

	new_entry->is_shared = FALSE;
	new_entry->is_sub_map = FALSE;
	new_entry->use_pmap = TRUE;
	VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
	VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);

	new_entry->needs_copy = FALSE;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;
	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
	new_entry->wired_count = 0;
	new_entry->user_wired_count = 0;

	new_entry->in_transition = FALSE;
	new_entry->needs_wakeup = FALSE;
	new_entry->no_cache = FALSE;
	new_entry->permanent = FALSE;
	new_entry->superpage_size = FALSE;
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}

	new_entry->used_for_jit = FALSE;
	new_entry->zero_wired_pages = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	if (flags & VM_FLAGS_ATOMIC_ENTRY)
		new_entry->vme_atomic = TRUE;
	else
		new_entry->vme_atomic = FALSE;

	VM_GET_FLAGS_ALIAS(flags, alias);
	VME_ALIAS_SET(new_entry, alias);

	/*
	 *	Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry);

	/*
	 *	Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	*o_entry = new_entry;
	return(KERN_SUCCESS);
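/*
 * Example (illustrative sketch, not part of the original source): a
 * kmem_alloc-style caller of vm_map_find_space().  Per the comment above,
 * the map is passed in unlocked and comes back locked on KERN_SUCCESS.
 * The argument order is assumed from the parameter fragments above, and
 * the local names are hypothetical.
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		// fill in the entry's object/offset here ...
 *		vm_map_unlock(kernel_map);	// caller must drop the lock
 *	}
 */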
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
/*
 *	Routine:	vm_map_pmap_enter [internal only]
 *
 *		Force pages from the specified object to be entered into
 *		the pmap at the specified address if they are present.
 *		As soon as a page is not found in the object, the scan ends.
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
__unused static void
	vm_map_offset_t		addr,
	vm_map_offset_t		end_addr,
	vm_object_offset_t	offset,
	vm_prot_t		protection)

	while (addr < end_addr) {

		/*
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		/*
		 * The user should never see encrypted data, so do not
		 * enter an encrypted page in the page table.
		 */
		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
		    (m->unusual && ( m->error || m->restart || m->absent))) {
			vm_object_unlock(object);

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			       map, (unsigned long long)addr, object, (unsigned long long)offset);

		type_of_fault = DBG_CACHE_HIT_FAULT;
		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
				    0, /* XXX need user tag / alias? */
				    0, /* alternate accounting? */

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;

boolean_t vm_map_pmap_is_empty(
	vm_map_offset_t	start,
	vm_map_offset_t	end);
boolean_t vm_map_pmap_is_empty(
	vm_map_offset_t	start,
	vm_map_offset_t	end)

#ifdef MACHINE_PMAP_IS_EMPTY
	return pmap_is_empty(map->pmap, start, end);
#else	/* MACHINE_PMAP_IS_EMPTY */
	vm_map_offset_t	offset;

	if (map->pmap == NULL) {

	for (offset = start;
	     offset += PAGE_SIZE) {
		phys_page = pmap_find_phys(map->pmap, offset);
			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
				"page %d at 0x%llx\n",
				map, (long long)start, (long long)end,
				phys_page, (long long)offset);
#endif	/* MACHINE_PMAP_IS_EMPTY */

#define MAX_TRIES_TO_GET_RANDOM_ADDRESS	1000

vm_map_random_address_for_size(
	vm_map_offset_t	*address,

	kern_return_t	kr = KERN_SUCCESS;
	vm_map_offset_t	random_addr = 0;
	vm_map_offset_t	hole_end;

	vm_map_entry_t	next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t	prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t	vm_hole_size = 0;
	vm_map_size_t	addr_space_size;

	addr_space_size = vm_map_max(map) - vm_map_min(map);

	assert(page_aligned(size));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
		random_addr = vm_map_trunc_page(
			vm_map_min(map) +(random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);

				next_entry = prev_entry->vme_next;

			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);

				hole_end = next_entry->vme_start;

			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				*address = random_addr;

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1830 * Routine: vm_map_enter
1833 * Allocate a range in the specified virtual address map.
1834 * The resulting range will refer to memory defined by
1835 * the given memory object and offset into that object.
1837 * Arguments are as defined in the vm_map call.
1839 int _map_enter_debug
= 0;
1840 static unsigned int vm_map_enter_restore_successes
= 0;
1841 static unsigned int vm_map_enter_restore_failures
= 0;
1845 vm_map_offset_t
*address
, /* IN/OUT */
1847 vm_map_offset_t mask
,
1850 vm_object_offset_t offset
,
1851 boolean_t needs_copy
,
1852 vm_prot_t cur_protection
,
1853 vm_prot_t max_protection
,
1854 vm_inherit_t inheritance
)
1856 vm_map_entry_t entry
, new_entry
;
1857 vm_map_offset_t start
, tmp_start
, tmp_offset
;
1858 vm_map_offset_t end
, tmp_end
;
1859 vm_map_offset_t tmp2_start
, tmp2_end
;
1860 vm_map_offset_t step
;
1861 kern_return_t result
= KERN_SUCCESS
;
1862 vm_map_t zap_old_map
= VM_MAP_NULL
;
1863 vm_map_t zap_new_map
= VM_MAP_NULL
;
1864 boolean_t map_locked
= FALSE
;
1865 boolean_t pmap_empty
= TRUE
;
1866 boolean_t new_mapping_established
= FALSE
;
1867 boolean_t keep_map_locked
= ((flags
& VM_FLAGS_KEEP_MAP_LOCKED
) != 0);
1868 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
1869 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
1870 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
1871 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
1872 boolean_t is_submap
= ((flags
& VM_FLAGS_SUBMAP
) != 0);
1873 boolean_t permanent
= ((flags
& VM_FLAGS_PERMANENT
) != 0);
1874 boolean_t entry_for_jit
= ((flags
& VM_FLAGS_MAP_JIT
) != 0);
1875 boolean_t iokit_acct
= ((flags
& VM_FLAGS_IOKIT_ACCT
) != 0);
1876 boolean_t resilient_codesign
= ((flags
& VM_FLAGS_RESILIENT_CODESIGN
) != 0);
1877 boolean_t resilient_media
= ((flags
& VM_FLAGS_RESILIENT_MEDIA
) != 0);
1878 boolean_t random_address
= ((flags
& VM_FLAGS_RANDOM_ADDR
) != 0);
1879 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
1880 vm_tag_t alias
, user_alias
;
1881 vm_map_offset_t effective_min_offset
, effective_max_offset
;
1883 boolean_t clear_map_aligned
= FALSE
;
1884 vm_map_entry_t hole_entry
;
1886 if (superpage_size
) {
1887 switch (superpage_size
) {
1889 * Note that the current implementation only supports
1890 * a single size for superpages, SUPERPAGE_SIZE, per
1891 * architecture. As soon as more sizes are supposed
1892 * to be supported, SUPERPAGE_SIZE has to be replaced
1893 * with a lookup of the size depending on superpage_size.
1896 case SUPERPAGE_SIZE_ANY
:
1897 /* handle it like 2 MB and round up to page size */
1898 size
= (size
+ 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1899 case SUPERPAGE_SIZE_2MB
:
1903 return KERN_INVALID_ARGUMENT
;
1905 mask
= SUPERPAGE_SIZE
-1;
1906 if (size
& (SUPERPAGE_SIZE
-1))
1907 return KERN_INVALID_ARGUMENT
;
1908 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
1913 if (resilient_codesign
|| resilient_media
) {
1914 if ((cur_protection
& (VM_PROT_WRITE
| VM_PROT_EXECUTE
)) ||
1915 (max_protection
& (VM_PROT_WRITE
| VM_PROT_EXECUTE
))) {
1916 return KERN_PROTECTION_FAILURE
;
1922 /* submaps can not be purgeable */
1923 return KERN_INVALID_ARGUMENT
;
1925 if (object
== VM_OBJECT_NULL
) {
1926 /* submaps can not be created lazily */
1927 return KERN_INVALID_ARGUMENT
;
1930 if (flags
& VM_FLAGS_ALREADY
) {
1932 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1933 * is already present. For it to be meaningul, the requested
1934 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1935 * we shouldn't try and remove what was mapped there first
1936 * (!VM_FLAGS_OVERWRITE).
1938 if ((flags
& VM_FLAGS_ANYWHERE
) ||
1939 (flags
& VM_FLAGS_OVERWRITE
)) {
1940 return KERN_INVALID_ARGUMENT
;
1944 effective_min_offset
= map
->min_offset
;
1946 if (flags
& VM_FLAGS_BEYOND_MAX
) {
1948 * Allow an insertion beyond the map's max offset.
1950 if (vm_map_is_64bit(map
))
1951 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
1953 effective_max_offset
= 0x00000000FFFFF000ULL
;
1955 effective_max_offset
= map
->max_offset
;
1959 (offset
& PAGE_MASK_64
) != 0) {
1961 return KERN_INVALID_ARGUMENT
;
1964 VM_GET_FLAGS_ALIAS(flags
, alias
);
1965 if (map
->pmap
== kernel_pmap
) {
1966 user_alias
= VM_KERN_MEMORY_NONE
;
1971 #define RETURN(value) { result = value; goto BailOut; }
1973 assert(page_aligned(*address
));
1974 assert(page_aligned(size
));
1976 if (!VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
))) {
1978 * In most cases, the caller rounds the size up to the
1980 * If we get a size that is explicitly not map-aligned here,
1981 * we'll have to respect the caller's wish and mark the
1982 * mapping as "not map-aligned" to avoid tripping the
1983 * map alignment checks later.
1985 clear_map_aligned
= TRUE
;
1988 !VM_MAP_PAGE_ALIGNED(*address
, VM_MAP_PAGE_MASK(map
))) {
1990 * We've been asked to map at a fixed address and that
1991 * address is not aligned to the map's specific alignment.
1992 * The caller should know what it's doing (i.e. most likely
1993 * mapping some fragmented copy map, transferring memory from
1994 * a VM map with a different alignment), so clear map_aligned
1995 * for this new VM map entry and proceed.
1997 clear_map_aligned
= TRUE
;
2001 * Only zero-fill objects are allowed to be purgable.
2002 * LP64todo - limit purgable objects to 32-bits for now
2006 (object
!= VM_OBJECT_NULL
&&
2007 (object
->vo_size
!= size
||
2008 object
->purgable
== VM_PURGABLE_DENY
))
2009 || size
> ANON_MAX_SIZE
)) /* LP64todo: remove when dp capable */
2010 return KERN_INVALID_ARGUMENT
;
2012 if (!anywhere
&& overwrite
) {
2014 * Create a temporary VM map to hold the old mappings in the
2015 * affected area while we create the new one.
2016 * This avoids releasing the VM map lock in
2017 * vm_map_entry_delete() and allows atomicity
2018 * when we want to replace some mappings with a new one.
2019 * It also allows us to restore the old VM mappings if the
2020 * new mapping fails.
2022 zap_old_map
= vm_map_create(PMAP_NULL
,
2025 map
->hdr
.entries_pageable
);
2026 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
2027 vm_map_disable_hole_optimization(zap_old_map
);
2038 if (entry_for_jit
) {
2039 if (map
->jit_entry_exists
) {
2040 result
= KERN_INVALID_ARGUMENT
;
2043 random_address
= TRUE
;
2046 if (random_address
) {
2048 * Get a random start address.
2050 result
= vm_map_random_address_for_size(map
, address
, size
);
2051 if (result
!= KERN_SUCCESS
) {
2059 * Calculate the first possible address.
2062 if (start
< effective_min_offset
)
2063 start
= effective_min_offset
;
2064 if (start
> effective_max_offset
)
2065 RETURN(KERN_NO_SPACE
);
2068 * Look for the first possible address;
2069 * if there's already something at this
2070 * address, we have to start after it.
2073 if( map
->disable_vmentry_reuse
== TRUE
) {
2074 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
2077 if (map
->holelistenabled
) {
2078 hole_entry
= (vm_map_entry_t
)map
->holes_list
;
2080 if (hole_entry
== NULL
) {
2082 * No more space in the map?
2084 result
= KERN_NO_SPACE
;
2088 boolean_t found_hole
= FALSE
;
2091 if (hole_entry
->vme_start
>= start
) {
2092 start
= hole_entry
->vme_start
;
2097 if (hole_entry
->vme_end
> start
) {
2101 hole_entry
= hole_entry
->vme_next
;
2103 } while (hole_entry
!= (vm_map_entry_t
) map
->holes_list
);
2105 if (found_hole
== FALSE
) {
2106 result
= KERN_NO_SPACE
;
2113 start
+= PAGE_SIZE_64
;
				assert(first_free_is_valid(map));

				entry = map->first_free;

				if (entry == vm_map_to_entry(map)) {
					entry = NULL;
				} else {
					if (entry->vme_next == vm_map_to_entry(map)) {
						/*
						 * Hole at the end of the map.
						 */
						entry = NULL;
					} else {
						if (start < (entry->vme_next)->vme_start) {
							start = entry->vme_end;
							start = vm_map_round_page(start,
										  VM_MAP_PAGE_MASK(map));
						} else {
							/*
							 * Need to do a lookup.
							 */
							entry = NULL;
						}
					}
				}

				if (entry == NULL) {
					vm_map_entry_t	tmp_entry;
					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
						assert(!entry_for_jit);
						start = tmp_entry->vme_end;
						start = vm_map_round_page(start,
									  VM_MAP_PAGE_MASK(map));
					}
					entry = tmp_entry;
				}
			}
		}
		/*
		 *	In any case, the "entry" always precedes
		 *	the proposed new region throughout the
		 *	loop:
		 */

		while (TRUE) {
			vm_map_entry_t	next;

			/*
			 *	Find the end of the proposed new region.
			 *	Be sure we didn't go beyond the end, or
			 *	wrap around the address.
			 */

			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
						VM_MAP_PAGE_MASK(map));
			if (end < start)
				RETURN(KERN_NO_SPACE);
			start = end;
			assert(VM_MAP_PAGE_ALIGNED(start,
						   VM_MAP_PAGE_MASK(map)));
			end += size;
			if ((end > effective_max_offset) || (end < start)) {
				if (map->wait_for_space) {
					assert(!keep_map_locked);
					if (size <= (effective_max_offset -
						     effective_min_offset)) {
						assert_wait((event_t)map,
							    THREAD_ABORTSAFE);
						vm_map_unlock(map);
						map_locked = FALSE;
						thread_block(THREAD_CONTINUE_NULL);
						goto StartAgain;
					}
				}
				RETURN(KERN_NO_SPACE);
			}

			next = entry->vme_next;
			if (map->holelistenabled) {
				if (entry->vme_end >= end)
					break;
			} else {
				/*
				 *	If there are no more entries, we must win.
				 *
				 *	OR
				 *
				 *	If there is another entry, it must be
				 *	after the end of the potential new region.
				 */

				if (next == vm_map_to_entry(map))
					break;

				if (next->vme_start >= end)
					break;
			}

			/*
			 *	Didn't fit -- move to the next entry.
			 */

			entry = next;

			if (map->holelistenabled) {
				if (entry == (vm_map_entry_t) map->holes_list) {
					/* wrapped around the whole hole list */
					result = KERN_NO_SPACE;
					goto BailOut;
				}
				start = entry->vme_start;
			} else {
				start = entry->vme_end;
			}

			start = vm_map_round_page(start,
						  VM_MAP_PAGE_MASK(map));
		}

		if (map->holelistenabled) {
			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
			}
		}

		*address = start;
		assert(VM_MAP_PAGE_ALIGNED(*address,
					   VM_MAP_PAGE_MASK(map)));
	} else {
		/*
		 *	Verify that:
		 *		the address doesn't itself violate
		 *		the mask requirement.
		 */

		vm_map_lock(map);
		map_locked = TRUE;
		if ((start & mask) != 0)
			RETURN(KERN_NO_SPACE);

		/*
		 *	...	the address is within bounds
		 */

		end = start + size;

		if ((start < effective_min_offset) ||
		    (end > effective_max_offset) ||
		    (start >= end)) {
			RETURN(KERN_INVALID_ADDRESS);
		}

		if (overwrite && zap_old_map != VM_MAP_NULL) {
			/*
			 * Fixed mapping and "overwrite" flag: attempt to
			 * remove all existing mappings in the specified
			 * address range, saving them in our "zap_old_map".
			 */
			(void) vm_map_delete(map, start, end,
					     (VM_MAP_REMOVE_SAVE_ENTRIES |
					      VM_MAP_REMOVE_NO_MAP_ALIGN),
					     zap_old_map);
		}
		/*
		 *	...	the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &entry)) {
			if (! (flags & VM_FLAGS_ALREADY)) {
				RETURN(KERN_NO_SPACE);
			}
			/*
			 * Check if what's already there is what we want.
			 */
			tmp_start = start;
			tmp_offset = offset;
			if (entry->vme_start < start) {
				tmp_start -= start - entry->vme_start;
				tmp_offset -= start - entry->vme_start;
			}
			for (; entry->vme_start < end;
			     entry = entry->vme_next) {
				/*
				 * Check if the mapping's attributes
				 * match the existing map entry.
				 */
				if (entry == vm_map_to_entry(map) ||
				    entry->vme_start != tmp_start ||
				    entry->is_sub_map != is_submap ||
				    VME_OFFSET(entry) != tmp_offset ||
				    entry->needs_copy != needs_copy ||
				    entry->protection != cur_protection ||
				    entry->max_protection != max_protection ||
				    entry->inheritance != inheritance ||
				    entry->iokit_acct != iokit_acct ||
				    VME_ALIAS(entry) != alias) {
					/* not the same mapping ! */
					RETURN(KERN_NO_SPACE);
				}
				/*
				 * Check if the same object is being mapped.
				 */
				if (is_submap) {
					if (VME_SUBMAP(entry) !=
					    (vm_map_t) object) {
						/* not the same submap */
						RETURN(KERN_NO_SPACE);
					}
				} else {
					if (VME_OBJECT(entry) != object) {
						/* not the same VM object... */
						vm_object_t obj2;

						obj2 = VME_OBJECT(entry);
						if ((obj2 == VM_OBJECT_NULL ||
						     obj2->internal) &&
						    (object == VM_OBJECT_NULL ||
						     object->internal)) {
							/*
							 * ... but both are
							 * anonymous memory,
							 * so equivalent.
							 */
						} else {
							RETURN(KERN_NO_SPACE);
						}
					}
				}

				tmp_offset += entry->vme_end - entry->vme_start;
				tmp_start += entry->vme_end - entry->vme_start;
				if (entry->vme_end >= end) {
					/* reached the end of our mapping */
					break;
				}
			}
			/* it all matches:  let's use what's already there ! */
			RETURN(KERN_MEMORY_PRESENT);
		}

		/*
		 *	...	the next region doesn't overlap the
		 *		end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end))
			RETURN(KERN_NO_SPACE);
	}
2376 * "start" and "end" should define the endpoints of the
2377 * available new range, and
2378 * "entry" should refer to the region before the new
2381 * the map should be locked.
2385 * See whether we can avoid creating a new entry (and object) by
2386 * extending one of our neighbors. [So far, we only attempt to
2387 * extend from below.] Note that we can never extend/join
2388 * purgable objects because they need to remain distinct
2389 * entities in order to implement their "volatile object"
	if (purgable || entry_for_jit) {
		if (object == VM_OBJECT_NULL) {

			object = vm_object_allocate(size);
			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
			object->true_share = TRUE;
			if (purgable) {
				task_t owner;
				object->purgable = VM_PURGABLE_NONVOLATILE;
				if (map->pmap == kernel_pmap) {
					/*
					 * Purgeable mappings made in a kernel
					 * map are "owned" by the kernel itself
					 * rather than the current user task
					 * because they're likely to be used by
					 * more than this user task (see
					 * execargs_purgeable_allocate(), for
					 * example).
					 */
					owner = kernel_task;
				} else {
					owner = current_task();
				}
				assert(object->vo_purgeable_owner == NULL);
				assert(object->resident_page_count == 0);
				assert(object->wired_page_count == 0);
				vm_object_lock(object);
				vm_purgeable_nonvolatile_enqueue(object, owner);
				vm_object_unlock(object);
			}
			offset = (vm_object_offset_t)0;
		}
	} else if ((is_submap == FALSE) &&
		   (object == VM_OBJECT_NULL) &&
		   (entry != vm_map_to_entry(map)) &&
		   (entry->vme_end == start) &&
		   (!entry->is_shared) &&
		   (!entry->is_sub_map) &&
		   (!entry->in_transition) &&
		   (!entry->needs_wakeup) &&
		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
		   (entry->protection == cur_protection) &&
		   (entry->max_protection == max_protection) &&
		   (entry->inheritance == inheritance) &&
		   ((user_alias == VM_MEMORY_REALLOC) ||
		    (VME_ALIAS(entry) == alias)) &&
		   (entry->no_cache == no_cache) &&
		   (entry->permanent == permanent) &&
		   (!entry->superpage_size && !superpage_size) &&
		   /*
		    * No coalescing if not map-aligned, to avoid propagating
		    * that condition any further than needed:
		    */
		   (!entry->map_aligned || !clear_map_aligned) &&
		   (!entry->zero_wired_pages) &&
		   (!entry->used_for_jit && !entry_for_jit) &&
		   (entry->iokit_acct == iokit_acct) &&
		   (!entry->vme_resilient_codesign) &&
		   (!entry->vme_resilient_media) &&
		   (!entry->vme_atomic) &&

		   ((entry->vme_end - entry->vme_start) + size <=
		    (user_alias == VM_MEMORY_REALLOC ?
		     ANON_CHUNK_SIZE :
		     NO_COALESCE_LIMIT)) &&

		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
		if (vm_object_coalesce(VME_OBJECT(entry),
				       VM_OBJECT_NULL,
				       VME_OFFSET(entry),
				       (vm_object_offset_t) 0,
				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
				       (vm_map_size_t)(end - entry->vme_end))) {

			/*
			 *	Coalesced the two objects - can extend
			 *	the previous map entry to include the
			 *	new range.
			 */
			map->size += (end - entry->vme_end);
			assert(entry->vme_start < end);
			assert(VM_MAP_PAGE_ALIGNED(end,
						   VM_MAP_PAGE_MASK(map)));
			if (__improbable(vm_debug_events))
				DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
			entry->vme_end = end;
			if (map->holelistenabled) {
				vm_map_store_update_first_free(map, entry, TRUE);
			} else {
				vm_map_store_update_first_free(map, map->first_free, TRUE);
			}
			new_mapping_established = TRUE;
			RETURN(KERN_SUCCESS);
		}
	}
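	/*
	 * Illustrative note (not part of the original source): a successful
	 * coalesce simply stretches the preceding entry instead of creating a
	 * new one, e.g.
	 *
	 *	before:  [entry: 0x1000-0x3000)   new range: [0x3000-0x5000)
	 *	after:   [entry: 0x1000-0x5000)
	 *
	 * which is why map->size grows by (end - entry->vme_end) above.
	 * The addresses are made up for illustration.
	 */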
	step = superpage_size ? SUPERPAGE_SIZE : (end - start);

	for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
		tmp2_end = tmp2_start + step;
		/*
		 * Create a new entry
		 * LP64todo - for now, we can only allocate 4GB internal objects
		 * because the default pager can't page bigger ones.  Remove this
		 * when it can.
		 *
		 * The reserved "page zero" in each process's address space can
		 * be arbitrarily large.  Splitting it into separate 4GB objects and
		 * therefore different VM map entries serves no purpose and just
		 * slows down operations on the VM map, so let's not split the
		 * allocation into 4GB chunks if the max protection is NONE.  That
		 * memory should never be accessible, so it will never get to the
		 * default pager.
		 */
		tmp_start = tmp2_start;
		if (object == VM_OBJECT_NULL &&
		    size > (vm_map_size_t)ANON_CHUNK_SIZE &&
		    max_protection != VM_PROT_NONE &&
		    superpage_size == 0)
			tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
		else
			tmp_end = tmp2_end;
		do {
			new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
							object,	offset, needs_copy,
							FALSE, FALSE,
							cur_protection, max_protection,
							VM_BEHAVIOR_DEFAULT,
							(entry_for_jit)? VM_INHERIT_NONE: inheritance,
							0, no_cache,
							permanent,
							superpage_size,
							clear_map_aligned,
							is_submap);

			assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
			VME_ALIAS_SET(new_entry, alias);

			if (entry_for_jit) {
				if (!(map->jit_entry_exists)) {
					new_entry->used_for_jit = TRUE;
					map->jit_entry_exists = TRUE;
				}
			}
			if (resilient_codesign &&
			    ! ((cur_protection | max_protection) &
			       (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
				new_entry->vme_resilient_codesign = TRUE;
			}

			if (resilient_media &&
			    ! ((cur_protection | max_protection) &
			       (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
				new_entry->vme_resilient_media = TRUE;
			}

			assert(!new_entry->iokit_acct);
			if (!is_submap &&
			    object != VM_OBJECT_NULL &&
			    object->purgable != VM_PURGABLE_DENY) {
				assert(new_entry->use_pmap);
				assert(!new_entry->iokit_acct);
				/*
				 * Turn off pmap accounting since
				 * purgeable objects have their
				 * own ledgers.
				 */
				new_entry->use_pmap = FALSE;
			} else if (!is_submap &&
				   iokit_acct &&
				   object != VM_OBJECT_NULL &&
				   object->internal) {
				/* alternate accounting */
				assert(!new_entry->iokit_acct);
				assert(new_entry->use_pmap);
				new_entry->iokit_acct = TRUE;
				new_entry->use_pmap = FALSE;
				DTRACE_VM4(
					vm_map_iokit_mapped_region,
					vm_map_t, map,
					vm_map_offset_t, new_entry->vme_start,
					vm_map_offset_t, new_entry->vme_end,
					int, VME_ALIAS(new_entry));
				vm_map_iokit_mapped_region(
					map,
					(new_entry->vme_end -
					 new_entry->vme_start));
			} else if (!is_submap) {
				assert(!new_entry->iokit_acct);
				assert(new_entry->use_pmap);
			}
			if (is_submap) {
				vm_map_t	submap;
				boolean_t	submap_is_64bit;
				boolean_t	use_pmap;

				assert(new_entry->is_sub_map);
				assert(!new_entry->use_pmap);
				assert(!new_entry->iokit_acct);
				submap = (vm_map_t) object;
				submap_is_64bit = vm_map_is_64bit(submap);
				use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
#ifndef NO_NESTED_PMAP
				if (use_pmap && submap->pmap == NULL) {
					ledger_t ledger = map->pmap->ledger;
					/* we need a sub pmap to nest... */
					submap->pmap = pmap_create(ledger, 0,
								   submap_is_64bit);
					if (submap->pmap == NULL) {
						/* let's proceed without nesting... */
					}
				}
				if (use_pmap && submap->pmap != NULL) {
					kr = pmap_nest(map->pmap,
						       submap->pmap,
						       tmp_start,
						       tmp_start,
						       tmp_end - tmp_start);
					if (kr != KERN_SUCCESS) {
						printf("vm_map_enter: "
						       "pmap_nest(0x%llx,0x%llx) "
						       "error 0x%x\n",
						       (long long)tmp_start,
						       (long long)tmp_end,
						       kr);
					} else {
						/* we're now nested ! */
						new_entry->use_pmap = TRUE;
						pmap_empty = FALSE;
					}
				}
#endif /* NO_NESTED_PMAP */
			}
			if (superpage_size) {
				vm_page_t	pages, m;
				vm_object_t	sp_object;

				VME_OFFSET_SET(entry, 0);

				/* allocate one superpage */
				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
				if (kr != KERN_SUCCESS) {
					/* deallocate whole range... */
					new_mapping_established = TRUE;
					/* ... but only up to "tmp_end" */
					size -= end - tmp_end;
					RETURN(kr);
				}

				/* create one vm_object per superpage */
				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
				sp_object->phys_contiguous = TRUE;
				sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
				VME_OBJECT_SET(entry, sp_object);
				assert(entry->use_pmap);

				/* enter the base pages into the object */
				vm_object_lock(sp_object);
				for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
					m = pages;
					pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
					pages = NEXT_PAGE(m);
					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
					vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
				}
				vm_object_unlock(sp_object);
			}
		} while (tmp_end != tmp2_end &&
			 (tmp_start = tmp_end) &&
			 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
			  tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
	}

	new_mapping_established = TRUE;
BailOut:
	assert(map_locked == TRUE);

	if (result == KERN_SUCCESS) {
		vm_prot_t pager_prot;
		memory_object_t pager;

		if (pmap_empty &&
		    !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
			assert(vm_map_pmap_is_empty(map,
						    *address,
						    *address + size));
		}

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (needs_copy) {
			/*
			 * Copy-On-Write mapping: won't modify
			 * the memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		if (!is_submap &&
		    object != VM_OBJECT_NULL &&
		    object->named &&
		    object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(object);
			pager = object->pager;
			if (object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(object->pager_ready);
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				kr = memory_object_map(pager, pager_prot);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			vm_object_unlock(object);
		}
	}
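	/*
	 * Illustrative sketch (not part of the original source): from a
	 * pager's point of view the map / last-unmap protocol referenced in
	 * the comment above looks roughly like this; "my_pager_state" is a
	 * hypothetical bookkeeping helper.
	 *
	 *	kern_return_t my_memory_object_map(memory_object_t mo, vm_prot_t prot)
	 *	{
	 *		my_pager_state(mo)->mapped = TRUE;	// object is mapped somewhere
	 *		return KERN_SUCCESS;
	 *	}
	 *	kern_return_t my_memory_object_last_unmap(memory_object_t mo)
	 *	{
	 *		my_pager_state(mo)->mapped = FALSE;	// safe to reclaim
	 *		return KERN_SUCCESS;
	 *	}
	 */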
	assert(map_locked == TRUE);

	if (!keep_map_locked) {
		vm_map_unlock(map);
		map_locked = FALSE;
	}

	/*
	 * We can't hold the map lock if we enter this block.
	 */

	if (result == KERN_SUCCESS) {

		/*	Wire down the new entry if the user
		 *	requested all new map entries be wired. */
		if ((map->wiring_required) || (superpage_size)) {
			assert(!keep_map_locked);
			pmap_empty = FALSE; /* pmap won't be empty */
			kr = vm_map_wire(map, start, end,
					 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
					 TRUE);
			result = kr;
		}
	}
	if (result != KERN_SUCCESS) {
		if (new_mapping_established) {
			/*
			 * We have to get rid of the new mappings since we
			 * won't make them available to the user.
			 * Try and do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
			 */
			zap_new_map = vm_map_create(PMAP_NULL,
						    *address,
						    *address + size,
						    map->hdr.entries_pageable);
			vm_map_set_page_shift(zap_new_map,
					      VM_MAP_PAGE_SHIFT(map));
			vm_map_disable_hole_optimization(zap_new_map);

			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}
			(void) vm_map_delete(map, *address, *address + size,
					     (VM_MAP_REMOVE_SAVE_ENTRIES |
					      VM_MAP_REMOVE_NO_MAP_ALIGN),
					     zap_new_map);
		}
		if (zap_old_map != VM_MAP_NULL &&
		    zap_old_map->hdr.nentries != 0) {
			vm_map_entry_t	entry1, entry2;

			/*
			 * The new mapping failed.  Attempt to restore
			 * the old mappings, saved in the "zap_old_map".
			 */
			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}

			/* first check if the coast is still clear */
			start = vm_map_first_entry(zap_old_map)->vme_start;
			end = vm_map_last_entry(zap_old_map)->vme_end;
			if (vm_map_lookup_entry(map, start, &entry1) ||
			    vm_map_lookup_entry(map, end, &entry2) ||
			    entry1 != entry2) {
				/*
				 * Part of that range has already been
				 * re-mapped:  we can't restore the old
				 * mappings...
				 */
				vm_map_enter_restore_failures++;
			} else {
				/*
				 * Transfer the saved map entries from
				 * "zap_old_map" to the original "map",
				 * inserting them all after "entry1".
				 */
				for (entry2 = vm_map_first_entry(zap_old_map);
				     entry2 != vm_map_to_entry(zap_old_map);
				     entry2 = vm_map_first_entry(zap_old_map)) {
					vm_map_size_t entry_size;

					entry_size = (entry2->vme_end -
						      entry2->vme_start);
					vm_map_store_entry_unlink(zap_old_map,
								  entry2);
					zap_old_map->size -= entry_size;
					vm_map_store_entry_link(map, entry1, entry2);
					map->size += entry_size;
					entry1 = entry2;
				}
				if (map->wiring_required) {
					/*
					 * XXX TODO: we should rewire the
					 * old pages here...
					 */
				}
				vm_map_enter_restore_successes++;
			}
		}
	}
	/*
	 * The caller is responsible for releasing the lock if it requested to
	 * keep the map locked.
	 */
	if (map_locked && !keep_map_locked) {
		vm_map_unlock(map);
	}

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_old_map != VM_MAP_NULL) {
		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_old_map = VM_MAP_NULL;
	}
	if (zap_new_map != VM_MAP_NULL) {
		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_new_map = VM_MAP_NULL;
	}

	return result;

#undef	RETURN
}

/*
 * Counters for the prefault optimization.
 */
int64_t vm_prefault_nb_pages = 0;
int64_t vm_prefault_nb_bailout = 0;
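/*
 * Note (added for clarity, not in the original): "vm_prefault_nb_pages"
 * counts pages successfully pre-entered into the pmap by
 * vm_map_enter_mem_object_helper() below, while "vm_prefault_nb_bailout"
 * counts the times the prefault loop gave up early because
 * pmap_enter_options() failed.
 */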
static kern_return_t
vm_map_enter_mem_object_helper(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	boolean_t		copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance,
	upl_page_list_ptr_t	page_list,
	unsigned int		page_list_count)
{
	vm_map_address_t	map_addr;
	vm_map_size_t		map_size;
	vm_object_t		object;
	vm_object_size_t	size;
	kern_return_t		result;
	boolean_t		mask_cur_protection, mask_max_protection;
	boolean_t		try_prefault = (page_list_count != 0);
	vm_map_offset_t		offset_in_mapping = 0;

	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
	mask_max_protection = max_protection & VM_PROT_IS_MASK;
	cur_protection &= ~VM_PROT_IS_MASK;
	max_protection &= ~VM_PROT_IS_MASK;

	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~VM_PROT_ALL) ||
	    (max_protection & ~VM_PROT_ALL) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    (try_prefault && (copy || !page_list)) ||
	    initial_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}
	map_addr = vm_map_trunc_page(*address,
				     VM_MAP_PAGE_MASK(target_map));
	map_size = vm_map_round_page(initial_size,
				     VM_MAP_PAGE_MASK(target_map));
	size = vm_object_round_page(initial_size);

	/*
	 * Find the vm object (if any) corresponding to this port.
	 */
	if (!IP_VALID(port)) {
		object = VM_OBJECT_NULL;
		offset = 0;
		copy = FALSE;
	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t) port->ip_kobject;

		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
			     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
			offset += named_entry->data_offset;
		}

		/* a few checks to make sure user is obeying rules */
		if (size == 0) {
			if (offset >= named_entry->size)
				return KERN_INVALID_RIGHT;
			size = named_entry->size - offset;
		}
		if (mask_max_protection) {
			max_protection &= named_entry->protection;
		}
		if (mask_cur_protection) {
			cur_protection &= named_entry->protection;
		}
		if ((named_entry->protection & max_protection) !=
		    max_protection)
			return KERN_INVALID_RIGHT;
		if ((named_entry->protection & cur_protection) !=
		    cur_protection)
			return KERN_INVALID_RIGHT;
		if (offset + size < offset) {
			/* overflow */
			return KERN_INVALID_ARGUMENT;
		}
		if (named_entry->size < (offset + initial_size)) {
			return KERN_INVALID_ARGUMENT;
		}
		if (named_entry->is_copy) {
			/* for a vm_map_copy, we can only map it whole */
			if ((size != named_entry->size) &&
			    (vm_map_round_page(size,
					       VM_MAP_PAGE_MASK(target_map)) ==
			     named_entry->size)) {
				/* XXX FBDP use the rounded size... */
				size = vm_map_round_page(
					size,
					VM_MAP_PAGE_MASK(target_map));
			}

			if (!(flags & VM_FLAGS_ANYWHERE) &&
			    (offset != 0 ||
			     size != named_entry->size)) {
				/*
				 * XXX for a mapping at a "fixed" address,
				 * we can't trim after mapping the whole
				 * memory entry, so reject a request for a
				 * partial mapping.
				 */
				return KERN_INVALID_ARGUMENT;
			}
		}

		/* the callers parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if (! VM_MAP_PAGE_ALIGNED(size,
					  VM_MAP_PAGE_MASK(target_map))) {
			/*
			 * Let's not map more than requested;
			 * vm_map_enter() will handle this "not map-aligned"
			 * case.
			 */
			map_size = size;
		}
);
3004 if (named_entry
->is_sub_map
) {
3007 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
3008 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
3009 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3012 submap
= named_entry
->backing
.map
;
3013 vm_map_lock(submap
);
3014 vm_map_reference(submap
);
3015 vm_map_unlock(submap
);
3016 named_entry_unlock(named_entry
);
3018 result
= vm_map_enter(target_map
,
3022 flags
| VM_FLAGS_SUBMAP
,
3023 (vm_object_t
) submap
,
3029 if (result
!= KERN_SUCCESS
) {
3030 vm_map_deallocate(submap
);
3033 * No need to lock "submap" just to check its
3034 * "mapped" flag: that flag is never reset
3035 * once it's been set and if we race, we'll
3036 * just end up setting it twice, which is OK.
3038 if (submap
->mapped_in_other_pmaps
== FALSE
&&
3039 vm_map_pmap(submap
) != PMAP_NULL
&&
3040 vm_map_pmap(submap
) !=
3041 vm_map_pmap(target_map
)) {
3043 * This submap is being mapped in a map
3044 * that uses a different pmap.
3045 * Set its "mapped_in_other_pmaps" flag
3046 * to indicate that we now need to
3047 * remove mappings from all pmaps rather
3048 * than just the submap's pmap.
3050 vm_map_lock(submap
);
3051 submap
->mapped_in_other_pmaps
= TRUE
;
3052 vm_map_unlock(submap
);
3054 *address
= map_addr
;
		} else if (named_entry->is_pager) {
			unsigned int	access;
			vm_prot_t	protections;
			unsigned int	wimg_mode;

			protections = named_entry->protection & VM_PROT_ALL;
			access = GET_MAP_MEM(named_entry->protection);

			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
				     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
			}

			object = vm_object_enter(named_entry->backing.pager,
						 named_entry->size,
						 named_entry->internal,
						 FALSE,
						 FALSE);
			if (object == VM_OBJECT_NULL) {
				named_entry_unlock(named_entry);
				return KERN_INVALID_OBJECT;
			}

			/* JMM - drop reference on pager here */

			/* create an extra ref for the named entry */
			vm_object_lock(object);
			vm_object_reference_locked(object);
			named_entry->backing.object = object;
			named_entry->is_pager = FALSE;
			named_entry_unlock(named_entry);

			wimg_mode = object->wimg_bits;

			if (access == MAP_MEM_IO) {
				wimg_mode = VM_WIMG_IO;
			} else if (access == MAP_MEM_COPYBACK) {
				wimg_mode = VM_WIMG_USE_DEFAULT;
			} else if (access == MAP_MEM_INNERWBACK) {
				wimg_mode = VM_WIMG_INNERWBACK;
			} else if (access == MAP_MEM_WTHRU) {
				wimg_mode = VM_WIMG_WTHRU;
			} else if (access == MAP_MEM_WCOMB) {
				wimg_mode = VM_WIMG_WCOMB;
			}

			/* wait for object (if any) to be ready */
			if (!named_entry->internal) {
				while (!object->pager_ready) {
					vm_object_wait(
						object,
						VM_OBJECT_EVENT_PAGER_READY,
						THREAD_UNINT);
					vm_object_lock(object);
				}
			}

			if (object->wimg_bits != wimg_mode)
				vm_object_change_wimg_mode(object, wimg_mode);

#if VM_OBJECT_TRACKING_OP_TRUESHARE
			if (!object->true_share &&
			    vm_object_tracking_inited) {
				void *bt[VM_OBJECT_TRACKING_BTDEPTH];
				int num = 0;

				num = OSBacktrace(bt,
						  VM_OBJECT_TRACKING_BTDEPTH);
				btlog_add_entry(vm_object_tracking_btlog,
						object,
						VM_OBJECT_TRACKING_OP_TRUESHARE,
						bt,
						num);
			}
#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */

			object->true_share = TRUE;

			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
			vm_object_unlock(object);
		} else if (named_entry->is_copy) {
			kern_return_t	kr;
			vm_map_copy_t	copy_map;
			vm_map_entry_t	copy_entry;
			vm_map_offset_t	copy_addr;

			if (flags & ~(VM_FLAGS_FIXED |
				      VM_FLAGS_ANYWHERE |
				      VM_FLAGS_OVERWRITE |
				      VM_FLAGS_IOKIT_ACCT |
				      VM_FLAGS_RETURN_4K_DATA_ADDR |
				      VM_FLAGS_RETURN_DATA_ADDR |
				      VM_FLAGS_ALIAS_MASK)) {
				named_entry_unlock(named_entry);
				return KERN_INVALID_ARGUMENT;
			}

			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
				     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
				offset_in_mapping = offset - vm_object_trunc_page(offset);
				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
					offset_in_mapping &= ~((signed)(0xFFF));
				offset = vm_object_trunc_page(offset);
				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
			}

			copy_map = named_entry->backing.copy;
			assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
			if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
				/* unsupported type; should not happen */
				printf("vm_map_enter_mem_object: "
				       "memory_entry->backing.copy "
				       "unsupported type 0x%x\n",
				       copy_map->type);
				named_entry_unlock(named_entry);
				return KERN_INVALID_ARGUMENT;
			}

			/* reserve a contiguous range */
			kr = vm_map_enter(target_map,
					  &map_addr,
					  /* map whole mem entry, trim later: */
					  named_entry->size,
					  mask,
					  flags & (VM_FLAGS_ANYWHERE |
						   VM_FLAGS_OVERWRITE |
						   VM_FLAGS_IOKIT_ACCT |
						   VM_FLAGS_RETURN_4K_DATA_ADDR |
						   VM_FLAGS_RETURN_DATA_ADDR |
						   VM_FLAGS_ALIAS_MASK),
					  VM_OBJECT_NULL,
					  0,
					  FALSE, /* copy */
					  cur_protection,
					  max_protection,
					  inheritance);
			if (kr != KERN_SUCCESS) {
				named_entry_unlock(named_entry);
				return kr;
			}

			copy_addr = map_addr;
			for (copy_entry = vm_map_copy_first_entry(copy_map);
			     copy_entry != vm_map_copy_to_entry(copy_map);
			     copy_entry = copy_entry->vme_next) {
				int			remap_flags = 0;
				vm_map_t		copy_submap;
				vm_object_t		copy_object;
				vm_map_size_t		copy_size;
				vm_object_offset_t	copy_offset;
				int			copy_vm_alias;

				copy_object = VME_OBJECT(copy_entry);
				copy_offset = VME_OFFSET(copy_entry);
				copy_size = (copy_entry->vme_end -
					     copy_entry->vme_start);
				VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
				if (copy_vm_alias == 0) {
					/*
					 * Caller does not want a specific
					 * alias for this new mapping:  use
					 * the alias of the original mapping.
					 */
					copy_vm_alias = VME_ALIAS(copy_entry);
				}

				/* sanity check */
				if ((copy_addr + copy_size) >
				    (map_addr +
				     named_entry->size /* XXX full size */ )) {
					/* over-mapping too much !? */
					kr = KERN_INVALID_ARGUMENT;
					/* abort */
					break;
				}

				/* take a reference on the object */
				if (copy_entry->is_sub_map) {
					remap_flags |= VM_FLAGS_SUBMAP;
					copy_submap = VME_SUBMAP(copy_entry);
					vm_map_lock(copy_submap);
					vm_map_reference(copy_submap);
					vm_map_unlock(copy_submap);
					copy_object = (vm_object_t) copy_submap;
				} else if (!copy &&
					   copy_object != VM_OBJECT_NULL &&
					   (copy_entry->needs_copy ||
					    copy_object->shadowed ||
					    (!copy_object->true_share &&
					     !copy_entry->is_shared &&
					     copy_object->vo_size > copy_size))) {
					/*
					 * We need to resolve our side of this
					 * "symmetric" copy-on-write now; we
					 * need a new object to map and share,
					 * instead of the current one which
					 * might still be shared with the
					 * original mapping.
					 *
					 * Note: A "vm_map_copy_t" does not
					 * have a lock but we're protected by
					 * the named entry's lock here.
					 */
					// assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
					VME_OBJECT_SHADOW(copy_entry, copy_size);
					if (!copy_entry->needs_copy &&
					    copy_entry->protection & VM_PROT_WRITE) {
						vm_prot_t prot;

						prot = copy_entry->protection & ~VM_PROT_WRITE;
						vm_object_pmap_protect(copy_object,
								       copy_offset,
								       copy_size,
								       PMAP_NULL,
								       0,
								       prot);
					}

					copy_entry->needs_copy = FALSE;
					copy_entry->is_shared = TRUE;
					copy_object = VME_OBJECT(copy_entry);
					copy_offset = VME_OFFSET(copy_entry);
					vm_object_lock(copy_object);
					vm_object_reference_locked(copy_object);
					if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
						/* we're about to make a shared mapping of this object */
						copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
						copy_object->true_share = TRUE;
					}
					vm_object_unlock(copy_object);
				} else {
					/*
					 * We already have the right object
					 * to map.
					 */
					copy_object = VME_OBJECT(copy_entry);
					vm_object_reference(copy_object);
				}

				/* over-map the object into destination */
				remap_flags |= flags;
				remap_flags |= VM_FLAGS_FIXED;
				remap_flags |= VM_FLAGS_OVERWRITE;
				remap_flags &= ~VM_FLAGS_ANYWHERE;
				remap_flags |= VM_MAKE_TAG(copy_vm_alias);
				if (!copy && !copy_entry->is_sub_map) {
					/*
					 * copy-on-write should have been
					 * resolved at this point, or we would
					 * end up sharing instead of copying.
					 */
					assert(!copy_entry->needs_copy);
				}
				kr = vm_map_enter(target_map,
						  &copy_addr,
						  copy_size,
						  (vm_map_offset_t) 0,
						  remap_flags,
						  copy_object,
						  copy_offset,
						  copy,
						  cur_protection,
						  max_protection,
						  inheritance);
				if (kr != KERN_SUCCESS) {
					if (copy_entry->is_sub_map) {
						vm_map_deallocate(copy_submap);
					} else {
						vm_object_deallocate(copy_object);
					}
					/* abort */
					break;
				}

				/* next mapping */
				copy_addr += copy_size;
			}
			if (kr == KERN_SUCCESS) {
				if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
					     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
					*address = map_addr + offset_in_mapping;
				} else {
					*address = map_addr;
				}

				if (offset) {
					/*
					 * Trim in front, from 0 to "offset".
					 */
					vm_map_remove(target_map,
						      map_addr,
						      map_addr + offset,
						      0);
					*address += offset;
				}
				if (offset + map_size < named_entry->size) {
					/*
					 * Trim in back, from
					 * "offset + map_size" to
					 * "named_entry->size".
					 */
					vm_map_remove(target_map,
						      (map_addr +
						       offset + map_size),
						      (map_addr +
						       named_entry->size),
						      0);
				}
			}
			named_entry_unlock(named_entry);

			if (kr != KERN_SUCCESS) {
				if (! (flags & VM_FLAGS_OVERWRITE)) {
					/* deallocate the contiguous range */
					(void) vm_deallocate(target_map,
							     map_addr,
							     map_size);
				}
			}

			return kr;
		} else {
			/* This is the case where we are going to map */
			/* an already mapped object.  If the object is */
			/* not ready it is internal.  An external     */
			/* object cannot be mapped until it is ready  */
			/* we can therefore avoid the ready check     */
			/* in this case.  */
			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
				     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
				offset_in_mapping = offset - vm_object_trunc_page(offset);
				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
					offset_in_mapping &= ~((signed)(0xFFF));
				offset = vm_object_trunc_page(offset);
				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
			}

			object = named_entry->backing.object;
			assert(object != VM_OBJECT_NULL);
			named_entry_unlock(named_entry);
			vm_object_reference(object);
		}
	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
		/*
		 * JMM - This is temporary until we unify named entries
		 * and raw memory objects.
		 *
		 * Detected fake ip_kotype for a memory object.  In
		 * this case, the port isn't really a port at all, but
		 * instead is just a raw memory object.
		 */
		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
			     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
		}

		object = vm_object_enter((memory_object_t)port,
					 size, FALSE, FALSE, FALSE);
		if (object == VM_OBJECT_NULL)
			return KERN_INVALID_OBJECT;

		/* wait for object (if any) to be ready */
		if (object != VM_OBJECT_NULL) {
			if (object == kernel_object) {
				printf("Warning: Attempt to map kernel object"
				       " by a non-private kernel entity\n");
				return KERN_INVALID_OBJECT;
			}
			if (!object->pager_ready) {
				vm_object_lock(object);

				while (!object->pager_ready) {
					vm_object_wait(object,
						       VM_OBJECT_EVENT_PAGER_READY,
						       THREAD_UNINT);
					vm_object_lock(object);
				}
				vm_object_unlock(object);
			}
		}
	} else {
		return KERN_INVALID_OBJECT;
	}
	if (object != VM_OBJECT_NULL &&
	    object->named &&
	    object->pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		memory_object_t pager;
		vm_prot_t	pager_prot;
		kern_return_t	kr;

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (copy) {
			/*
			 * Copy-On-Write mapping: won't modify the
			 * memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		vm_object_lock(object);
		pager = object->pager;
		if (object->named &&
		    pager != MEMORY_OBJECT_NULL &&
		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
			assert(object->pager_ready);
			vm_object_mapping_wait(object, THREAD_UNINT);
			vm_object_mapping_begin(object);
			vm_object_unlock(object);

			kr = memory_object_map(pager, pager_prot);
			assert(kr == KERN_SUCCESS);

			vm_object_lock(object);
			vm_object_mapping_end(object);
		}
		vm_object_unlock(object);
	}
	/*
	 *	Perform the copy if requested
	 */

	if (copy) {
		vm_object_t		new_object;
		vm_object_offset_t	new_offset;

		result = vm_object_copy_strategically(object, offset,
						      map_size,
						      &new_object, &new_offset,
						      &copy);

		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * XXX
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
							 new_offset,
							 map_size,
							 &src_needs_copy,
							 &copy);
			assert(success);
			result = KERN_SUCCESS;
		}
		/*
		 *	Throw away the reference to the
		 *	original object, as it won't be mapped.
		 */

		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			return result;
		}

		object = new_object;
		offset = new_offset;
	}
	/*
	 * If users want to try to prefault pages, the mapping and prefault
	 * needs to be atomic.
	 */
	if (try_prefault)
		flags |= VM_FLAGS_KEEP_MAP_LOCKED;

	result = vm_map_enter(target_map,
			      &map_addr, map_size,
			      (vm_map_offset_t)mask,
			      flags,
			      object, offset,
			      copy,
			      cur_protection, max_protection,
			      inheritance);
	if (result != KERN_SUCCESS)
		vm_object_deallocate(object);
	/*
	 * Try to prefault, and do not forget to release the vm map lock.
	 */
	if (result == KERN_SUCCESS && try_prefault) {
		mach_vm_address_t va = map_addr;
		kern_return_t kr = KERN_SUCCESS;
		unsigned int i = 0;
		int pmap_options;

		pmap_options = PMAP_OPTIONS_NOWAIT;
		if (object->internal) {
			pmap_options |= PMAP_OPTIONS_INTERNAL;
		}

		for (i = 0; i < page_list_count; ++i) {
			if (UPL_VALID_PAGE(page_list, i)) {
				/*
				 * If this function call failed, we should stop
				 * trying to optimize, other calls are likely
				 * going to fail too.
				 *
				 * We are not gonna report an error for such
				 * failure though. That's an optimization, not
				 * something critical.
				 */
				kr = pmap_enter_options(target_map->pmap,
							va, UPL_PHYS_PAGE(page_list, i),
							cur_protection, VM_PROT_NONE,
							0, TRUE, pmap_options, NULL);
				if (kr != KERN_SUCCESS) {
					OSIncrementAtomic64(&vm_prefault_nb_bailout);
					break;
				}
				OSIncrementAtomic64(&vm_prefault_nb_pages);
			}

			/* Next virtual address */
			va += PAGE_SIZE;
		}
		vm_map_unlock(target_map);
	}

	if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
		     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
		*address = map_addr + offset_in_mapping;
	} else {
		*address = map_addr;
	}

	return result;
}
kern_return_t
vm_map_enter_mem_object(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	boolean_t		copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
					      port, offset, copy, cur_protection, max_protection,
					      inheritance, NULL, 0);
}
kern_return_t
vm_map_enter_mem_object_prefault(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	upl_page_list_ptr_t	page_list,
	unsigned int		page_list_count)
{
	return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
					      port, offset, FALSE, cur_protection, max_protection,
					      VM_INHERIT_DEFAULT, page_list, page_list_count);
}
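/*
 * Illustrative sketch (not part of the original source): a typical in-kernel
 * caller maps a named memory entry "mem_port" (a hypothetical ipc_port_t it
 * already holds a right for) into "map" at any available address like this:
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_mem_object(map, &addr, size, 0,
 *				     VM_FLAGS_ANYWHERE,
 *				     mem_port, 0, FALSE,
 *				     VM_PROT_READ | VM_PROT_WRITE,
 *				     VM_PROT_READ | VM_PROT_WRITE,
 *				     VM_INHERIT_DEFAULT);
 *
 * The prefault variant additionally takes a UPL page list so that valid
 * pages can be entered into the pmap before the call returns.
 */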
kern_return_t
vm_map_enter_mem_object_control(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	memory_object_control_t	control,
	vm_object_offset_t	offset,
	boolean_t		copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_address_t	map_addr;
	vm_map_size_t		map_size;
	vm_object_t		object;
	vm_object_size_t	size;
	kern_return_t		result;
	memory_object_t		pager;
	vm_prot_t		pager_prot;
	kern_return_t		kr;

	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~VM_PROT_ALL) ||
	    (max_protection & ~VM_PROT_ALL) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    initial_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	map_addr = vm_map_trunc_page(*address,
				     VM_MAP_PAGE_MASK(target_map));
	map_size = vm_map_round_page(initial_size,
				     VM_MAP_PAGE_MASK(target_map));
	size = vm_object_round_page(initial_size);

	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL)
		return KERN_INVALID_OBJECT;

	if (object == kernel_object) {
		printf("Warning: Attempt to map kernel object"
		       " by a non-private kernel entity\n");
		return KERN_INVALID_OBJECT;
	}
	vm_object_lock(object);
	object->ref_count++;
	vm_object_res_reference(object);

	/*
	 * For "named" VM objects, let the pager know that the
	 * memory object is being mapped.  Some pagers need to keep
	 * track of this, to know when they can reclaim the memory
	 * object, for example.
	 * VM calls memory_object_map() for each mapping (specifying
	 * the protection of each mapping) and calls
	 * memory_object_last_unmap() when all the mappings are gone.
	 */
	pager_prot = max_protection;
	if (copy) {
		pager_prot &= ~VM_PROT_WRITE;
	}
	pager = object->pager;
	if (object->named &&
	    pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		assert(object->pager_ready);
		vm_object_mapping_wait(object, THREAD_UNINT);
		vm_object_mapping_begin(object);
		vm_object_unlock(object);

		kr = memory_object_map(pager, pager_prot);
		assert(kr == KERN_SUCCESS);

		vm_object_lock(object);
		vm_object_mapping_end(object);
	}
	vm_object_unlock(object);
	/*
	 *	Perform the copy if requested
	 */

	if (copy) {
		vm_object_t		new_object;
		vm_object_offset_t	new_offset;

		result = vm_object_copy_strategically(object, offset, size,
						      &new_object, &new_offset,
						      &copy);

		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * XXX
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
							 new_offset, size,
							 &src_needs_copy,
							 &copy);
			assert(success);
			result = KERN_SUCCESS;
		}
		/*
		 *	Throw away the reference to the
		 *	original object, as it won't be mapped.
		 */

		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			return result;
		}

		object = new_object;
		offset = new_offset;
	}

	result = vm_map_enter(target_map,
			      &map_addr, map_size,
			      (vm_map_offset_t)mask,
			      flags,
			      object, offset,
			      copy,
			      cur_protection, max_protection,
			      inheritance);
	if (result != KERN_SUCCESS)
		vm_object_deallocate(object);
	*address = map_addr;
#if	VM_CPM

extern pmap_paddr_t	avail_start, avail_end;

/*
 *	Allocate memory in the specified map, with the caveat that
 *	the memory is physically contiguous.  This call may fail
 *	if the system can't find sufficient contiguous memory.
 *	This call may cause or lead to heart-stopping amounts of
 *	paging activity.
 *
 *	Memory obtained from this call should be freed in the
 *	normal way, viz., via vm_deallocate.
 */
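/*
 * Illustrative sketch (not part of the original source), assuming the usual
 * privileged host port as the first argument: a caller needing a physically
 * contiguous buffer mapped into "some_map" (a hypothetical vm_map_t) would
 * typically do something like
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_allocate_cpm(host_priv_self(), some_map, &addr,
 *			     buffer_size, VM_FLAGS_ANYWHERE);
 *	...
 *	(void) vm_deallocate(some_map, addr, buffer_size);
 *
 * and, as the comment above says, release the memory with vm_deallocate()
 * like any other allocation.
 */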
kern_return_t
vm_allocate_cpm(
	host_priv_t		host_priv,
	vm_map_t		map,
	vm_map_offset_t		*addr,
	vm_map_size_t		size,
	int			flags)
{
	vm_object_t		cpm_obj;
	pmap_t			pmap;
	vm_page_t		m, pages;
	kern_return_t		kr;
	vm_map_offset_t		va, start, end, offset;
#if	MACH_ASSERT
	vm_map_offset_t		prev_addr = 0;
#endif	/* MACH_ASSERT */

	boolean_t		anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
	vm_tag_t		tag;

	VM_GET_FLAGS_ALIAS(flags, tag);

	if (size == 0) {
		*addr = 0;
		return KERN_SUCCESS;
	}
	if (anywhere)
		*addr = vm_map_min(map);
	else
		*addr = vm_map_trunc_page(*addr,
					  VM_MAP_PAGE_MASK(map));
	size = vm_map_round_page(size,
				 VM_MAP_PAGE_MASK(map));

	/*
	 * LP64todo - cpm_allocate should probably allow
	 * allocations of >4GB, but not with the current
	 * algorithm, so just cast down the size for now.
	 */
	if (size > VM_MAX_ADDRESS)
		return KERN_RESOURCE_SHORTAGE;
	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
			       &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
		return kr;

	cpm_obj = vm_object_allocate((vm_object_size_t)size);
	assert(cpm_obj != VM_OBJECT_NULL);
	assert(cpm_obj->internal);
	assert(cpm_obj->vo_size == (vm_object_size_t)size);
	assert(cpm_obj->can_persist == FALSE);
	assert(cpm_obj->pager_created == FALSE);
	assert(cpm_obj->pageout == FALSE);
	assert(cpm_obj->shadow == VM_OBJECT_NULL);
	/*
	 *	Insert pages into object.
	 */

	vm_object_lock(cpm_obj);
	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;

		assert(!m->gobbled);
		assert(!m->wanted);
		assert(!m->pageout);
		assert(!m->tabled);
		assert(VM_PAGE_WIRED(m));
		/*
		 * ENCRYPTED SWAP:
		 * "m" is not supposed to be pageable, so it
		 * should not be encrypted.  It wouldn't be safe
		 * to enter it in a new VM object while encrypted.
		 */
		ASSERT_PAGE_DECRYPTED(m);
		assert(m->busy);
		assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));

		m->busy = FALSE;
		vm_page_insert(m, cpm_obj, offset);
	}
	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
	vm_object_unlock(cpm_obj);

	/*
	 *	Hang onto a reference on the object in case a
	 *	multi-threaded application for some reason decides
	 *	to deallocate the portion of the address space into
	 *	which we will insert this object.
	 *
	 *	Unfortunately, we must insert the object now before
	 *	we can talk to the pmap module about which addresses
	 *	must be wired down.  Hence, the race with a multi-
	 *	threaded app.
	 */
	vm_object_reference(cpm_obj);

	/*
	 *	Insert object into map.
	 */

	kr = vm_map_enter(
		map,
		addr,
		size,
		(vm_map_offset_t)0,
		flags,
		cpm_obj,
		(vm_object_offset_t)0,
		FALSE,
		VM_PROT_ALL,
		VM_PROT_ALL,
		VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS) {
		/*
		 *	A CPM object doesn't have can_persist set,
		 *	so all we have to do is deallocate it to
		 *	free up these pages.
		 */
		assert(cpm_obj->pager_created == FALSE);
		assert(cpm_obj->can_persist == FALSE);
		assert(cpm_obj->pageout == FALSE);
		assert(cpm_obj->shadow == VM_OBJECT_NULL);
		vm_object_deallocate(cpm_obj); /* kill acquired ref */
		vm_object_deallocate(cpm_obj); /* kill creation ref */
		return kr;
	}
	/*
	 *	Inform the physical mapping system that the
	 *	range of addresses may not fault, so that
	 *	page tables and such can be locked down as well.
	 */
	start = *addr;
	end = start + size;
	pmap = vm_map_pmap(map);
	pmap_pageable(pmap, start, end, FALSE);

	/*
	 *	Enter each page into the pmap, to avoid faults.
	 *	Note that this loop could be coded more efficiently,
	 *	if the need arose, rather than looking up each page
	 *	again.
	 */
	for (offset = 0, va = start; offset < size;
	     va += PAGE_SIZE, offset += PAGE_SIZE) {
		int type_of_fault;

		vm_object_lock(cpm_obj);
		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
		assert(m != VM_PAGE_NULL);

		vm_page_zero_fill(m);

		type_of_fault = DBG_ZERO_FILL_FAULT;

		vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
			       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
			       &type_of_fault);

		vm_object_unlock(cpm_obj);
	}
#if	MACH_ASSERT
	/*
	 *	Verify ordering in address space.
	 */
	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		vm_object_lock(cpm_obj);
		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
		vm_object_unlock(cpm_obj);
		if (m == VM_PAGE_NULL)
			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
			      cpm_obj, (uint64_t)offset);
		assert(m->tabled);
		assert(!m->busy);
		assert(!m->wanted);
		assert(!m->fictitious);
		assert(!m->private);
		assert(!m->absent);
		assert(!m->error);
		assert(!m->cleaning);
		assert(!m->laundry);
		assert(!m->precious);
		assert(!m->clustered);
		if (offset != 0) {
			if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
				printf("start 0x%llx end 0x%llx va 0x%llx\n",
				       (uint64_t)start, (uint64_t)end, (uint64_t)va);
				printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
				printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
				panic("vm_allocate_cpm:  pages not contig!");
			}
		}
		prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
	}
#endif	/* MACH_ASSERT */

	vm_object_deallocate(cpm_obj); /* kill extra ref */

	return kr;
}
#else	/* VM_CPM */

/*
 *	Interface is defined in all cases, but unless the kernel
 *	is built explicitly for this option, the interface does
 *	nothing.
 */

kern_return_t
vm_allocate_cpm(
	__unused host_priv_t		host_priv,
	__unused vm_map_t		map,
	__unused vm_map_offset_t	*addr,
	__unused vm_map_size_t		size,
	__unused int			flags)
{
	return KERN_FAILURE;
}
#endif /* VM_CPM */
/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 * Clip and unnest a portion of a nested submap mapping.
 */

static void
vm_map_clip_unnest(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start_unnest,
	vm_map_offset_t	end_unnest)
{
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent.  This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		log_unnest_badness(map,
				   old_start_unnest,
				   old_end_unnest,
				   VME_SUBMAP(entry)->is_nested_map,
				   (entry->vme_start +
				    VME_SUBMAP(entry)->lowest_unnestable_start -
				    VME_OFFSET(entry)));
	}

	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		      "bad nested entry: start=0x%llx end=0x%llx\n",
		      (long long)start_unnest, (long long)end_unnest,
		      (long long)entry->vme_start, (long long)entry->vme_end);
	}

	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
				   entry,
				   start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
				 entry,
				 end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	pmap_unnest(map->pmap,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
#endif	/* NO_NESTED_PMAP */
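/*
 * Note (added for clarity, not in the original): "unnesting" undoes the
 * pmap-level sharing set up by pmap_nest().  After vm_map_clip_unnest() the
 * clipped entry still points at the same submap, but "use_pmap" is FALSE, so
 * translations for that range are no longer borrowed from the submap's pmap
 * and must instead be entered in the parent map's pmap on demand.
 */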
/*
 *	vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_start(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t	start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
		end_unnest = start_unnest + pmap_nesting_size_min;
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
				    (addr64_t)(entry->vme_start),
				    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}
		_vm_map_clip_start(&map->hdr, entry, startaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
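/*
 * Illustrative sketch (not part of the original source): clipping splits one
 * entry into two at a boundary so that later operations can act on an exact
 * sub-range.  For example, given an entry covering [0x1000, 0x5000) and a
 * request affecting [0x3000, 0x5000):
 *
 *	vm_map_clip_start(map, entry, 0x3000);
 *	// before: [0x1000 ------------------ 0x5000)
 *	// after:  [0x1000 -- 0x3000)[0x3000 -- 0x5000)
 *	//         "entry" now refers to the piece starting at 0x3000.
 *
 * The addresses are made up for illustration.
 */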
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
		_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END

/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_start(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		start)
{
	vm_map_entry_t	new_entry;

	/*
	 *	Split off the front portion --
	 *	note that we must insert the new
	 *	entry BEFORE this one, so that
	 *	this entry has the specified starting
	 *	address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
					   VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	if (entry->is_sub_map)
		vm_map_reference(VME_SUBMAP(new_entry));
	else
		vm_object_reference(VME_OBJECT(new_entry));
}
/*
 *	vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_end(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t	start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
			(endaddr + pmap_nesting_size_min - 1) &
			~(pmap_nesting_size_min - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
				    (addr64_t)(entry->vme_start),
				    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}
		_vm_map_clip_end(&map->hdr, entry, endaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}

#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
		_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_end(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		end)
{
	vm_map_entry_t	new_entry;

	/*
	 *	Create a new entry and insert it
	 *	AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
					   VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	VME_OFFSET_SET(new_entry,
		       VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	if (entry->is_sub_map)
		vm_map_reference(VME_SUBMAP(new_entry));
	else
		vm_object_reference(VME_OBJECT(new_entry));
}
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if (start < vm_map_min(map))		\
		start = vm_map_min(map);	\
	if (end > vm_map_max(map))		\
		end = vm_map_max(map);		\
	if (start > end)			\
		start = end;			\
	MACRO_END
/*
 *	vm_map_range_check:	[ internal use only ]
 *
 *	Check that the region defined by the specified start and
 *	end addresses is wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
 *	i.e. the specified region contains no unmapped space.
 *	If any or all of the region is unmapped, FALSE is returned.
 *	Otherwise, TRUE is returned and if the output argument 'entry'
 *	is not NULL it points to the map entry containing the start
 *	of the region.
 *
 *	The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_map_entry_t		*entry)
{
	vm_map_entry_t		cur;
	vm_map_offset_t		prev;

	/*
	 * 	Basic sanity checks first
	 */
	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
		return (FALSE);

	/*
	 * 	Check first if the region starts within a valid
	 *	mapping for the map.
	 */
	if (!vm_map_lookup_entry(map, start, &cur))
		return (FALSE);

	/*
	 *	Optimize for the case that the region is contained
	 *	in a single map entry.
	 */
	if (entry != (vm_map_entry_t *) NULL)
		*entry = cur;

	if (end <= cur->vme_end)
		return (TRUE);

	/*
	 * 	If the region is not wholly contained within a
	 * 	single entry, walk the entries looking for holes.
	 */
	prev = cur->vme_end;
	cur = cur->vme_next;
	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
		if (end <= cur->vme_end)
			return (TRUE);
		prev = cur->vme_end;
		cur = cur->vme_next;
	}
	return (FALSE);
}
/*
 *	vm_map_submap:		[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find using
 *	the vm_submap_object, and no other operations may have been
 *	performed on this range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *		vm_fault
 *	[Don't try vm_map_copyin!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
kern_return_t
vm_map_submap(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	submap,
	vm_map_offset_t	offset,
#ifdef NO_NESTED_PMAP
	__unused
#endif	/* NO_NESTED_PMAP */
	boolean_t	use_pmap)
{
	vm_map_entry_t		entry;
	kern_return_t		result = KERN_INVALID_ARGUMENT;
	vm_object_t		object;

	vm_map_lock(map);

	if (! vm_map_lookup_entry(map, start, &entry)) {
		entry = entry->vme_next;
	}

	if (entry == vm_map_to_entry(map) ||
	    entry->is_sub_map) {
		vm_map_unlock(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);

	if ((entry->vme_start == start) && (entry->vme_end == end) &&
	    (!entry->is_sub_map) &&
	    ((object = VME_OBJECT(entry)) == vm_submap_object) &&
	    (object->resident_page_count == 0) &&
	    (object->copy == VM_OBJECT_NULL) &&
	    (object->shadow == VM_OBJECT_NULL) &&
	    (!object->pager_created)) {
		VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
		VME_OBJECT_SET(entry, VM_OBJECT_NULL);
		vm_object_deallocate(object);
		entry->is_sub_map = TRUE;
		entry->use_pmap = FALSE;
		VME_SUBMAP_SET(entry, submap);
		vm_map_reference(submap);
		if (submap->mapped_in_other_pmaps == FALSE &&
		    vm_map_pmap(submap) != PMAP_NULL &&
		    vm_map_pmap(submap) != vm_map_pmap(map)) {
			/*
			 * This submap is being mapped in a map
			 * that uses a different pmap.
			 * Set its "mapped_in_other_pmaps" flag
			 * to indicate that we now need to
			 * remove mappings from all pmaps rather
			 * than just the submap's pmap.
			 */
			submap->mapped_in_other_pmaps = TRUE;
		}

#ifndef NO_NESTED_PMAP
		if (use_pmap) {
			/* nest if platform code will allow */
			if(submap->pmap == NULL) {
				ledger_t ledger = map->pmap->ledger;
				submap->pmap = pmap_create(ledger,
							   (vm_map_size_t) 0, FALSE);
				if(submap->pmap == PMAP_NULL) {
					vm_map_unlock(map);
					return(KERN_NO_SPACE);
				}
			}
			result = pmap_nest(map->pmap,
					   (VME_SUBMAP(entry))->pmap,
					   (addr64_t)start,
					   (addr64_t)start,
					   (uint64_t)(end - start));
			if(result)
				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
			entry->use_pmap = TRUE;
		}
#else	/* NO_NESTED_PMAP */
		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif	/* NO_NESTED_PMAP */
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return(result);
}
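
/*
 * Illustrative sketch (not part of the original source): the intended
 * calling sequence for vm_map_submap().  The range must first have
 * been reserved against vm_submap_object and left untouched; the
 * names "parent_map", "shared_submap" and the offset value are
 * hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_nest_submap(
	vm_map_t	parent_map,
	vm_map_t	shared_submap,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	/*
	 * Assumes [start, start+size) was created in parent_map with
	 * the vm_submap_object and has not been touched since.
	 */
	return vm_map_submap(parent_map,
			     start,
			     start + size,
			     shared_submap,
			     0,			/* offset within submap */
			     TRUE);		/* use_pmap: nest if possible */
}
#endif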
/*
 *	vm_map_protect:
 *
 *	Sets the protection of the specified address
 *	region in the target map.  If "set_max" is
 *	specified, the maximum protection is to be set;
 *	otherwise, only the current protection is affected.
 */
kern_return_t
vm_map_protect(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_prot_t	new_prot,
	boolean_t	set_max)
{
	vm_map_entry_t		current;
	vm_map_offset_t		prev;
	vm_map_entry_t		entry;
	vm_prot_t		new_max;

	XPR(XPR_VM_MAP,
	    "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
	    map, start, end, new_prot, set_max);

	vm_map_lock(map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (start >= map->max_offset) {
		vm_map_unlock(map);
		return(KERN_INVALID_ADDRESS);
	}

	while(1) {
		/*
		 *	Lookup the entry.  If it doesn't start in a valid
		 *	entry, return an error.
		 */
		if (! vm_map_lookup_entry(map, start, &entry)) {
			vm_map_unlock(map);
			return(KERN_INVALID_ADDRESS);
		}

		if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) {
			/* extend request to whole entry */
			start = SUPERPAGE_ROUND_DOWN(start);
			continue;
		}
		break;
	}
	if (entry->superpage_size)
		end = SUPERPAGE_ROUND_UP(end);

	/*
	 *	Make a first pass to check for protection and address
	 *	violations.
	 */

	current = entry;
	prev = current->vme_start;
	while ((current != vm_map_to_entry(map)) &&
	       (current->vme_start < end)) {

		/*
		 * If there is a hole, return an error.
		 */
		if (current->vme_start != prev) {
			vm_map_unlock(map);
			return(KERN_INVALID_ADDRESS);
		}

		new_max = current->max_protection;
		if(new_prot & VM_PROT_COPY) {
			new_max |= VM_PROT_WRITE;
			if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
				vm_map_unlock(map);
				return(KERN_PROTECTION_FAILURE);
			}
		} else {
			if ((new_prot & new_max) != new_prot) {
				vm_map_unlock(map);
				return(KERN_PROTECTION_FAILURE);
			}
		}

		prev = current->vme_end;
		current = current->vme_next;
	}

	if (end > prev) {
		vm_map_unlock(map);
		return(KERN_INVALID_ADDRESS);
	}

	/*
	 *	Go back and fix up protections.
	 *	Clip to start here if the range starts within
	 *	the entry.
	 */

	current = entry;
	if (current != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, current, start);
	}

	while ((current != vm_map_to_entry(map)) &&
	       (current->vme_start < end)) {

		vm_prot_t	old_prot;

		vm_map_clip_end(map, current, end);

		if (current->is_sub_map) {
			/* clipping did unnest if needed */
			assert(!current->use_pmap);
		}

		old_prot = current->protection;

		if(new_prot & VM_PROT_COPY) {
			/* caller is asking specifically to copy the      */
			/* mapped data, this implies that max protection  */
			/* will include write.  Caller must be prepared   */
			/* for loss of shared memory communication in the */
			/* target area after taking this step             */

			if (current->is_sub_map == FALSE &&
			    VME_OBJECT(current) == VM_OBJECT_NULL) {
				VME_OBJECT_SET(current,
					       vm_object_allocate(
						       (vm_map_size_t)
						       (current->vme_end -
							current->vme_start)));
				VME_OFFSET_SET(current, 0);
				assert(current->use_pmap);
			}
			assert(current->wired_count == 0);
			current->needs_copy = TRUE;
			current->max_protection |= VM_PROT_WRITE;
		}

		if (set_max)
			current->protection =
				(current->max_protection =
				 new_prot & ~VM_PROT_COPY) &
				old_prot;
		else
			current->protection = new_prot & ~VM_PROT_COPY;

		/*
		 *	Update physical map if necessary.
		 *	If the request is to turn off write protection,
		 *	we won't do it for real (in pmap). This is because
		 *	it would cause copy-on-write to fail.  We've already
		 *	set the new protection in the map, so if a
		 *	write-protect fault occurred, it will be fixed up
		 *	properly, COW or not.
		 */
		if (current->protection != old_prot) {
			/* Look one level in: we support nested pmaps */
			/* from mapped submaps which are direct entries */
			/* in our map */

			vm_prot_t prot;

			prot = current->protection;
			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
				prot &= ~VM_PROT_WRITE;
			} else {
				assert(!VME_OBJECT(current)->code_signed);
				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
			}

			if (override_nx(map, VME_ALIAS(current)) && prot)
				prot |= VM_PROT_EXECUTE;

			if (current->is_sub_map && current->use_pmap) {
				pmap_protect(VME_SUBMAP(current)->pmap,
					     current->vme_start,
					     current->vme_end,
					     prot);
			} else {
				pmap_protect(map->pmap,
					     current->vme_start,
					     current->vme_end,
					     prot);
			}
		}
		current = current->vme_next;
	}

	current = entry;
	while ((current != vm_map_to_entry(map)) &&
	       (current->vme_start <= end)) {
		vm_map_simplify_entry(map, current);
		current = current->vme_next;
	}

	vm_map_unlock(map);
	return(KERN_SUCCESS);
}
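
/*
 * Illustrative sketch (not part of the original source): making a
 * range of an existing map read-only with vm_map_protect().  Passing
 * set_max = FALSE changes only the current protection; the maximum
 * protection is left untouched, so VM_PROT_WRITE can later be
 * restored if it was allowed.  Helper name is hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_make_readonly(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
	return vm_map_protect(map,
			      vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map)),
			      VM_PROT_READ,
			      FALSE);		/* set_max */
}
#endif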
/*
 *	vm_map_inherit:
 *
 *	Sets the inheritance of the specified address
 *	range in the target map.  Inheritance
 *	affects how the map will be shared with
 *	child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_inherit_t	new_inheritance)
{
	vm_map_entry_t	entry;
	vm_map_entry_t	temp_entry;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
	}
	else {
		temp_entry = temp_entry->vme_next;
		entry = temp_entry;
	}

	/* first check entire range for submaps which can't support the */
	/* given inheritance. */
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if(entry->is_sub_map) {
			if(new_inheritance == VM_INHERIT_COPY) {
				vm_map_unlock(map);
				return(KERN_INVALID_ARGUMENT);
			}
		}
		entry = entry->vme_next;
	}

	entry = temp_entry;
	if (entry != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, entry, start);
	}

	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_clip_end(map, entry, end);
		if (entry->is_sub_map) {
			/* clip did unnest if needed */
			assert(!entry->use_pmap);
		}

		entry->inheritance = new_inheritance;

		entry = entry->vme_next;
	}

	vm_map_unlock(map);
	return(KERN_SUCCESS);
}
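
/*
 * Illustrative sketch (not part of the original source): marking a
 * range so that a child created by vm_map_fork() shares it with the
 * parent rather than receiving a copy.  Helper name is hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_share_with_children(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	/* VM_INHERIT_COPY would be rejected for submap entries, see above */
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif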
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	user_wire)
{
	vm_map_size_t	size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */

		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */

			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
			   size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
			   size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
				return KERN_RESOURCE_SHORTAGE;

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */

			if (entry->wired_count >= MAX_WIRE_COUNT)
				return KERN_FAILURE;

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT)
			return KERN_FAILURE;

		entry->user_wired_count++;

	} else {

		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.
		 */

		if (entry->wired_count >= MAX_WIRE_COUNT)
			panic("vm_map_wire: too many wirings");

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
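
/*
 * Illustrative sketch (not part of the original source): the limit
 * test above, restated for a single request.  A user wire request of
 * "size" bytes fails with KERN_RESOURCE_SHORTAGE when any of the
 * three ceilings would be exceeded.  Helper name is hypothetical.
 */
#if 0	/* example only -- not compiled */
static boolean_t
example_user_wire_would_exceed_limits(vm_map_t map, vm_map_size_t size)
{
	unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

	return (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
		size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
		size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount);
}
#endif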
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	user_wire)
{

	if (user_wire) {

		/*
		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
		 */

		if (entry->user_wired_count == 1) {

			/*
			 * We're removing the last user wire reference.  Decrement the wired_count and the total
			 * user wired memory for this map.
			 */

			assert(entry->wired_count >= 1);
			entry->wired_count--;
			map->user_wire_size -= entry->vme_end - entry->vme_start;
		}

		assert(entry->user_wired_count >= 1);
		entry->user_wired_count--;

	} else {

		/*
		 * The kernel is unwiring the memory.  Just update the count.
		 */

		assert(entry->wired_count >= 1);
		entry->wired_count--;
	}
}
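
/*
 * Illustrative sketch (not part of the original source): the counting
 * scheme maintained by add_wire_counts()/subtract_wire_counts().  An
 * entry's wired_count carries one reference per kernel wire request
 * plus a single reference for all outstanding user wirings, while
 * user_wired_count tracks how many user requests are outstanding.
 * Helper name is hypothetical.
 */
#if 0	/* example only -- not compiled */
static void
example_wire_count_invariant(vm_map_entry_t entry)
{
	/* a user-wired entry always contributes a single wired_count ref */
	if (entry->user_wired_count > 0)
		assert(entry->wired_count >= 1);
	/* a fully unwired entry holds no references of either kind */
	if (entry->wired_count == 0)
		assert(entry->user_wired_count == 0);
}
#endif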
/*
 *	vm_map_wire:
 *
 *	Sets the pageability of the specified address range in the
 *	target map as wired.  Regions specified as not pageable require
 *	locked-down physical memory and physical page maps.  The
 *	access_type variable indicates types of accesses that must not
 *	generate page faults.  This is checked against protection of
 *	memory being locked-down.
 *
 *	The map must not be locked, but a reference must remain to the
 *	map throughout the call.
 */
static kern_return_t
vm_map_wire_nested(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		caller_prot,
	boolean_t		user_wire,
	pmap_t			map_pmap,
	vm_map_offset_t		pmap_addr,
	ppnum_t			*physpage_p)
{
	vm_map_entry_t		entry;
	vm_prot_t		access_type;
	struct vm_map_entry	*first_entry, tmp_entry;
	vm_map_t		real_map;
	vm_map_offset_t		s, e;
	kern_return_t		rc;
	boolean_t		need_wakeup;
	boolean_t		main_map = FALSE;
	wait_interrupt_t	interruptible_state;
	thread_t		cur_thread;
	unsigned int		last_timestamp;
	vm_map_size_t		size;
	boolean_t		wire_and_extract;

	access_type = (caller_prot & VM_PROT_ALL);

	wire_and_extract = FALSE;
	if (physpage_p != NULL) {
		/*
		 * The caller wants the physical page number of the
		 * wired page.  We return only one physical page number
		 * so this works for only one page at a time.
		 */
		if ((end - start) != PAGE_SIZE) {
			return KERN_INVALID_ARGUMENT;
		}
		wire_and_extract = TRUE;
		*physpage_p = 0;
	}

	vm_map_lock(map);
	if(map_pmap == NULL)
		main_map = TRUE;
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(page_aligned(start));
	assert(page_aligned(end));
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
	if (start == end) {
		/* We wired what the caller asked for, zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	need_wakeup = FALSE;
	cur_thread = current_thread();

	s = start;
	rc = KERN_SUCCESS;

	if (vm_map_lookup_entry(map, s, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested submaps here !
		 */
	} else {
		/* Start address is not in map */
		rc = KERN_INVALID_ADDRESS;
		goto done;
	}

	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have wired from "start" to "s".
		 * We still need to wire from "s" to "end".
		 *
		 * "entry" hasn't been clipped, so it could start before "s"
		 * and/or end after "end".
		 */

		/* "e" is how far we want to wire in this entry */
		e = entry->vme_end;
		if (e > end)
			e = end;

		/*
		 * If another thread is wiring/unwiring this entry then
		 * block after informing other thread to wake us up.
		 */
		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * We have not clipped the entry.  Make sure that
			 * the start address is in range so that the lookup
			 * below will succeed.
			 * "s" is the current starting point: we've already
			 * wired from "start" to "s" and we still have
			 * to wire from "s" to "end".
			 */

			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already wired.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}
			/*
			 * User wiring is interruptible
			 */
			wait_result = vm_map_entry_wait(map,
							(user_wire) ? THREAD_ABORTSAFE :
							THREAD_UNINT);
			if (user_wire && wait_result == THREAD_INTERRUPTED) {
				/*
				 * undo the wirings we have done so far
				 * We do not clear the needs_wakeup flag,
				 * because we cannot tell if we were the
				 * only one waiting.
				 */
				rc = KERN_FAILURE;
				goto done;
			}

			/*
			 * Cannot avoid a lookup here. reset timestamp.
			 */
			last_timestamp = map->timestamp;

			/*
			 * The entry could have been clipped, look it up again.
			 * Worst that can happen is, it may not exist anymore.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: undo everything up to the previous
				 * entry.  let vm_map_unwire worry about
				 * checking the validity of the range.
				 */
				rc = KERN_FAILURE;
				goto done;
			}
			entry = first_entry;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_start;
			vm_map_offset_t	local_end;
			pmap_t		pmap;

			if (wire_and_extract) {
				/*
				 * Wiring would result in copy-on-write
				 * which would not be compatible with
				 * the sharing we have with the original
				 * provider of this memory.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end;
			sub_end += VME_OFFSET(entry) - entry->vme_start;

			local_end = entry->vme_end;
			if(map_pmap == NULL) {
				vm_object_t		object;
				vm_object_offset_t	offset;
				vm_prot_t		prot;
				boolean_t		wired;
				vm_map_entry_t		local_entry;
				vm_map_version_t	version;
				vm_map_t		lookup_map;

				if(entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					/* ppc implementation requires that */
					/* submaps pmap address ranges line */
					/* up with parent map */
					pmap_addr = sub_start;
				} else {
					pmap = map->pmap;
					pmap_addr = start;
				}

				if (entry->wired_count) {
					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
						goto done;

					/*
					 * The map was not unlocked:
					 * no need to goto re-lookup.
					 * Just go directly to next entry.
					 */
					entry = entry->vme_next;
					s = entry->vme_start;
					continue;
				}

				/* call vm_map_lookup_locked to */
				/* cause any needs copy to be   */
				/* evaluated */
				local_start = entry->vme_start;
				lookup_map = map;
				vm_map_lock_write_to_read(map);
				if(vm_map_lookup_locked(
					   &lookup_map, local_start,
					   access_type | VM_PROT_COPY,
					   OBJECT_LOCK_EXCLUSIVE,
					   &version, &object,
					   &offset, &prot, &wired,
					   NULL,
					   &real_map)) {

					vm_map_unlock_read(lookup_map);
					assert(map_pmap == NULL);
					vm_map_unwire(map, start,
						      s, user_wire);
					return(KERN_FAILURE);
				}
				vm_object_unlock(object);
				if(real_map != lookup_map)
					vm_map_unlock(real_map);
				vm_map_unlock_read(lookup_map);
				vm_map_lock(map);

				/* we unlocked, so must re-lookup */
				if (!vm_map_lookup_entry(map,
							 local_start,
							 &local_entry)) {
					rc = KERN_FAILURE;
					goto done;
				}

				/*
				 * entry could have been "simplified",
				 * so re-clip
				 */
				entry = local_entry;
				assert(s == local_start);
				vm_map_clip_start(map, entry, s);
				vm_map_clip_end(map, entry, end);
				/* re-compute "e" */
				e = entry->vme_end;
				if (e > end)
					e = end;

				/* did we have a change of type? */
				if (!entry->is_sub_map) {
					last_timestamp = map->timestamp;
					continue;
				}
			} else {
				local_start = entry->vme_start;
				pmap = map_pmap;
			}

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
				goto done;

			entry->in_transition = TRUE;

			vm_map_unlock(map);
			rc = vm_map_wire_nested(VME_SUBMAP(entry),
						sub_start, sub_end,
						caller_prot,
						user_wire, pmap, pmap_addr,
						NULL);
			vm_map_lock(map);

			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, local_start,
						 &first_entry))
				panic("vm_map_wire: re-lookup failed");
			entry = first_entry;

			assert(local_start == s);
			/* re-compute "e" */
			e = entry->vme_end;
			if (e > end)
				e = end;

			last_timestamp = map->timestamp;
			while ((entry != vm_map_to_entry(map)) &&
			       (entry->vme_start < e)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
					subtract_wire_counts(map, entry, user_wire);
				}
				entry = entry->vme_next;
			}
			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
				goto done;
			}

			/* no need to relookup again */
			s = entry->vme_start;
			continue;
		}

		/*
		 * If this entry is already wired then increment
		 * the appropriate wire reference count.
		 */
		if (entry->wired_count) {

			if ((entry->protection & access_type) != access_type) {
				/* found a protection problem */

				/*
				 * We should always return an error
				 * in this case but since we didn't
				 * enforce it before, let's do
				 * it only for the new "wire_and_extract"
				 * code path for now...
				 */
				if (wire_and_extract) {
					rc = KERN_PROTECTION_FAILURE;
					goto done;
				}
			}

			/*
			 * entry is already wired down, get our reference
			 * after clipping to our range.
			 */
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
				goto done;

			if (wire_and_extract) {
				vm_object_t		object;
				vm_object_offset_t	offset;
				vm_page_t		m;

				/*
				 * We don't have to "wire" the page again
				 * but we still have to "extract" its
				 * physical page number, after some sanity
				 * checks.
				 */
				assert((entry->vme_end - entry->vme_start)
				       == PAGE_SIZE);
				assert(!entry->needs_copy);
				assert(!entry->is_sub_map);
				assert(VME_OBJECT(entry));
				if (((entry->vme_end - entry->vme_start)
				     != PAGE_SIZE) ||
				    entry->needs_copy ||
				    entry->is_sub_map ||
				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
					rc = KERN_INVALID_ARGUMENT;
					goto done;
				}

				object = VME_OBJECT(entry);
				offset = VME_OFFSET(entry);
				/* need exclusive lock to update m->dirty */
				if (entry->protection & VM_PROT_WRITE) {
					vm_object_lock(object);
				} else {
					vm_object_lock_shared(object);
				}
				m = vm_page_lookup(object, offset);
				assert(m != VM_PAGE_NULL);
				assert(VM_PAGE_WIRED(m));
				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
					if (entry->protection & VM_PROT_WRITE) {
						vm_object_lock_assert_exclusive(
							object);
						m->dirty = TRUE;
					}
				} else {
					/* not already wired !? */
					*physpage_p = 0;
				}
				vm_object_unlock(object);
			}

			/* map was not unlocked: no need to relookup */
			entry = entry->vme_next;
			s = entry->vme_start;
			continue;
		}

		/*
		 * Unwired entry or wire request transmitted via submap
		 */

		/*
		 * Perform actions of vm_map_lookup that need the write
		 * lock on the map: create a shadow object for a
		 * copy-on-write region, or an object for a zero-fill
		 * region.
		 */
		size = entry->vme_end - entry->vme_start;
		/*
		 * If wiring a copy-on-write page, we need to copy it now
		 * even if we're only (currently) requesting read access.
		 * This is aggressive, but once it's wired we can't move it.
		 */
		if (entry->needs_copy) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be "needs_copy"
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			VME_OBJECT_SHADOW(entry, size);
			entry->needs_copy = FALSE;
		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should already have an object.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			VME_OBJECT_SET(entry, vm_object_allocate(size));
			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
			assert(entry->use_pmap);
		}

		vm_map_clip_start(map, entry, s);
		vm_map_clip_end(map, entry, end);

		/* re-compute "e" */
		e = entry->vme_end;
		if (e > end)
			e = end;

		/*
		 * Check for holes and protection mismatch.
		 * Holes: Next entry should be contiguous unless this
		 *	  is the end of the region.
		 * Protection: Access requested must be allowed, unless
		 *	wiring is by protection class
		 */
		if ((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		     (entry->vme_next->vme_start > entry->vme_end))) {
			/* found a hole */
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}
		if ((entry->protection & access_type) != access_type) {
			/* found a protection problem */
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		assert(entry->wired_count == 0 && entry->user_wired_count == 0);

		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
			goto done;

		entry->in_transition = TRUE;

		/*
		 * This entry might get split once we unlock the map.
		 * In vm_fault_wire(), we need the current range as
		 * defined by this entry.  In order for this to work
		 * along with a simultaneous clip operation, we make a
		 * temporary copy of this entry and use that for the
		 * wiring.  Note that the underlying objects do not
		 * change during a clip.
		 */
		tmp_entry = *entry;

		/*
		 * The in_transition state guarantees that the entry
		 * (or entries for this range, if a split occurred) will be
		 * there when the map lock is acquired for the second time.
		 */
		vm_map_unlock(map);

		if (!user_wire && cur_thread != THREAD_NULL)
			interruptible_state = thread_interrupt_level(THREAD_UNINT);
		else
			interruptible_state = THREAD_UNINT;

		if(map_pmap)
			rc = vm_fault_wire(map,
					   &tmp_entry, caller_prot, map_pmap, pmap_addr,
					   physpage_p);
		else
			rc = vm_fault_wire(map,
					   &tmp_entry, caller_prot, map->pmap,
					   tmp_entry.vme_start,
					   physpage_p);

		if (!user_wire && cur_thread != THREAD_NULL)
			thread_interrupt_level(interruptible_state);

		vm_map_lock(map);

		if (last_timestamp+1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
						 &first_entry))
				panic("vm_map_wire: re-lookup failed");

			entry = first_entry;
		}

		last_timestamp = map->timestamp;

		while ((entry != vm_map_to_entry(map)) &&
		       (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
				subtract_wire_counts(map, entry, user_wire);
			}
			entry = entry->vme_next;
		}

		if (rc != KERN_SUCCESS) {		/* from vm_*_wire */
			goto done;
		}

		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
		    (tmp_entry.vme_end != end) &&	/* AND, we are not at the end of the requested range */
		    (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
			/* found a "new" hole */
			s = tmp_entry.vme_end;
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}

		s = entry->vme_start;

	} /* end while loop through map entries */

done:
	if (rc == KERN_SUCCESS) {
		/* repair any damage we may have made to the VM map */
		vm_map_simplify_range(map, start, end);
	}

	vm_map_unlock(map);

	/*
	 * wake up anybody waiting on entries we wired.
	 */
	if (need_wakeup)
		vm_map_entry_wakeup(map);

	if (rc != KERN_SUCCESS) {
		/* undo what has been wired so far */
		vm_map_unwire_nested(map, start, s, user_wire,
				     map_pmap, pmap_addr);
		if (physpage_p) {
			*physpage_p = 0;
		}
	}

	return rc;
}
kern_return_t
vm_map_wire_external(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		caller_prot,
	boolean_t		user_wire)
{
	kern_return_t	kret;

	caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
	caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
	kret = vm_map_wire_nested(map, start, end, caller_prot,
				  user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}
kern_return_t
vm_map_wire(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		caller_prot,
	boolean_t		user_wire)
{
	kern_return_t	kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot,
				  user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}
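
/*
 * Illustrative sketch (not part of the original source): wiring and
 * later unwiring one page of a task's map on behalf of the kernel.
 * vm_map_wire_external() tags the wiring with the caller's
 * backtrace-derived VM tag; user_wire = FALSE means the wiring is
 * charged to the kernel rather than to the user limits described
 * above.  Helper name is hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_wire_one_page(vm_map_t map, vm_map_offset_t addr)
{
	kern_return_t	kr;
	vm_map_offset_t	start = vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map));
	vm_map_offset_t	end = start + VM_MAP_PAGE_SIZE(map);

	kr = vm_map_wire_external(map, start, end,
				  VM_PROT_READ | VM_PROT_WRITE,
				  FALSE);	/* user_wire */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... access the wired range ... */

	return vm_map_unwire(map, start, end, FALSE);	/* see vm_map_unwire below */
}
#endif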
kern_return_t
vm_map_wire_and_extract_external(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_prot_t	caller_prot,
	boolean_t	user_wire,
	ppnum_t		*physpage_p)
{
	kern_return_t	kret;

	caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
	caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
	kret = vm_map_wire_nested(map,
				  start,
				  start+VM_MAP_PAGE_SIZE(map),
				  caller_prot,
				  user_wire,
				  (pmap_t)NULL,
				  0,
				  physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}
kern_return_t
vm_map_wire_and_extract(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_prot_t	caller_prot,
	boolean_t	user_wire,
	ppnum_t		*physpage_p)
{
	kern_return_t	kret;

	kret = vm_map_wire_nested(map,
				  start,
				  start+VM_MAP_PAGE_SIZE(map),
				  caller_prot,
				  user_wire,
				  (pmap_t)NULL,
				  0,
				  physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}
/*
 *	vm_map_unwire:
 *
 *	Sets the pageability of the specified address range in the target
 *	as pageable.  Regions specified must have been wired previously.
 *
 *	The map must not be locked, but a reference must remain to the map
 *	throughout the call.
 *
 *	Kernel will panic on failures.  User unwire ignores holes and
 *	unwired and in-transition entries to avoid losing memory by leaving
 *	it unwired.
 */
static kern_return_t
vm_map_unwire_nested(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	boolean_t		user_wire,
	pmap_t			map_pmap,
	vm_map_offset_t		pmap_addr)
{
	vm_map_entry_t		entry;
	struct vm_map_entry	*first_entry, tmp_entry;
	boolean_t		need_wakeup;
	boolean_t		main_map = FALSE;
	unsigned int		last_timestamp;

	vm_map_lock(map);
	if(map_pmap == NULL)
		main_map = TRUE;
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(page_aligned(start));
	assert(page_aligned(end));
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We unwired what the caller asked for: zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested sub maps here !
		 */
	}
	else {
		if (!user_wire)
			panic("vm_map_unwire: start not found");
		/*	Start address is not in map. */
		vm_map_unlock(map);
		return(KERN_INVALID_ADDRESS);
	}

	if (entry->superpage_size) {
		/* superpages are always wired */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	need_wakeup = FALSE;
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->in_transition) {
			/*
			 * 1)
			 * Another thread is wiring down this entry. Note
			 * that if it is not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * 2)
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry will not be getting unwired now.
			 */
			if (!user_wire) {
				/*
				 * This could happen:  there could be some
				 * overlapping vslock/vsunlock operations
				 * going on.
				 * We should probably just wait and retry,
				 * but then we have to be careful that this
				 * entry could get "simplified" after
				 * "in_transition" gets unset and before
				 * we re-lookup the entry, so we would
				 * have to re-clip the entry to avoid
				 * re-unwiring what we have already unwired...
				 * See vm_map_wire_nested().
				 *
				 * Or we could just ignore "in_transition"
				 * here and proceed to decrement the wired
				 * count(s) on this entry.  That should be fine
				 * as long as "wired_count" doesn't drop all
				 * the way to 0 (and we should panic if THAT
				 * happens).
				 */
				panic("vm_map_unwire: in_transition entry");
			}

			entry = entry->vme_next;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;
			pmap_t		pmap;

			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);

			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end - entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			if(map_pmap == NULL) {
				if(entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					pmap_addr = sub_start;
				} else {
					pmap = map->pmap;
					pmap_addr = start;
				}
				if (entry->wired_count == 0 ||
				    (user_wire && entry->user_wired_count == 0)) {
					if (!user_wire)
						panic("vm_map_unwire: entry is unwired");
					entry = entry->vme_next;
					continue;
				}

				/*
				 * Check for holes
				 * Holes: Next entry should be contiguous unless
				 * this is the end of the region.
				 */
				if (((entry->vme_end < end) &&
				     ((entry->vme_next == vm_map_to_entry(map)) ||
				      (entry->vme_next->vme_start
				       > entry->vme_end)))) {
					if (!user_wire)
						panic("vm_map_unwire: non-contiguous region");
/*
					entry = entry->vme_next;
					continue;
*/
				}

				subtract_wire_counts(map, entry, user_wire);

				if (entry->wired_count != 0) {
					entry = entry->vme_next;
					continue;
				}

				entry->in_transition = TRUE;
				tmp_entry = *entry;/* see comment in vm_map_wire() */

				/*
				 * We can unlock the map now. The in_transition state
				 * guarantees existence of the entry.
				 */
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
						     sub_start, sub_end, user_wire, pmap, pmap_addr);
				vm_map_lock(map);

				if (last_timestamp+1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
								 tmp_entry.vme_start,
								 &first_entry)) {
						if (!user_wire)
							panic("vm_map_unwire: re-lookup failed");
						entry = first_entry->vme_next;
					} else
						entry = first_entry;
				}
				last_timestamp = map->timestamp;

				/*
				 * clear transition bit for all constituent entries
				 * that were in the original entry (saved in
				 * tmp_entry).  Also check for waiters.
				 */
				while ((entry != vm_map_to_entry(map)) &&
				       (entry->vme_start < tmp_entry.vme_end)) {
					assert(entry->in_transition);
					entry->in_transition = FALSE;
					if (entry->needs_wakeup) {
						entry->needs_wakeup = FALSE;
						need_wakeup = TRUE;
					}
					entry = entry->vme_next;
				}
				continue;
			} else {
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
						     sub_start, sub_end, user_wire, map_pmap,
						     pmap_addr);
				vm_map_lock(map);

				if (last_timestamp+1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
								 tmp_entry.vme_start,
								 &first_entry)) {
						if (!user_wire)
							panic("vm_map_unwire: re-lookup failed");
						entry = first_entry->vme_next;
					} else
						entry = first_entry;
				}
				last_timestamp = map->timestamp;
			}
			continue;
		}

		if ((entry->wired_count == 0) ||
		    (user_wire && entry->user_wired_count == 0)) {
			if (!user_wire)
				panic("vm_map_unwire: entry is unwired");

			entry = entry->vme_next;
			continue;
		}

		assert(entry->wired_count > 0 &&
		       (!user_wire || entry->user_wired_count > 0));

		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);

		/*
		 * Check for holes
		 * Holes: Next entry should be contiguous unless
		 *	  this is the end of the region.
		 */
		if (((entry->vme_end < end) &&
		     ((entry->vme_next == vm_map_to_entry(map)) ||
		      (entry->vme_next->vme_start > entry->vme_end)))) {

			if (!user_wire)
				panic("vm_map_unwire: non-contiguous region");
			entry = entry->vme_next;
			continue;
		}

		subtract_wire_counts(map, entry, user_wire);

		if (entry->wired_count != 0) {
			entry = entry->vme_next;
			continue;
		}

		if(entry->zero_wired_pages) {
			entry->zero_wired_pages = FALSE;
		}

		entry->in_transition = TRUE;
		tmp_entry = *entry;	/* see comment in vm_map_wire() */

		/*
		 * We can unlock the map now. The in_transition state
		 * guarantees existence of the entry.
		 */
		vm_map_unlock(map);
		if(map_pmap) {
			vm_fault_unwire(map,
					&tmp_entry, FALSE, map_pmap, pmap_addr);
		} else {
			vm_fault_unwire(map,
					&tmp_entry, FALSE, map->pmap,
					tmp_entry.vme_start);
		}
		vm_map_lock(map);

		if (last_timestamp+1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * or deleted after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
						 &first_entry)) {
				if (!user_wire)
					panic("vm_map_unwire: re-lookup failed");
				entry = first_entry->vme_next;
			} else
				entry = first_entry;
		}
		last_timestamp = map->timestamp;

		/*
		 * clear transition bit for all constituent entries that
		 * were in the original entry (saved in tmp_entry).  Also
		 * check for waiters.
		 */
		while ((entry != vm_map_to_entry(map)) &&
		       (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			entry = entry->vme_next;
		}
	}

	/*
	 * We might have fragmented the address space when we wired this
	 * range of addresses.  Attempt to re-coalesce these VM map entries
	 * with their neighbors now that they're no longer wired.
	 * Under some circumstances, address space fragmentation can
	 * prevent VM object shadow chain collapsing, which can cause
	 * swap space leaks.
	 */
	vm_map_simplify_range(map, start, end);

	vm_map_unlock(map);
	/*
	 * wake up anybody waiting on entries that we have unwired.
	 */
	if (need_wakeup)
		vm_map_entry_wakeup(map);
	return(KERN_SUCCESS);
}
kern_return_t
vm_map_unwire(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	boolean_t	user_wire)
{
	return vm_map_unwire_nested(map, start, end,
				    user_wire, (pmap_t)NULL, 0);
}
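
/*
 * Illustrative sketch (not part of the original source): the
 * vslock()/vsunlock()-style pattern built on these routines -- wire a
 * user buffer for the duration of an I/O and unwire it afterwards,
 * with user_wire = TRUE so the pages are charged against the user
 * wiring limits and tracked in user_wired_count.  Names are
 * hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_lock_user_buffer(vm_map_t user_map, vm_map_offset_t start, vm_map_offset_t end)
{
	return vm_map_wire_external(user_map, start, end,
				    VM_PROT_READ | VM_PROT_WRITE,
				    TRUE);	/* user_wire */
}

static kern_return_t
example_unlock_user_buffer(vm_map_t user_map, vm_map_offset_t start, vm_map_offset_t end)
{
	return vm_map_unwire(user_map, start, end, TRUE);	/* user_wire */
}
#endif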
/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(
	vm_map_t	map,
	vm_map_entry_t	entry)
{
	vm_map_offset_t	s, e;
	vm_object_t	object;
	vm_map_t	submap;

	s = entry->vme_start;
	e = entry->vme_end;
	assert(page_aligned(s));
	assert(page_aligned(e));
	if (entry->map_aligned == TRUE) {
		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
	}
	assert(entry->wired_count == 0);
	assert(entry->user_wired_count == 0);
	assert(!entry->permanent);

	if (entry->is_sub_map) {
		object = NULL;
		submap = VME_SUBMAP(entry);
	} else {
		submap = NULL;
		object = VME_OBJECT(entry);
	}

	vm_map_store_entry_unlink(map, entry);
	map->size -= e - s;

	vm_map_entry_dispose(map, entry);

	vm_map_unlock(map);
	/*
	 *	Deallocate the object only after removing all
	 *	pmap entries pointing to its pages.
	 */
	if (submap)
		vm_map_deallocate(submap);
	else
		vm_object_deallocate(object);

}
void
vm_map_submap_pmap_clean(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	sub_map,
	vm_map_offset_t	offset)
{
	vm_map_offset_t	submap_start;
	vm_map_offset_t	submap_end;
	vm_map_size_t	remove_size;
	vm_map_entry_t	entry;

	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if(vm_map_lookup_entry(sub_map, offset, &entry)) {

		remove_size = (entry->vme_end - entry->vme_start);
		if(offset > entry->vme_start)
			remove_size -= offset - entry->vme_start;

		if(submap_end < entry->vme_end) {
			remove_size -=
				entry->vme_end - submap_end;
		}
		if(entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if((map->mapped_in_other_pmaps) && (map->ref_count)
			   && (VME_OBJECT(entry) != NULL)) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					 offset -
					 entry->vme_start),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
					    (addr64_t)start,
					    (addr64_t)(start + remove_size));
			}
		}
	}

	entry = entry->vme_next;

	while((entry != vm_map_to_entry(sub_map))
	      && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if(submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if(entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if((map->mapped_in_other_pmaps) && (map->ref_count)
			   && (VME_OBJECT(entry) != NULL)) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
					    (addr64_t)((start + entry->vme_start)
						       - offset),
					    (addr64_t)(((start + entry->vme_start)
							- offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings. Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
static kern_return_t
vm_map_delete(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	int			flags,
	vm_map_t		zap_map)
{
	vm_map_entry_t		entry, next;
	struct vm_map_entry	*first_entry, tmp_entry;
	vm_map_offset_t		s;
	vm_object_t		object;
	boolean_t		need_wakeup;
	unsigned int		last_timestamp = ~0; /* unlikely value */
	int			interruptible;

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
		THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;

	while(1) {
		/*
		 *	Find the start of the region, and clip it
		 */
		if (vm_map_lookup_entry(map, start, &first_entry)) {
			entry = first_entry;
			if (map == kalloc_map &&
			    (entry->vme_start != start ||
			     entry->vme_end != end)) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				      "mismatched entry %p [0x%llx:0x%llx]\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t)entry->vme_start,
				      (uint64_t)entry->vme_end);
			}
			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
				/* extend request to whole entry */
				start = SUPERPAGE_ROUND_DOWN(start);
				continue;
			}
			if (start == entry->vme_start) {
				/*
				 * No need to clip.  We don't want to cause
				 * any unnecessary unnesting in this case...
				 */
			} else {
				if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
				    entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(map))) {
					/*
					 * The entry will no longer be
					 * map-aligned after clipping
					 * and the caller said it's OK.
					 */
					entry->map_aligned = FALSE;
				}
				if (map == kalloc_map) {
					panic("vm_map_delete(%p,0x%llx,0x%llx):"
					      " clipping %p at 0x%llx\n",
					      map,
					      (uint64_t)start,
					      (uint64_t)end,
					      entry,
					      (uint64_t)start);
				}
				vm_map_clip_start(map, entry, start);
			}

			/*
			 *	Fix the lookup hint now, rather than each
			 *	time through the loop.
			 */
			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
		} else {
			if (map->pmap == kernel_pmap &&
			    map->ref_count != 0) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				      "no map entry at 0x%llx\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      (uint64_t)start);
			}
			entry = first_entry->vme_next;
		}
		break;
	}
	if (entry->superpage_size)
		end = SUPERPAGE_ROUND_UP(end);

	need_wakeup = FALSE;
	/*
	 *	Step through all entries in this region
	 */
	s = entry->vme_start;
	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have deleted all the memory entries
		 * between "start" and "s".  We still need to delete
		 * all memory entries between "s" and "end".
		 * While we were blocked and the map was unlocked, some
		 * new memory entries could have been re-allocated between
		 * "start" and "s" and we don't want to mess with those.
		 * Some of those entries could even have been re-assembled
		 * with an entry after "s" (in vm_map_simplify_entry()), so
		 * we may have to vm_map_clip_start() again.
		 */

		if (entry->vme_start >= s) {
			/*
			 * This entry starts on or after "s"
			 * so no need to clip its start.
			 */
		} else {
			/*
			 * This entry has been re-assembled by a
			 * vm_map_simplify_entry().  We need to
			 * re-clip its start.
			 */
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(s,
						 VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				      "clipping %p at 0x%llx\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t)s);
			}
			vm_map_clip_start(map, entry, s);
		}
		if (entry->vme_end <= end) {
			/*
			 * This entry is going away completely, so no need
			 * to clip and possibly cause an unnecessary unnesting.
			 */
		} else {
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(end,
						 VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				      "clipping %p at 0x%llx\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t)end);
			}
			vm_map_clip_end(map, entry, end);
		}

		if (entry->permanent) {
			panic("attempt to remove permanent VM map entry "
			      "%p [0x%llx:0x%llx]\n",
			      entry, (uint64_t) s, (uint64_t) end);
		}

		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * Another thread is wiring/unwiring this entry.
			 * Let the other thread know we are waiting.
			 */
			assert(s == entry->vme_start);
			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already unwired/deleted.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}

			wait_result = vm_map_entry_wait(map, interruptible);

			if (interruptible &&
			    wait_result == THREAD_INTERRUPTED) {
				/*
				 * We do not clear the needs_wakeup flag,
				 * since we cannot tell if we were the only one.
				 */
				return KERN_ABORTED;
			}

			/*
			 * The entry could have been clipped or it
			 * may not exist anymore.  Look it up again.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: use the next entry
				 */
				entry = first_entry->vme_next;
				s = entry->vme_start;
			} else {
				entry = first_entry;
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			last_timestamp = map->timestamp;
			continue;
		} /* end in_transition */

		if (entry->wired_count) {
			boolean_t	user_wire;

			user_wire = entry->user_wired_count > 0;

			/*
			 *	Remove a kernel wiring if requested
			 */
			if (flags & VM_MAP_REMOVE_KUNWIRE) {
				entry->wired_count--;
			}

			/*
			 *	Remove all user wirings for proper accounting
			 */
			if (entry->user_wired_count > 0) {
				while (entry->user_wired_count)
					subtract_wire_counts(map, entry, user_wire);
			}

			if (entry->wired_count != 0) {
				assert(map != kernel_map);
				/*
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending
				 * on this page.  Either wait for the
				 * kernel wiring to go away or return an
				 * error.
				 */
				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
					wait_result_t wait_result;

					assert(s == entry->vme_start);
					entry->needs_wakeup = TRUE;
					wait_result = vm_map_entry_wait(map,
									interruptible);

					if (interruptible &&
					    wait_result == THREAD_INTERRUPTED) {
						/*
						 * We do not clear the
						 * needs_wakeup flag, since we
						 * cannot tell if we were the
						 * only one.
						 */
						return KERN_ABORTED;
					}

					/*
					 * The entry could have been clipped or
					 * it may not exist anymore.  Look it
					 * up again.
					 */
					if (!vm_map_lookup_entry(map, s,
								 &first_entry)) {
						assert(map != kernel_map);
						/*
						 * User: use the next entry
						 */
						entry = first_entry->vme_next;
						s = entry->vme_start;
					} else {
						entry = first_entry;
						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
					}
					last_timestamp = map->timestamp;
					continue;
				}
				else {
					return KERN_FAILURE;
				}
			}

			entry->in_transition = TRUE;
			/*
			 * copy current entry.  see comment in vm_map_wire()
			 */
			tmp_entry = *entry;
			assert(s == entry->vme_start);

			/*
			 * We can unlock the map now. The in_transition
			 * state guarantees existence of the entry.
			 */
			vm_map_unlock(map);

			if (tmp_entry.is_sub_map) {
				vm_map_t sub_map;
				vm_map_offset_t sub_start, sub_end;
				pmap_t pmap;
				vm_map_offset_t pmap_addr;

				sub_map = VME_SUBMAP(&tmp_entry);
				sub_start = VME_OFFSET(&tmp_entry);
				sub_end = sub_start + (tmp_entry.vme_end -
						       tmp_entry.vme_start);
				if (tmp_entry.use_pmap) {
					pmap = sub_map->pmap;
					pmap_addr = tmp_entry.vme_start;
				} else {
					pmap = map->pmap;
					pmap_addr = tmp_entry.vme_start;
				}
				(void) vm_map_unwire_nested(sub_map,
							    sub_start, sub_end,
							    user_wire,
							    pmap, pmap_addr);
			} else {

				if (VME_OBJECT(&tmp_entry) == kernel_object) {
					pmap_protect_options(
						map->pmap,
						tmp_entry.vme_start,
						tmp_entry.vme_end,
						VM_PROT_NONE,
						PMAP_OPTIONS_REMOVE,
						NULL);
				}
				vm_fault_unwire(map, &tmp_entry,
						VME_OBJECT(&tmp_entry) == kernel_object,
						map->pmap, tmp_entry.vme_start);
			}

			vm_map_lock(map);

			if (last_timestamp+1 != map->timestamp) {
				/*
				 * Find the entry again.  It could have
				 * been clipped after we unlocked the map.
				 */
				if (!vm_map_lookup_entry(map, s, &first_entry)){
					assert((map != kernel_map) &&
					       (!entry->is_sub_map));
					first_entry = first_entry->vme_next;
					s = first_entry->vme_start;
				} else {
					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				}
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				first_entry = entry;
			}

			last_timestamp = map->timestamp;

			entry = first_entry;
			while ((entry != vm_map_to_entry(map)) &&
			       (entry->vme_start < tmp_entry.vme_end)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				entry = entry->vme_next;
			}
			/*
			 * We have unwired the entry(s).  Go back and
			 * delete them.
			 */
			entry = first_entry;
			continue;
		}

		/* entry is unwired */
		assert(entry->wired_count == 0);
		assert(entry->user_wired_count == 0);

		assert(s == entry->vme_start);

		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
			/*
			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
			 * vm_map_delete(), some map entries might have been
			 * transferred to a "zap_map", which doesn't have a
			 * pmap.  The original pmap has already been flushed
			 * in the vm_map_delete() call targeting the original
			 * map, but when we get to destroying the "zap_map",
			 * we don't have any pmap to flush, so let's just skip
			 * this step.
			 */
		} else if (entry->is_sub_map) {
			if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
				int pmap_flags;

				if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
					/*
					 * This is the final cleanup of the
					 * address space being terminated.
					 * No new mappings are expected and
					 * we don't really need to unnest the
					 * shared region (and lose the "global"
					 * pmap mappings, if applicable).
					 *
					 * Tell the pmap layer that we're
					 * "clean" wrt nesting.
					 */
					pmap_flags = PMAP_UNNEST_CLEAN;
				} else {
					/*
					 * We're unmapping part of the nested
					 * shared region, so we can't keep the
					 * nested pmap mappings for this range.
					 */
					pmap_flags = 0;
				}
				pmap_unnest_options(
					map->pmap,
					(addr64_t)entry->vme_start,
					entry->vme_end - entry->vme_start,
					pmap_flags);
#endif	/* NO_NESTED_PMAP */
				if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
					/* clean up parent map/maps */
					vm_map_submap_pmap_clean(
						map, entry->vme_start,
						entry->vme_end,
						VME_SUBMAP(entry),
						VME_OFFSET(entry));
				}
			} else {
				vm_map_submap_pmap_clean(
					map, entry->vme_start, entry->vme_end,
					VME_SUBMAP(entry),
					VME_OFFSET(entry));
			}
		} else if (VME_OBJECT(entry) != kernel_object &&
			   VME_OBJECT(entry) != compressor_object) {
			object = VME_OBJECT(entry);
			if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
				vm_object_pmap_protect_options(
					object, VME_OFFSET(entry),
					entry->vme_end - entry->vme_start,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
				   (map->pmap == kernel_pmap)) {
				/* Remove translations associated
				 * with this range unless the entry
				 * does not have an object, or
				 * it's the kernel map or a descendant
				 * since the platform could potentially
				 * create "backdoor" mappings invisible
				 * to the VM. It is expected that
				 * objectless, non-kernel ranges
				 * do not have such VM invisible
				 * translations.
				 */
				pmap_remove_options(map->pmap,
						    (addr64_t)entry->vme_start,
						    (addr64_t)entry->vme_end,
						    PMAP_OPTIONS_REMOVE);
			}
		}

		if (entry->iokit_acct) {
			/* alternate accounting */
			DTRACE_VM4(vm_map_iokit_unmapped_region,
				   vm_map_t, map,
				   vm_map_offset_t, entry->vme_start,
				   vm_map_offset_t, entry->vme_end,
				   int, VME_ALIAS(entry));
			vm_map_iokit_unmapped_region(map,
						     (entry->vme_end -
						      entry->vme_start));
			entry->iokit_acct = FALSE;
		}

		/*
		 * All pmap mappings for this map entry must have been
		 * cleared by now.
		 */
#if DEBUG
		assert(vm_map_pmap_is_empty(map,
					    entry->vme_start,
					    entry->vme_end));
#endif /* DEBUG */

		next = entry->vme_next;

		if (map->pmap == kernel_pmap &&
		    map->ref_count != 0 &&
		    entry->vme_end < end &&
		    (next == vm_map_to_entry(map) ||
		     next->vme_start != entry->vme_end)) {
			panic("vm_map_delete(%p,0x%llx,0x%llx): "
			      "hole after %p at 0x%llx\n",
			      map,
			      (uint64_t)start,
			      (uint64_t)end,
			      entry,
			      (uint64_t)entry->vme_end);
		}

		s = next->vme_start;
		last_timestamp = map->timestamp;

		if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
		    zap_map != VM_MAP_NULL) {
			vm_map_size_t entry_size;
			/*
			 * The caller wants to save the affected VM map entries
			 * into the "zap_map".  The caller will take care of
			 * these entries.
			 */
			/* unlink the entry from "map" ... */
			vm_map_store_entry_unlink(map, entry);
			/* ... and add it to the end of the "zap_map" */
			vm_map_store_entry_link(zap_map,
						vm_map_last_entry(zap_map),
						entry);
			entry_size = entry->vme_end - entry->vme_start;
			map->size -= entry_size;
			zap_map->size += entry_size;
			/* we didn't unlock the map, so no timestamp increase */
		} else {
			vm_map_entry_delete(map, entry);
			/* vm_map_entry_delete unlocks the map */
			vm_map_lock(map);
		}

		entry = next;

		if(entry == vm_map_to_entry(map)) {
			break;
		}
		if (last_timestamp+1 != map->timestamp) {
			/*
			 * we are responsible for deleting everything
			 * from the given space, if someone has interfered
			 * we pick up where we left off, back fills should
			 * be all right for anyone except map_delete and
			 * we have to assume that the task has been fully
			 * disabled before we get here
			 */
			if (!vm_map_lookup_entry(map, s, &entry)){
				entry = entry->vme_next;
				s = entry->vme_start;
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			/*
			 * others can not only allocate behind us, we can
			 * also see coalesce while we don't have the map lock
			 */
			if(entry == vm_map_to_entry(map)) {
				break;
			}
		}
		last_timestamp = map->timestamp;
	}

	if (map->wait_for_space)
		thread_wakeup((event_t) map);
	/*
	 * wake up anybody waiting on entries that we have already deleted.
	 */
	if (need_wakeup)
		vm_map_entry_wakeup(map);

	return KERN_SUCCESS;
}
/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	boolean_t	flags)
{
	kern_return_t	result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	/*
	 * For the zone_map, the kernel controls the allocation/freeing of memory.
	 * Any free to the zone_map should be within the bounds of the map and
	 * should free up memory.  If the VM_MAP_RANGE_CHECK() silently converts a
	 * free to the zone_map into a no-op, there is a problem and we should
	 * panic.
	 */
	if ((map == zone_map) && (start == end))
		panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return(result);
}
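
/*
 * Illustrative sketch (not part of the original source): tearing down
 * a previously created mapping.  VM_MAP_REMOVE_KUNWIRE drops the
 * single kernel wiring added by vm_map_wire() so the deletion does
 * not have to wait for it; VM_MAP_NO_FLAGS is the common case.
 * Helper names are hypothetical.
 */
#if 0	/* example only -- not compiled */
static void
example_unmap_wired_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	(void) vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
}

static void
example_unmap_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	(void) vm_map_remove(map, start, end, VM_MAP_NO_FLAGS);
}
#endif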
/*
 *	vm_map_remove_locked:
 *
 *	Remove the given address range from the target locked map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove_locked(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	boolean_t	flags)
{
	kern_return_t	result;

	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	return(result);
}
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return;

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		       vm_map_copy_to_entry(copy)) {
			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			if (entry->is_sub_map) {
				vm_map_deallocate(VME_SUBMAP(entry));
			} else {
				vm_object_deallocate(VME_OBJECT(entry));
			}
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:

		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kalloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		if (copy->size > msg_ool_size_small || copy->offset)
			panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
			      (long long)copy->size, (long long)copy->offset);
		kfree(copy, copy->size + cpy_kdata_hdr_sz);
		return;
	}
	zfree(vm_map_copy_zone, copy);
}
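
/*
 * Illustrative sketch (not part of the original source): the usual
 * life cycle of a vm_map_copy_t.  The copy object produced by
 * vm_map_copyin() is either consumed by a successful
 * vm_map_copy_overwrite() (defined later in this file) or must be
 * disposed of explicitly with vm_map_copy_discard().  Helper name is
 * hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_copy_between_maps(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* failure: copy not consumed */
	return kr;
}
#endif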
/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	new_copy;

	if (copy == VM_MAP_COPY_NULL)
		return VM_MAP_COPY_NULL;

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
	*new_copy = *copy;

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
			= vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
			= vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	dst_size)
{
	vm_map_offset_t	dst_end;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;
	kern_return_t	result;
	boolean_t	encountered_sub_map = FALSE;


	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
				    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}

	vm_map_clip_start(dst_map,
			  tmp_entry,
			  vm_map_trunc_page(dst_addr,
					    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	for (entry = tmp_entry;;) {
		vm_map_entry_t	next;

		next = entry->vme_next;
		while(entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			sub_start = VME_OFFSET(entry);

			if(entry->vme_end < dst_end)
				sub_end = entry->vme_end;
			else
				sub_end = dst_end;
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			if(result != KERN_SUCCESS)
				return result;
			if (dst_end <= entry->vme_end)
				return KERN_SUCCESS;
			vm_map_lock(dst_map);
			if(!vm_map_lookup_entry(dst_map, local_end,
						&tmp_entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {

			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 *	our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 *	check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}

		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		     (VME_OBJECT(entry)->true_share))) {
			if(encountered_sub_map) {
				vm_map_unlock(dst_map);
				return(KERN_FAILURE);
			}
		}

		entry = next;
	}/* for */
	vm_map_unlock(dst_map);
	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory.  If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */
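/*
 *	For illustration only, a typical calling sequence (the real call
 *	sites live in the IPC and BSD layers) looks roughly like:
 *
 *		vm_map_copy_t	copy;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *
 *	On success the copy object is consumed; on failure it still
 *	belongs to the caller, as described above.
 */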
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap,
	boolean_t		discard_on_success)
{
	vm_map_offset_t		dst_end;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	kern_return_t		kr;
	boolean_t		aligned = TRUE;
	boolean_t		contains_permanent_objects = FALSE;
	boolean_t		encountered_sub_map = FALSE;
	vm_map_offset_t		base_addr;
	vm_map_size_t		copy_size;
	vm_map_size_t		total_size;

	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL)
		return(KERN_SUCCESS);

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return(vm_map_copyout_kernel_buffer(
			       dst_map, &dst_addr,
			       copy, copy->size, TRUE, discard_on_success));
	}

	/*
	 *	Only works for entry lists at the moment.  Will
	 *	support page lists later.
	 */

	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);

	if (copy->size == 0) {
		if (discard_on_success)
			vm_map_copy_discard(copy);
		return(KERN_SUCCESS);
	}

	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	if (!VM_MAP_PAGE_ALIGNED(copy->size,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(copy->offset,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(dst_addr,
				 VM_MAP_PAGE_MASK(dst_map)))
	{
		aligned = FALSE;
		dst_end = vm_map_round_page(dst_addr + copy->size,
					    VM_MAP_PAGE_MASK(dst_map));
	} else {
		dst_end = dst_addr + copy->size;
	}

	vm_map_lock(dst_map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (dst_addr >= dst_map->max_offset) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}
	vm_map_clip_start(dst_map,
			  tmp_entry,
			  vm_map_trunc_page(dst_addr,
					    VM_MAP_PAGE_MASK(dst_map)));
	for (entry = tmp_entry;;) {
		vm_map_entry_t	next = entry->vme_next;

		while(entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/* if needs_copy we are a COW submap */
				/* in such a case we just replace so */
				/* there is no need for the follow-  */
				/* ing check.                        */
				encountered_sub_map = TRUE;
				sub_start = VME_OFFSET(entry);

				if(entry->vme_end < dst_end)
					sub_end = entry->vme_end;
				else
					sub_end = dst_end;
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				vm_map_unlock(dst_map);

				kr = vm_map_overwrite_submap_recurse(
					VME_SUBMAP(entry),
					sub_start,
					sub_end - sub_start);
				if(kr != KERN_SUCCESS)
					return kr;
				vm_map_lock(dst_map);
			}

			if (dst_end <= entry->vme_end)
				goto start_overwrite;
			if(!vm_map_lookup_entry(dst_map, local_end,
						&entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			next = entry->vme_next;
		}

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 *	our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end)
			break;
		/*
		 *	check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}

		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		     (VME_OBJECT(entry)->true_share))) {
			contains_permanent_objects = TRUE;
		}

		entry = next;
	}/* for */

start_overwrite:
	/*
	 *	If there are permanent objects in the destination, then
	 *	the copy cannot be interrupted.
	 */

	if (interruptible && contains_permanent_objects) {
		vm_map_unlock(dst_map);
		return(KERN_FAILURE);	/* XXX */
	}

	/*
	 *	Make a second pass, overwriting the data
	 *	At the beginning of each loop iteration,
	 *	the next entry to be overwritten is "tmp_entry"
	 *	(initially, the value returned from the lookup above),
	 *	and the starting address expected in that entry
	 *	is "start".
	 */

	total_size = copy->size;
	if(encountered_sub_map) {
		copy_size = 0;
		/* re-calculate tmp_entry since we've had the map */
		/* unlocked */
		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
	} else {
		copy_size = copy->size;
	}

	base_addr = dst_addr;
	while(TRUE) {
		/* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptible case */
		vm_map_entry_t	copy_entry;
		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
		int		nentries;
		int		remaining_entries = 0;
		vm_map_offset_t	new_offset = 0;

		for (entry = tmp_entry; copy_size == 0;) {
			vm_map_entry_t	next;

			next = entry->vme_next;

			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
			/* may reflect the distance between them */
			/* if the current entry is found to be in transition */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr */
			/* we will zero copy_size accordingly. */
			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				if(!vm_map_lookup_entry(dst_map, base_addr,
							&tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				copy_size = 0;
				entry = tmp_entry;
				continue;
			}
			if(entry->is_sub_map) {
				vm_map_offset_t	sub_start;
				vm_map_offset_t	sub_end;
				vm_map_offset_t	local_end;

				if (entry->needs_copy) {
					/* if this is a COW submap */
					/* just back the range with a */
					/* anonymous entry */
					if(entry->vme_end < dst_end)
						sub_end = entry->vme_end;
					else
						sub_end = dst_end;
					if(entry->vme_start < base_addr)
						sub_start = base_addr;
					else
						sub_start = entry->vme_start;
					vm_map_clip_end(
						dst_map, entry, sub_end);
					vm_map_clip_start(
						dst_map, entry, sub_start);
					assert(!entry->use_pmap);
					entry->is_sub_map = FALSE;
					vm_map_deallocate(
						VME_SUBMAP(entry));
					VME_SUBMAP_SET(entry, NULL);
					entry->is_shared = FALSE;
					entry->needs_copy = FALSE;
					VME_OFFSET_SET(entry, 0);
					/*
					 * XXX FBDP
					 * We should propagate the protections
					 * of the submap entry here instead
					 * of forcing them to VM_PROT_ALL...
					 * Or better yet, we should inherit
					 * the protection of the copy_entry.
					 */
					entry->protection = VM_PROT_ALL;
					entry->max_protection = VM_PROT_ALL;
					entry->wired_count = 0;
					entry->user_wired_count = 0;
					if(entry->inheritance
					   == VM_INHERIT_SHARE)
						entry->inheritance = VM_INHERIT_COPY;
					continue;
				}
				/* first take care of any non-sub_map */
				/* entries to send */
				if(base_addr < entry->vme_start) {
					/* stuff to send */
					copy_size =
						entry->vme_start - base_addr;
					break;
				}
				sub_start = VME_OFFSET(entry);

				if(entry->vme_end < dst_end)
					sub_end = entry->vme_end;
				else
					sub_end = dst_end;
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				local_end = entry->vme_end;
				vm_map_unlock(dst_map);
				copy_size = sub_end - sub_start;

				/* adjust the copy object */
				if (total_size > copy_size) {
					vm_map_size_t	local_size = 0;
					vm_map_size_t	entry_size;

					nentries = 1;
					new_offset = copy->offset;
					copy_entry = vm_map_copy_first_entry(copy);
					while(copy_entry !=
					      vm_map_copy_to_entry(copy)){
						entry_size = copy_entry->vme_end -
							copy_entry->vme_start;
						if((local_size < copy_size) &&
						   ((local_size + entry_size)
						    >= copy_size)) {
							vm_map_copy_clip_end(copy,
									     copy_entry,
									     copy_entry->vme_start +
									     (copy_size - local_size));
							entry_size = copy_entry->vme_end -
								copy_entry->vme_start;
							local_size += entry_size;
							new_offset += entry_size;
						}
						if(local_size >= copy_size) {
							next_copy = copy_entry->vme_next;
							copy_entry->vme_next =
								vm_map_copy_to_entry(copy);
							previous_prev =
								copy->cpy_hdr.links.prev;
							copy->cpy_hdr.links.prev = copy_entry;
							copy->size = copy_size;
							remaining_entries =
								copy->cpy_hdr.nentries;
							remaining_entries -= nentries;
							copy->cpy_hdr.nentries = nentries;
							break;
						} else {
							local_size += entry_size;
							new_offset += entry_size;
							nentries++;
						}
						copy_entry = copy_entry->vme_next;
					}
				}

				if((entry->use_pmap) && (pmap == NULL)) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						VME_SUBMAP(entry)->pmap,
						TRUE);
				} else if (pmap != NULL) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible, pmap,
						TRUE);
				} else {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						dst_map->pmap,
						TRUE);
				}
				if(kr != KERN_SUCCESS) {
					if(next_copy != NULL) {
						copy->cpy_hdr.nentries +=
							remaining_entries;
						copy->cpy_hdr.links.prev->vme_next =
							next_copy;
						copy->cpy_hdr.links.prev
							= previous_prev;
						copy->size = total_size;
					}
					return kr;
				}
				if (dst_end <= local_end) {
					return(KERN_SUCCESS);
				}
				/* otherwise copy no longer exists, it was */
				/* destroyed after successful copy_overwrite */
				copy = (vm_map_copy_t)
					zalloc(vm_map_copy_zone);
				copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
				vm_map_copy_first_entry(copy) =
					vm_map_copy_last_entry(copy) =
					vm_map_copy_to_entry(copy);
				copy->type = VM_MAP_COPY_ENTRY_LIST;
				copy->offset = new_offset;

				/*
				 * XXX FBDP
				 * this does not seem to deal with
				 * the VM map store (R&B tree)
				 */

				total_size -= copy_size;
				copy_size = 0;
				/* put back remainder of copy in container */
				if(next_copy != NULL) {
					copy->cpy_hdr.nentries = remaining_entries;
					copy->cpy_hdr.links.next = next_copy;
					copy->cpy_hdr.links.prev = previous_prev;
					copy->size = total_size;
					next_copy->vme_prev =
						vm_map_copy_to_entry(copy);
					next_copy = NULL;
				}
				base_addr = local_end;
				vm_map_lock(dst_map);
				if(!vm_map_lookup_entry(dst_map,
							local_end, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				entry = tmp_entry;
				continue;
			}
			if (dst_end <= entry->vme_end) {
				copy_size = dst_end - base_addr;
				break;
			}

			if ((next == vm_map_to_entry(dst_map)) ||
			    (next->vme_start != entry->vme_end)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}

			entry = next;
		}/* for */

		next_copy = NULL;
		nentries = 1;

		/* adjust the copy object */
		if (total_size > copy_size) {
			vm_map_size_t	local_size = 0;
			vm_map_size_t	entry_size;

			new_offset = copy->offset;
			copy_entry = vm_map_copy_first_entry(copy);
			while(copy_entry != vm_map_copy_to_entry(copy)) {
				entry_size = copy_entry->vme_end -
					copy_entry->vme_start;
				if((local_size < copy_size) &&
				   ((local_size + entry_size)
				    >= copy_size)) {
					vm_map_copy_clip_end(copy, copy_entry,
							     copy_entry->vme_start +
							     (copy_size - local_size));
					entry_size = copy_entry->vme_end -
						copy_entry->vme_start;
					local_size += entry_size;
					new_offset += entry_size;
				}
				if(local_size >= copy_size) {
					next_copy = copy_entry->vme_next;
					copy_entry->vme_next =
						vm_map_copy_to_entry(copy);
					previous_prev =
						copy->cpy_hdr.links.prev;
					copy->cpy_hdr.links.prev = copy_entry;
					copy->size = copy_size;
					remaining_entries =
						copy->cpy_hdr.nentries;
					remaining_entries -= nentries;
					copy->cpy_hdr.nentries = nentries;
					break;
				} else {
					local_size += entry_size;
					new_offset += entry_size;
					nentries++;
				}
				copy_entry = copy_entry->vme_next;
			}
		}

		if (aligned) {
			pmap_t	local_pmap;

			if(pmap)
				local_pmap = pmap;
			else
				local_pmap = dst_map->pmap;

			if ((kr = vm_map_copy_overwrite_aligned(
				     dst_map, tmp_entry, copy,
				     base_addr, local_pmap)) != KERN_SUCCESS) {
				if(next_copy != NULL) {
					copy->cpy_hdr.nentries +=
						remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
						next_copy;
					copy->cpy_hdr.links.prev =
						previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
			vm_map_unlock(dst_map);
		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst address are misaligned but the same
			 * offset within the page we can copy_not_aligned the
			 * misaligned parts and copy aligned the rest.  If they are
			 * aligned but len is unaligned we simply need to copy
			 * the end bit unaligned.  We'll need to split the misaligned
			 * bits of the region in this case !
			 */
			/* ALWAYS UNLOCKS THE dst_map MAP */
			kr = vm_map_copy_overwrite_unaligned(
				dst_map,
				tmp_entry,
				copy,
				base_addr,
				discard_on_success);
			if (kr != KERN_SUCCESS) {
				if(next_copy != NULL) {
					copy->cpy_hdr.nentries +=
						remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
						next_copy;
					copy->cpy_hdr.links.prev =
						previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
		}
		total_size -= copy_size;
		if(total_size == 0)
			break;
		base_addr += copy_size;
		copy_size = 0;
		copy->offset = new_offset;
		if(next_copy != NULL) {
			copy->cpy_hdr.nentries = remaining_entries;
			copy->cpy_hdr.links.next = next_copy;
			copy->cpy_hdr.links.prev = previous_prev;
			next_copy->vme_prev = vm_map_copy_to_entry(copy);
			copy->size = total_size;
		}
		vm_map_lock(dst_map);
		while(TRUE) {
			if (!vm_map_lookup_entry(dst_map,
						 base_addr, &tmp_entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			if (tmp_entry->in_transition) {
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);
			} else {
				break;
			}
		}
		vm_map_clip_start(dst_map,
				  tmp_entry,
				  vm_map_trunc_page(base_addr,
						    VM_MAP_PAGE_MASK(dst_map)));

		entry = tmp_entry;
	} /* while */

	/*
	 *	Throw away the vm_map_copy object
	 */
	if (discard_on_success)
		vm_map_copy_discard(copy);

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite */
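/*
 *	vm_map_copy_overwrite() below is a wrapper around
 *	vm_map_copy_overwrite_nested().  When the copy is large enough and
 *	the caller is not interruptible, it peels off any misaligned "head"
 *	and "tail" portion of the transfer (see head_size/tail_size) so
 *	that the bulk of the data can go through the aligned
 *	entry-replacement path and only the first and last partial pages
 *	take the physical-copy path.  On failure it re-assembles the
 *	original "copy" object so the caller still owns a consistent
 *	copy map.
 */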
kern_return_t
vm_map_copy_overwrite(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy,
	boolean_t	interruptible)
{
	vm_map_size_t	head_size, tail_size;
	vm_map_copy_t	head_copy, tail_copy;
	vm_map_offset_t	head_addr, tail_addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	head_size = 0;
	tail_size = 0;
	head_copy = NULL;
	tail_copy = NULL;
	head_addr = 0;
	tail_addr = 0;

	if (interruptible ||
	    copy == VM_MAP_COPY_NULL ||
	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * We can't split the "copy" map if we're interruptible
		 * or if we don't have a "copy" map...
		 */
	blunt_copy:
		return vm_map_copy_overwrite_nested(dst_map,
						    dst_addr,
						    copy,
						    interruptible,
						    (pmap_t) NULL,
						    TRUE);
	}

	if (copy->size < 3 * PAGE_SIZE) {
		/*
		 * Too small to bother with optimizing...
		 */
		goto blunt_copy;
	}

	if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
	    (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
		/*
		 * Incompatible mis-alignment of source and destination...
		 */
		goto blunt_copy;
	}

	/*
	 * Proper alignment or identical mis-alignment at the beginning.
	 * Let's try and do a small unaligned copy first (if needed)
	 * and then an aligned copy for the rest.
	 */
	if (!page_aligned(dst_addr)) {
		head_addr = dst_addr;
		head_size = (VM_MAP_PAGE_SIZE(dst_map) -
			     (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
	}
	if (!page_aligned(copy->offset + copy->size)) {
		/*
		 * Mis-alignment at the end.
		 * Do an aligned copy up to the last page and
		 * then an unaligned copy for the remaining bytes.
		 */
		tail_size = ((copy->offset + copy->size) &
			     VM_MAP_PAGE_MASK(dst_map));
		tail_addr = dst_addr + copy->size - tail_size;
	}

	if (head_size + tail_size == copy->size) {
		/*
		 * It's all unaligned, no optimization possible...
		 */
		goto blunt_copy;
	}

	/*
	 * Can't optimize if there are any submaps in the
	 * destination due to the way we free the "copy" map
	 * progressively in vm_map_copy_overwrite_nested()
	 * in that case.
	 */
	vm_map_lock_read(dst_map);
	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
		vm_map_unlock_read(dst_map);
		goto blunt_copy;
	}
	for (;
	     (entry != vm_map_copy_to_entry(copy) &&
	      entry->vme_start < dst_addr + copy->size);
	     entry = entry->vme_next) {
		if (entry->is_sub_map) {
			vm_map_unlock_read(dst_map);
			goto blunt_copy;
		}
	}
	vm_map_unlock_read(dst_map);

	if (head_size) {
		/*
		 * Unaligned copy of the first "head_size" bytes, to reach
		 * a page boundary.
		 */

		/*
		 * Extract "head_copy" out of "copy".
		 */
		head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
		head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
		vm_map_copy_first_entry(head_copy) =
			vm_map_copy_to_entry(head_copy);
		vm_map_copy_last_entry(head_copy) =
			vm_map_copy_to_entry(head_copy);
		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
		head_copy->cpy_hdr.nentries = 0;
		head_copy->cpy_hdr.entries_pageable =
			copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&head_copy->cpy_hdr);

		head_copy->offset = copy->offset;
		head_copy->size = head_size;

		copy->offset += head_size;
		copy->size -= head_size;

		entry = vm_map_copy_first_entry(copy);
		vm_map_copy_clip_end(copy, entry, copy->offset);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(head_copy,
				       vm_map_copy_to_entry(head_copy),
				       entry);

		/*
		 * Do the unaligned copy.
		 */
		kr = vm_map_copy_overwrite_nested(dst_map,
						  head_addr,
						  head_copy,
						  interruptible,
						  (pmap_t) NULL,
						  FALSE);
		if (kr != KERN_SUCCESS)
			goto done;
	}

	if (tail_size) {
		/*
		 * Extract "tail_copy" out of "copy".
		 */
		tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
		tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
		vm_map_copy_first_entry(tail_copy) =
			vm_map_copy_to_entry(tail_copy);
		vm_map_copy_last_entry(tail_copy) =
			vm_map_copy_to_entry(tail_copy);
		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
		tail_copy->cpy_hdr.nentries = 0;
		tail_copy->cpy_hdr.entries_pageable =
			copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&tail_copy->cpy_hdr);

		tail_copy->offset = copy->offset + copy->size - tail_size;
		tail_copy->size = tail_size;

		copy->size -= tail_size;

		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(tail_copy,
				       vm_map_copy_last_entry(tail_copy),
				       entry);
	}

	/*
	 * Copy most (or possibly all) of the data.
	 */
	kr = vm_map_copy_overwrite_nested(dst_map,
					  dst_addr + head_size,
					  copy,
					  interruptible,
					  (pmap_t) NULL,
					  FALSE);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	if (tail_size) {
		kr = vm_map_copy_overwrite_nested(dst_map,
						  tail_addr,
						  tail_copy,
						  interruptible,
						  (pmap_t) NULL,
						  FALSE);
	}

done:
	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
	if (kr == KERN_SUCCESS) {
		/*
		 * Discard all the copy maps.
		 */
		if (head_copy) {
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		vm_map_copy_discard(copy);
		if (tail_copy) {
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	} else {
		/*
		 * Re-assemble the original copy map.
		 */
		if (head_copy) {
			entry = vm_map_copy_first_entry(head_copy);
			vm_map_copy_entry_unlink(head_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_to_entry(copy),
					       entry);
			copy->offset -= head_size;
			copy->size += head_size;
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		if (tail_copy) {
			entry = vm_map_copy_last_entry(tail_copy);
			vm_map_copy_entry_unlink(tail_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       entry);
			copy->size += tail_size;
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	}
	return kr;
}
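/*
 *	Worked example of the head/tail split above, assuming 4KB map pages
 *	(the actual page size comes from VM_MAP_PAGE_SIZE(dst_map)):
 *
 *		copy->offset = 0x1180, copy->size = 0x3000, dst_addr = 0x7180
 *
 *		head_addr = 0x7180
 *		head_size = 0x1000 - (0x1180 & 0xFFF) = 0xE80
 *		tail_size = (0x1180 + 0x3000) & 0xFFF  = 0x180
 *		tail_addr = 0x7180 + 0x3000 - 0x180    = 0xA000
 *
 *	so bytes [0x7180, 0x8000) and [0xA000, 0xA180) are copied through
 *	the unaligned path, and the aligned middle [0x8000, 0xA000) goes
 *	through vm_map_copy_overwrite_nested() on page-aligned boundaries.
 */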
/*
 *	Routine:	vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *	Physically copy unaligned data
 *
 *	Implementation:
 *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible, however vm_fault_copy copies
 *	within 1 memory object so we have to find the smaller of "amount left",
 *	"source object data size" and "target object data size".  With
 *	unaligned data we don't need to split regions, therefore the source
 *	(copy) object should be one map entry, the target range may be split
 *	over multiple map entries however.  In any event we are pessimistic
 *	about these assumptions.
 *
 *	Assumptions:
 *	dst_map is locked on entry and is returned locked on success,
 *	unlocked on error.
 */
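/*
 *	In effect, each pass of the loop below copies
 *
 *		copy_size = MIN(dst_size, src_size, amount_left)
 *
 *	bytes with vm_fault_copy(), where dst_size is what is left of the
 *	current destination entry, src_size what is left of the current
 *	source copy entry, and amount_left what is left of the whole
 *	transfer.
 */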
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	boolean_t	discard_on_success)
{
	vm_map_entry_t		copy_entry;
	vm_map_entry_t		copy_entry_next;
	vm_map_version_t	version;
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	vm_object_offset_t	src_offset;
	vm_object_offset_t	entry_offset;
	vm_map_offset_t		entry_end;
	vm_map_size_t		src_size,
				dst_size,
				copy_size,
				amount_left;
	kern_return_t		kr = KERN_SUCCESS;


	copy_entry = vm_map_copy_first_entry(copy);

	vm_map_lock_write_to_read(dst_map);

	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
	amount_left = copy->size;
	/*
	 *	unaligned so we never clipped this entry, we need the offset into
	 *	the vm_object not just the data.
	 */
	while (amount_left > 0) {

		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert ((start>=entry->vme_start) && (start<entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
			(copy_entry->vme_start + src_offset);

		if (dst_size < src_size) {
			/*
			 *	we can only copy dst_size bytes before
			 *	we have to get the next destination entry
			 */
			copy_size = dst_size;
		} else {
			/*
			 *	we can only copy src_size bytes before
			 *	we have to get the next source copy entry
			 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
		/*
		 *	Entry needs copy, create a shadow object for
		 *	Copy on write region.
		 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0))
		{
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			VME_OBJECT_SHADOW(entry,
					  (vm_map_size_t)(entry->vme_end
							  - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = VME_OBJECT(entry);
		/*
		 *	unlike with the virtual (aligned) copy we're going
		 *	to fault on it therefore we need a target object.
		 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
							entry->vme_end - entry->vme_start);
			VME_OBJECT(entry) = dst_object;
			VME_OFFSET_SET(entry, 0);
			assert(entry->use_pmap);
			vm_map_lock_write_to_read(dst_map);
		}
		/*
		 *	Take an object reference and unlock map. The "entry" may
		 *	disappear or change when the map is unlocked.
		 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = VME_OFFSET(entry);
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
		/*
		 *	Copy as much as possible in one pass
		 */
		kr = vm_fault_copy(
			VME_OBJECT(copy_entry),
			VME_OFFSET(copy_entry) + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT );

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
		/*
		 *	Release the object reference
		 */
		vm_object_deallocate(dst_object);
		/*
		 *	If a hard error occurred, return it now
		 */
		if (kr != KERN_SUCCESS)
			return kr;

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0)
		{
			/*
			 *	all done with this copy entry, dispose.
			 */
			copy_entry_next = copy_entry->vme_next;

			if (discard_on_success) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				assert(!copy_entry->is_sub_map);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
			    amount_left) {
				/*
				 *	not finished copying but run out of source
				 */
				return KERN_INVALID_ADDRESS;
			}

			copy_entry = copy_entry_next;

			src_offset = 0;
		}

		if (amount_left == 0)
			return KERN_SUCCESS;

		vm_map_lock_read(dst_map);
		if (version.main_timestamp == dst_map->timestamp) {
			if (start == entry_end) {
				/*
				 *	destination region is split.  Use the version
				 *	information to avoid a lookup in the normal
				 *	case.
				 */
				entry = entry->vme_next;
				/*
				 *	should be contiguous. Fail if we encounter
				 *	a hole in the destination.
				 */
				if (start != entry->vme_start) {
					vm_map_unlock_read(dst_map);
					return KERN_INVALID_ADDRESS;
				}
			}
		} else {
			/*
			 *	Map version check failed.
			 *	we must lookup the entry because somebody
			 *	might have changed the map behind our backs.
			 */
		RetryLookup:
			if (!vm_map_lookup_entry(dst_map, start, &entry))
			{
				vm_map_unlock_read(dst_map);
				return KERN_INVALID_ADDRESS;
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
/*
 *	Routine:	vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *	Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *
 *	If there are no permanent objects in the destination,
 *	and the source and destination map entry zones match,
 *	and the destination map entry is not shared,
 *	then the map entries can be deleted and replaced
 *	with those from the copy.  The following code is the
 *	basic idea of what to do, but there are lots of annoying
 *	little details about getting protection and inheritance
 *	right.  Should add protection, inheritance, and sharing checks
 *	to the above pass and make sure that no wiring is involved.
 */

int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;
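/*
 *	The three counters above record how often the entry-replacement
 *	fast path is abandoned in favor of a physical copy ("slow_copy"):
 *	because the source object is large and only a few of its pages are
 *	being copied (src_large), because the source is backed by an
 *	external object (src_not_internal), or because it is truly shared
 *	or uses a non-symmetric copy strategy (src_not_symmetric).
 */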
static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
{
	vm_object_t	object;
	vm_map_entry_t	copy_entry;
	vm_map_size_t	copy_size;
	vm_map_size_t	size;
	vm_map_entry_t	entry;

	while ((copy_entry = vm_map_copy_first_entry(copy))
	       != vm_map_copy_to_entry(copy))
	{
		copy_size = (copy_entry->vme_end - copy_entry->vme_start);

		entry = tmp_entry;
		if (entry->is_sub_map) {
			/* unnested when clipped earlier */
			assert(!entry->use_pmap);
		}
		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		size = (entry->vme_end - entry->vme_start);
		/*
		 *	Make sure that no holes popped up in the
		 *	address map, and that the protection is
		 *	still valid, in case the map was unlocked
		 *	earlier.
		 */

		if ((entry->vme_start != start) || ((entry->is_sub_map)
						    && !entry->needs_copy)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
		assert(entry != vm_map_to_entry(dst_map));

		/*
		 *	Check protection again
		 */

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	Adjust to source size first
		 */

		if (copy_size < size) {
			if (entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
						 VM_MAP_PAGE_MASK(dst_map))) {
				/* no longer map-aligned */
				entry->map_aligned = FALSE;
			}
			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
			size = copy_size;
		}

		/*
		 *	Adjust to destination size
		 */

		if (size < copy_size) {
			vm_map_copy_clip_end(copy, copy_entry,
					     copy_entry->vme_start + size);
			copy_size = size;
		}

		assert((entry->vme_end - entry->vme_start) == size);
		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
		assert((copy_entry->vme_end - copy_entry->vme_start) == size);

		/*
		 *	If the destination contains temporary unshared memory,
		 *	we can perform the copy by throwing it away and
		 *	installing the source data.
		 */

		object = VME_OBJECT(entry);
		if ((!entry->is_shared &&
		     ((object == VM_OBJECT_NULL) ||
		      (object->internal && !object->true_share))) ||
		    entry->needs_copy) {
			vm_object_t		old_object = VME_OBJECT(entry);
			vm_object_offset_t	old_offset = VME_OFFSET(entry);
			vm_object_offset_t	offset;

			/*
			 * Ensure that the source and destination aren't
			 * identical
			 */
			if (old_object == VME_OBJECT(copy_entry) &&
			    old_offset == VME_OFFSET(copy_entry)) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_map_copy_entry_dispose(copy, copy_entry);

				if (old_object != VM_OBJECT_NULL)
					vm_object_deallocate(old_object);

				start = tmp_entry->vme_end;
				tmp_entry = tmp_entry->vme_next;
				continue;
			}

#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
			if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
			    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
			    copy_size <= __TRADEOFF1_COPY_SIZE) {
				/*
				 * Virtual vs. Physical copy tradeoff #1.
				 *
				 * Copying only a few pages out of a large
				 * object:  do a physical copy instead of
				 * a virtual copy, to avoid possibly keeping
				 * the entire large object alive because of
				 * those few copy-on-write pages.
				 */
				vm_map_copy_overwrite_aligned_src_large++;
				goto slow_copy;
			}

			if ((dst_map->pmap != kernel_pmap) &&
			    (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
			    (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
				vm_object_t new_object, new_shadow;

				/*
				 * We're about to map something over a mapping
				 * established by malloc()...
				 */
				new_object = VME_OBJECT(copy_entry);
				if (new_object != VM_OBJECT_NULL) {
					vm_object_lock_shared(new_object);
				}
				while (new_object != VM_OBJECT_NULL &&
				       !new_object->true_share &&
				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
				       new_object->internal) {
					new_shadow = new_object->shadow;
					if (new_shadow == VM_OBJECT_NULL) {
						break;
					}
					vm_object_lock_shared(new_shadow);
					vm_object_unlock(new_object);
					new_object = new_shadow;
				}
				if (new_object != VM_OBJECT_NULL) {
					if (!new_object->internal) {
						/*
						 * The new mapping is backed
						 * by an external object.  We
						 * don't want malloc'ed memory
						 * to be replaced with such a
						 * non-anonymous mapping, so
						 * let's go off the optimized
						 * path...
						 */
						vm_map_copy_overwrite_aligned_src_not_internal++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
					if (new_object->true_share ||
					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
						/*
						 * Same if there's a "true_share"
						 * object in the shadow chain, or
						 * an object with a non-default
						 * (SYMMETRIC) copy strategy.
						 */
						vm_map_copy_overwrite_aligned_src_not_symmetric++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
					vm_object_unlock(new_object);
				}
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * malloc() mapping.
				 */
			}

			if (old_object != VM_OBJECT_NULL) {
				if(entry->is_sub_map) {
					if(entry->use_pmap) {
#ifndef NO_NESTED_PMAP
						pmap_unnest(dst_map->pmap,
							    (addr64_t)entry->vme_start,
							    entry->vme_end - entry->vme_start);
#endif	/* NO_NESTED_PMAP */
						if(dst_map->mapped_in_other_pmaps) {
							/* clean up parent */
							/* map/maps */
							vm_map_submap_pmap_clean(
								dst_map, entry->vme_start,
								entry->vme_end,
								VME_SUBMAP(entry),
								VME_OFFSET(entry));
						}
					} else {
						vm_map_submap_pmap_clean(
							dst_map, entry->vme_start,
							entry->vme_end,
							VME_SUBMAP(entry),
							VME_OFFSET(entry));
					}
					vm_map_deallocate(VME_SUBMAP(entry));
				} else {
					if(dst_map->mapped_in_other_pmaps) {
						vm_object_pmap_protect_options(
							VME_OBJECT(entry),
							VME_OFFSET(entry),
							entry->vme_end
							- entry->vme_start,
							PMAP_NULL,
							entry->vme_start,
							VM_PROT_NONE,
							PMAP_OPTIONS_REMOVE);
					} else {
						pmap_remove_options(
							dst_map->pmap,
							(addr64_t)(entry->vme_start),
							(addr64_t)(entry->vme_end),
							PMAP_OPTIONS_REMOVE);
					}
					vm_object_deallocate(old_object);
				}
			}

			entry->is_sub_map = FALSE;
			VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
			object = VME_OBJECT(entry);
			entry->needs_copy = copy_entry->needs_copy;
			entry->wired_count = 0;
			entry->user_wired_count = 0;
			offset = VME_OFFSET(copy_entry);
			VME_OFFSET_SET(entry, offset);

			vm_map_copy_entry_unlink(copy, copy_entry);
			vm_map_copy_entry_dispose(copy, copy_entry);

			/*
			 * we could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched, if there were fewer
			 * than 2/3 of the pages touched, this optimization actually cost more cycles
			 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
			 */

			/*
			 *	Set up for the next iteration.  The map
			 *	has not been unlocked, so the next
			 *	address should be at the end of this
			 *	entry, and the next map entry should be
			 *	the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t	version;
			vm_object_t		dst_object;
			vm_object_offset_t	dst_offset;
			kern_return_t		r;

		slow_copy:
			if (entry->needs_copy) {
				VME_OBJECT_SHADOW(entry,
						  (entry->vme_end -
						   entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = VME_OBJECT(entry);
			dst_offset = VME_OFFSET(entry);

			/*
			 *	Take an object reference, and record
			 *	the map version information so that the
			 *	map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				VME_OBJECT_SET(entry, dst_object);
				VME_OFFSET_SET(entry, dst_offset);
				assert(entry->use_pmap);
			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 *	Copy as much as possible in one pass
			 */

			copy_size = size;
			r = vm_fault_copy(
				VME_OBJECT(copy_entry),
				VME_OFFSET(copy_entry),
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT );

			/*
			 *	Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 *	If a hard error occurred, return it now
			 */

			if (r != KERN_SUCCESS)
				return(r);

			if (copy_size != 0) {
				/*
				 *	Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
						     copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 *	Pick up in the destination map where we left off.
			 *
			 *	Use the version information to avoid a lookup
			 *	in the normal case.
			 */

			start += copy_size;
			vm_map_lock(dst_map);
			if (version.main_timestamp == dst_map->timestamp &&
			    copy_size != 0) {
				/* We can safely use saved tmp_entry value */

				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_end(dst_map, tmp_entry, start);
				tmp_entry = tmp_entry->vme_next;
			} else {
				/* Must do lookup of tmp_entry */

				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_start(dst_map, tmp_entry, start);
			}
		}
	}/* while */

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite_aligned */
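/*
 *	Note on the __TRADEOFF1 thresholds used above: with a source object
 *	of at least 64MB and a copy of at most 128KB (i.e. at most 32
 *	4KB pages), substituting the source object for the destination
 *	entry would keep the entire large object alive just to reference a
 *	few copy-on-write pages, so the physical copy is preferred there.
 */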
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map. The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)
{
	kern_return_t	kr;
	vm_map_copy_t	copy;
	vm_size_t	kalloc_size;

	if (len > msg_ool_size_small)
		return KERN_INVALID_ARGUMENT;

	kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);

	copy = (vm_map_copy_t)kalloc(kalloc_size);
	if (copy == VM_MAP_COPY_NULL)
		return KERN_RESOURCE_SHORTAGE;
	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
	copy->size = len;
	copy->offset = 0;

	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
	if (kr != KERN_SUCCESS) {
		kfree(copy, kalloc_size);
		return kr;
	}
	if (src_destroy) {
		(void) vm_map_remove(
			src_map,
			vm_map_trunc_page(src_addr,
					  VM_MAP_PAGE_MASK(src_map)),
			vm_map_round_page(src_addr + len,
					  VM_MAP_PAGE_MASK(src_map)),
			(VM_MAP_REMOVE_INTERRUPTIBLE |
			 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
			 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
	}
	*copy_result = copy;
	return KERN_SUCCESS;
}
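/*
 *	vm_map_copyin_kernel_buffer() and vm_map_copyout_kernel_buffer()
 *	together implement the VM_MAP_COPY_KERNEL_BUFFER flavor used for
 *	transfers smaller than msg_ool_size_small: the data is staged in a
 *	kalloc'ed buffer (cpy_kdata) rather than described by map entries,
 *	which is why vm_map_copy_overwrite_nested() and
 *	vm_map_copyout_internal() special-case this copy type early on.
 */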
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map. The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t		map,
	vm_map_address_t	*addr,	/* IN/OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size,
	boolean_t		overwrite,
	boolean_t		consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	assert(copy->size == copy_size);

	/*
	 * check for corrupted vm_map_copy structure
	 */
	if (copy_size > msg_ool_size_small || copy->offset)
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		      (long long)copy->size, (long long)copy->offset);

	if (!overwrite) {

		/*
		 * Allocate space in the target map for the data
		 */
		*addr = 0;
		kr = vm_map_enter(map,
				  addr,
				  vm_map_round_page(copy_size,
						    VM_MAP_PAGE_MASK(map)),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_ANYWHERE,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0,
				  FALSE,
				  VM_PROT_DEFAULT,
				  VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return kr;
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {

		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	}
	else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(
				map,
				vm_map_trunc_page(*addr,
						  VM_MAP_PAGE_MASK(map)),
				vm_map_round_page((*addr +
						   vm_map_round_page(copy_size,
								     VM_MAP_PAGE_MASK(map))),
						  VM_MAP_PAGE_MASK(map)),
				VM_MAP_NO_FLAGS);
			*addr = 0;
		}
	} else {
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kfree(copy, copy_size + cpy_kdata_hdr_sz);
		}
	}

	return kr;
}
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 *	Warning:
 *		The arguments are evaluated multiple times.
 */
#define	vm_map_copy_insert(map, where, copy)		\
MACRO_BEGIN						\
	vm_map_store_copy_insert(map, where, copy);	\
	zfree(vm_map_copy_zone, copy);			\
MACRO_END
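/*
 *	Because vm_map_copy_insert() is a macro, "map", "where" and "copy"
 *	are evaluated more than once; callers should pass plain variables
 *	(as vm_map_copyout_internal() does) rather than expressions with
 *	side effects.
 */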
static void
vm_map_copy_remap(
	vm_map_t	map,
	vm_map_entry_t	where,
	vm_map_copy_t	copy,
	vm_map_offset_t	adjustment,
	vm_prot_t	cur_prot,
	vm_prot_t	max_prot,
	vm_inherit_t	inheritance)
{
	vm_map_entry_t	copy_entry, new_entry;

	for (copy_entry = vm_map_copy_first_entry(copy);
	     copy_entry != vm_map_copy_to_entry(copy);
	     copy_entry = copy_entry->vme_next) {
		/* get a new VM map entry for the map */
		new_entry = vm_map_entry_create(map,
						!map->hdr.entries_pageable);
		/* copy the "copy entry" to the new entry */
		vm_map_entry_copy(new_entry, copy_entry);
		/* adjust "start" and "end" */
		new_entry->vme_start += adjustment;
		new_entry->vme_end += adjustment;
		/* clear some attributes */
		new_entry->inheritance = inheritance;
		new_entry->protection = cur_prot;
		new_entry->max_protection = max_prot;
		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
		/* take an extra reference on the entry's "object" */
		if (new_entry->is_sub_map) {
			assert(!new_entry->use_pmap); /* not nested */
			vm_map_lock(VME_SUBMAP(new_entry));
			vm_map_reference(VME_SUBMAP(new_entry));
			vm_map_unlock(VME_SUBMAP(new_entry));
		} else {
			vm_object_reference(VME_OBJECT(new_entry));
		}
		/* insert the new entry in the map */
		vm_map_store_entry_link(map, where, new_entry);
		/* continue inserting the "copy entries" after the new entry */
		where = new_entry;
	}
}
/*
 * Returns true if *size matches (or is in the range of) copy->size.
 * Upon returning true, the *size field is updated with the actual size of the
 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
 */
boolean_t
vm_map_copy_validate_size(
	vm_map_t		dst_map,
	vm_map_copy_t		copy,
	vm_map_size_t		*size)
{
	if (copy == VM_MAP_COPY_NULL)
		return FALSE;
	vm_map_size_t copy_sz = copy->size;
	vm_map_size_t sz = *size;
	switch (copy->type) {
	case VM_MAP_COPY_OBJECT:
	case VM_MAP_COPY_KERNEL_BUFFER:
		if (sz == copy_sz)
			return TRUE;
		break;
	case VM_MAP_COPY_ENTRY_LIST:
		/*
		 * potential page-size rounding prevents us from exactly
		 * validating this flavor of vm_map_copy, but we can at least
		 * assert that it's within a range.
		 */
		if (copy_sz >= sz &&
		    copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
			*size = copy_sz;
			return TRUE;
		}
		break;
	default:
		break;
	}
	return FALSE;
}
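/*
 *	For illustration, a caller that received "copy" and a user-supplied
 *	size would typically do something like:
 *
 *		vm_map_size_t	size = requested_size;
 *
 *		if (!vm_map_copy_validate_size(dst_map, copy, &size))
 *			return KERN_FAILURE;
 *		kr = vm_map_copyout_size(dst_map, &dst_addr, copy, size);
 *
 *	where "requested_size" stands in for whatever size the caller was
 *	handed; vm_map_copyout_size() below relies on the size having been
 *	validated this way.
 */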
/*
 *	Routine:	vm_map_copyout_size
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map. Uses a prevalidated
 *		size for the copy object (vm_map_copy_validate_size).
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout_size(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
				       TRUE, /* consume_on_success */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}

/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
				       TRUE, /* consume_on_success */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}
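/*
 *	Both wrappers above funnel into vm_map_copyout_internal() with
 *	consume_on_success == TRUE and the default protection and
 *	inheritance attributes; vm_map_copyout() simply derives the size
 *	from copy->size (0 for a null copy) instead of taking a
 *	prevalidated one.
 */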
kern_return_t
vm_map_copyout_internal(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size,
	boolean_t		consume_on_success,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_size_t		size;
	vm_map_size_t		adjustment;
	vm_map_offset_t		start;
	vm_object_offset_t	vm_copy_start;
	vm_map_entry_t		last;
	vm_map_entry_t		entry;
	vm_map_entry_t		hole_entry;

	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return(KERN_SUCCESS);
	}

	if (copy->size != copy_size) {
		*dst_addr = 0;
		return KERN_FAILURE;
	}

	/*
	 *	Check for special copy object, created
	 *	by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t		object = copy->cpy_object;
		kern_return_t		kr;
		vm_object_offset_t	offset;

		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy_size +
					  (vm_map_size_t)(copy->offset -
							  offset)),
					 VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
				  object, offset, FALSE,
				  VM_PROT_DEFAULT, VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return(kr);
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success)
			zfree(vm_map_copy_zone, copy);
		return(KERN_SUCCESS);
	}

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
						    copy, copy_size, FALSE,
						    consume_on_success);
	}

	/*
	 *	Find space for the data
	 */

	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
					  VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
				 VM_MAP_COPY_PAGE_MASK(copy))
		- vm_copy_start;

StartAgain: ;

	vm_map_lock(dst_map);
	if( dst_map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
		last = entry;
	} else {
		if (dst_map->holelistenabled) {
			hole_entry = (vm_map_entry_t)dst_map->holes_list;

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_unlock(dst_map);
				return(KERN_NO_SPACE);
			}

			last = hole_entry;
			start = last->vme_start;
		} else {
			assert(first_free_is_valid(dst_map));
			start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
				vm_map_min(dst_map) : last->vme_end;
		}
		start = vm_map_round_page(start,
					  VM_MAP_PAGE_MASK(dst_map));
	}

	while (TRUE) {
		vm_map_entry_t	next = last->vme_next;
		vm_map_offset_t	end = start + size;

		if ((end > dst_map->max_offset) || (end < start)) {
			if (dst_map->wait_for_space) {
				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
					assert_wait((event_t) dst_map,
						    THREAD_INTERRUPTIBLE);
					vm_map_unlock(dst_map);
					thread_block(THREAD_CONTINUE_NULL);
					goto StartAgain;
				}
			}
			vm_map_unlock(dst_map);
			return(KERN_NO_SPACE);
		}

		if (dst_map->holelistenabled) {
			if (last->vme_end >= end)
				break;
		} else {
			/*
			 *	If there are no more entries, we must win.
			 *
			 *	OR
			 *
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(dst_map))
				break;

			if (next->vme_start >= end)
				break;
		}

		last = next;

		if (dst_map->holelistenabled) {
			if (last == (vm_map_entry_t) dst_map->holes_list) {
				/*
				 * Wrapped around
				 */
				vm_map_unlock(dst_map);
				return(KERN_NO_SPACE);
			}
			start = last->vme_start;
		} else {
			start = last->vme_end;
		}
		start = vm_map_round_page(start,
					  VM_MAP_PAGE_MASK(dst_map));
	}

	if (dst_map->holelistenabled) {
		if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
		}
	}

	adjustment = start - vm_copy_start;
	if (! consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries's "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 *	Since we're going to just drop the map
	 *	entries from the copy into the destination
	 *	map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		zone_t		old_zone;
		vm_map_entry_t	next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
			vm_map_entry_copy_full(new, entry);
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       new);
			next = entry->vme_next;
			old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
			zfree(old_zone, entry);
			entry = next;
		}
	}

	/*
	 *	Adjust the addresses in the copy chain, and
	 *	reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	     entry != vm_map_copy_to_entry(copy);
	     entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
						   VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
						   VM_MAP_PAGE_MASK(dst_map)));
		}

		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			vm_map_offset_t		va;
			vm_object_offset_t	offset;
			vm_object_t		object;
			vm_prot_t		prot;
			int			type_of_fault;

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
				      entry->vme_start,
				      entry->vme_end,
				      TRUE);

			while (va < entry->vme_end) {
				vm_page_t	m;

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->absent)
					panic("vm_map_copyout: wiring %p", m);

				/*
				 * ENCRYPTED SWAP:
				 * The page is assumed to be wired here, so it
				 * shouldn't be encrypted.  Otherwise, we
				 * couldn't enter it in the page table, since
				 * we don't want the user to see the encrypted
				 * data.
				 */
				ASSERT_PAGE_DECRYPTED(m);

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot)
					prot |= VM_PROT_EXECUTE;

				type_of_fault = DBG_CACHE_HIT_FAULT;

				vm_fault_enter(m, dst_map->pmap, va, prot, prot,
					       VM_PAGE_WIRED(m), FALSE, FALSE,
					       FALSE, VME_ALIAS(entry),
					       ((entry->iokit_acct ||
						 (!entry->is_sub_map &&
						  !entry->use_pmap))
						? PMAP_OPTIONS_ALT_ACCT
						: 0),
					       NULL, &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 *	Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

	/*
	 *	Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	/*
	 *	Link in the copy
	 */

	if (consume_on_success) {
		vm_map_copy_insert(dst_map, last, copy);
	} else {
		vm_map_copy_remap(dst_map, last, copy, adjustment,
				  cur_protection, max_protection,
				  inheritance);
	}

	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
 *
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	boolean_t		src_destroy,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
				    FALSE, copy_result, FALSE));
}
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */

typedef struct submap_map {
	vm_map_t	parent_map;
	vm_map_offset_t	base_start;
	vm_map_offset_t	base_end;
	vm_map_size_t	base_len;
	struct submap_map *next;
} submap_map_t;
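/*
 *	vm_map_copyin_internal() pushes one submap_map_t onto the
 *	"parent_maps" chain each time it descends into a submap entry, so
 *	that the parent map and its base_start/base_end/base_len state can
 *	be popped again when the walk climbs back out; the RETURN() macro
 *	below also walks this chain to drop the extra map references on
 *	error.
 */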
kern_return_t
vm_map_copyin_common(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	__unused boolean_t	src_volatile,
	vm_map_copy_t	*copy_result,	/* OUT */
	boolean_t	use_maxprot)
{
	int flags;

	flags = 0;
	if (src_destroy) {
		flags |= VM_MAP_COPYIN_SRC_DESTROY;
	}
	if (use_maxprot) {
		flags |= VM_MAP_COPYIN_USE_MAXPROT;
	}
	return vm_map_copyin_internal(src_map,
				      src_addr,
				      len,
				      flags,
				      copy_result);
}
kern_return_t
vm_map_copyin_internal(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	int		flags,
	vm_map_copy_t	*copy_result)	/* OUT */
{
	vm_map_entry_t	tmp_entry;	/* Result of last map lookup --
					 * in multi-level lookup, this
					 * entry contains the actual
					 * vm_object/offset.
					 */
	vm_map_entry_t	new_entry = VM_MAP_ENTRY_NULL;	/* Map entry for copy */

	vm_map_offset_t	src_start;	/* Start of current entry --
					 * where copy is taking place now
					 */
	vm_map_offset_t	src_end;	/* End of entire region to be
					 * copied */
	vm_map_offset_t src_base;
	vm_map_t	base_map = src_map;
	boolean_t	map_share=FALSE;
	submap_map_t	*parent_maps = NULL;

	vm_map_copy_t	copy;		/* Resulting copy */
	vm_map_address_t copy_addr;
	vm_map_size_t	copy_size;
	boolean_t	src_destroy;
	boolean_t	use_maxprot;
	boolean_t	preserve_purgeable;

	if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
		return KERN_INVALID_ARGUMENT;
	}

	src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
	use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
	preserve_purgeable =
		(flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;

	/*
	 *	Check for copies of zero bytes.
	 */

	if (len == 0) {
		*copy_result = VM_MAP_COPY_NULL;
		return(KERN_SUCCESS);
	}

	/*
	 *	Check that the end address doesn't overflow
	 */
	src_end = src_addr + len;
	if (src_end < src_addr)
		return KERN_INVALID_ADDRESS;

	/*
	 *	Compute (page aligned) start and end of region
	 */
	src_start = vm_map_trunc_page(src_addr,
				      VM_MAP_PAGE_MASK(src_map));
	src_end = vm_map_round_page(src_end,
				    VM_MAP_PAGE_MASK(src_map));

	/*
	 * If the copy is sufficiently small, use a kernel buffer instead
	 * of making a virtual copy.  The theory being that the cost of
	 * setting up VM (and taking C-O-W faults) dominates the copy costs
	 * for small regions.
	 */
	if ((len < msg_ool_size_small) &&
	    !use_maxprot &&
	    !preserve_purgeable &&
	    !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
	    /*
	     * Since the "msg_ool_size_small" threshold was increased and
	     * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
	     * address space limits, we revert to doing a virtual copy if the
	     * copied range goes beyond those limits.  Otherwise, mach_vm_read()
	     * of the commpage would now fail when it used to work.
	     */
	    (src_start >= vm_map_min(src_map) &&
	     src_start < vm_map_max(src_map) &&
	     src_end >= vm_map_min(src_map) &&
	     src_end < vm_map_max(src_map)))
		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
						   src_destroy, copy_result);

	XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);

	/*
	 *	Allocate a header element for the list.
	 *
	 *	Use the start and end in the header to
	 *	remember the endpoints prior to rounding.
	 */

	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
	vm_map_copy_first_entry(copy) =
		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->cpy_hdr.nentries = 0;
	copy->cpy_hdr.entries_pageable = TRUE;
#if 00
	copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
#else
	/*
	 * The copy entries can be broken down for a variety of reasons,
	 * so we can't guarantee that they will remain map-aligned...
	 * Will need to adjust the first copy_entry's "vme_start" and
	 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
	 * rather than the original map's alignment.
	 */
	copy->cpy_hdr.page_shift = PAGE_SHIFT;
#endif

	vm_map_store_init( &(copy->cpy_hdr) );

	copy->offset = src_addr;
	copy->size = len;

	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);

#define	RETURN(x)						\
	MACRO_BEGIN						\
	vm_map_unlock(src_map);					\
	if(src_map != base_map)					\
		vm_map_deallocate(src_map);			\
	if (new_entry != VM_MAP_ENTRY_NULL)			\
		vm_map_copy_entry_dispose(copy,new_entry);	\
	vm_map_copy_discard(copy);				\
	{							\
		submap_map_t	*_ptr;				\
								\
		for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
			parent_maps=parent_maps->next;		\
			if (_ptr->parent_map != base_map)	\
				vm_map_deallocate(_ptr->parent_map); \
			kfree(_ptr, sizeof(submap_map_t));	\
		}						\
	}							\
	MACRO_RETURN(x)

	/*
	 *	Find the beginning of the region.
	 */

	vm_map_lock(src_map);

	/*
	 * Lookup the original "src_addr" rather than the truncated
	 * "src_start", in case "src_start" falls in a non-map-aligned
	 * map entry *before* the map entry that contains "src_addr"...
	 */
	if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
		RETURN(KERN_INVALID_ADDRESS);
	if(!tmp_entry->is_sub_map) {
		/*
		 * ... but clip to the map-rounded "src_start" rather than
		 * "src_addr" to preserve map-alignment.  We'll adjust the
		 * first copy entry at the end, if needed.
		 */
		vm_map_clip_start(src_map, tmp_entry, src_start);
	}
	if (src_start < tmp_entry->vme_start) {
		/*
		 * Move "src_start" up to the start of the
		 * first map entry to copy.
		 */
		src_start = tmp_entry->vme_start;
	}
	/* set for later submap fix-up */
	copy_addr = src_start;

	/*
	 *	Go through entries until we get to the end.
	 */

	while (TRUE) {
		vm_map_entry_t	src_entry = tmp_entry;	/* Top-level entry */
		vm_map_size_t	src_size;		/* Size of source
							 * map entry (in both
							 * maps)
							 */

		vm_object_t		src_object;	/* Object to copy */
		vm_object_offset_t	src_offset;

		boolean_t	src_needs_copy;		/* Should source map
							 * be made read-only
							 * for copy-on-write?
							 */

		boolean_t	new_entry_needs_copy;	/* Will new entry be COW? */

		boolean_t	was_wired;		/* Was source wired? */
		vm_map_version_t version;		/* Version before locks
							 * dropped to make copy
							 */
		kern_return_t	result;			/* Return value from
							 * copy_strategically.
							 */
		while(tmp_entry->is_sub_map) {
			vm_map_size_t submap_len;
			submap_map_t *ptr;

			ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
			ptr->next = parent_maps;
			parent_maps = ptr;
			ptr->parent_map = src_map;
			ptr->base_start = src_start;
			ptr->base_end = src_end;
			submap_len = tmp_entry->vme_end - src_start;
			if(submap_len > (src_end-src_start))
				submap_len = src_end-src_start;
			ptr->base_len = submap_len;

			src_start -= tmp_entry->vme_start;
			src_start += VME_OFFSET(tmp_entry);
			src_end = src_start + submap_len;
			src_map = VME_SUBMAP(tmp_entry);
			vm_map_lock(src_map);
			/* keep an outstanding reference for all maps in */
			/* the parents tree except the base map */
			vm_map_reference(src_map);
			vm_map_unlock(ptr->parent_map);
			if (!vm_map_lookup_entry(
				    src_map, src_start, &tmp_entry))
				RETURN(KERN_INVALID_ADDRESS);
			map_share = TRUE;
			if(!tmp_entry->is_sub_map)
				vm_map_clip_start(src_map, tmp_entry, src_start);
			src_entry = tmp_entry;
		}
		/* we are now in the lowest level submap... */

		if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
		    (VME_OBJECT(tmp_entry)->phys_contiguous)) {
			/* This is not supported for now. In future */
			/* we will need to detect the phys_contig */
			/* condition and then upgrade copy_slowly */
			/* to do physical copy from the device mem */
			/* based object. We can piggy-back off of */
			/* the was wired boolean to set-up the */
			/* proper handling */
			RETURN(KERN_PROTECTION_FAILURE);
		}
		/*
		 *	Create a new address map entry to hold the result.
		 *	Fill in the fields from the appropriate source entries.
		 */
9663 * We must unlock the source map to do this if we need
9664 * to allocate a map entry.
9666 if (new_entry
== VM_MAP_ENTRY_NULL
) {
9667 version
.main_timestamp
= src_map
->timestamp
;
9668 vm_map_unlock(src_map
);
9670 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
9672 vm_map_lock(src_map
);
9673 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
9674 if (!vm_map_lookup_entry(src_map
, src_start
,
9676 RETURN(KERN_INVALID_ADDRESS
);
9678 if (!tmp_entry
->is_sub_map
)
9679 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
9680 continue; /* restart w/ new tmp_entry */
9685 * Verify that the region can be read.
9687 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
9689 (src_entry
->max_protection
& VM_PROT_READ
) == 0)
9690 RETURN(KERN_PROTECTION_FAILURE
);
9693 * Clip against the endpoints of the entire region.
9696 vm_map_clip_end(src_map
, src_entry
, src_end
);
9698 src_size
= src_entry
->vme_end
- src_start
;
9699 src_object
= VME_OBJECT(src_entry
);
9700 src_offset
= VME_OFFSET(src_entry
);
9701 was_wired
= (src_entry
->wired_count
!= 0);
9703 vm_map_entry_copy(new_entry
, src_entry
);
9704 if (new_entry
->is_sub_map
) {
9705 /* clr address space specifics */
9706 new_entry
->use_pmap
= FALSE
;
9710 * Attempt non-blocking copy-on-write optimizations.
9714 (src_object
== VM_OBJECT_NULL
||
9715 (src_object
->internal
&&
9716 src_object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
&&
9717 !src_object
->true_share
&&
9720 * If we are destroying the source, and the object
9721 * is internal, we can move the object reference
9722 * from the source to the copy. The copy is
9723 * copy-on-write only if the source is.
9724 * We make another reference to the object, because
9725 * destroying the source entry will deallocate it.
9727 vm_object_reference(src_object
);
9730 * Copy is always unwired. vm_map_copy_entry
9731 * set its wired count to zero.
9734 goto CopySuccessful
;
9739 XPR(XPR_VM_MAP
, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
9740 src_object
, new_entry
, VME_OBJECT(new_entry
),
9742 if ((src_object
== VM_OBJECT_NULL
||
9743 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
9744 vm_object_copy_quickly(
9745 &VME_OBJECT(new_entry
),
9749 &new_entry_needs_copy
)) {
9751 new_entry
->needs_copy
= new_entry_needs_copy
;
9754 * Handle copy-on-write obligations
9757 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
9760 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
9762 if (override_nx(src_map
, VME_ALIAS(src_entry
))
9764 prot
|= VM_PROT_EXECUTE
;
9766 vm_object_pmap_protect(
9770 (src_entry
->is_shared
?
9773 src_entry
->vme_start
,
9776 assert(tmp_entry
->wired_count
== 0);
9777 tmp_entry
->needs_copy
= TRUE
;
9781 * The map has never been unlocked, so it's safe
9782 * to move to the next entry rather than doing
9786 goto CopySuccessful
;
9790 * Take an object reference, so that we may
9791 * release the map lock(s).
9794 assert(src_object
!= VM_OBJECT_NULL
);
9795 vm_object_reference(src_object
);
9798 * Record the timestamp for later verification.
9802 version
.main_timestamp
= src_map
->timestamp
;
9803 vm_map_unlock(src_map
); /* Increments timestamp once! */
9811 vm_object_lock(src_object
);
9812 result
= vm_object_copy_slowly(
9817 &VME_OBJECT(new_entry
));
9818 VME_OFFSET_SET(new_entry
, 0);
9819 new_entry
->needs_copy
= FALSE
;
9822 else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
9823 (tmp_entry
->is_shared
|| map_share
)) {
9824 vm_object_t new_object
;
9826 vm_object_lock_shared(src_object
);
9827 new_object
= vm_object_copy_delayed(
9832 if (new_object
== VM_OBJECT_NULL
)
9835 VME_OBJECT_SET(new_entry
, new_object
);
9836 assert(new_entry
->wired_count
== 0);
9837 new_entry
->needs_copy
= TRUE
;
9838 assert(!new_entry
->iokit_acct
);
9839 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
9840 new_entry
->use_pmap
= TRUE
;
9841 result
= KERN_SUCCESS
;
9844 vm_object_offset_t new_offset
;
9845 new_offset
= VME_OFFSET(new_entry
);
9846 result
= vm_object_copy_strategically(src_object
,
9849 &VME_OBJECT(new_entry
),
9851 &new_entry_needs_copy
);
9852 if (new_offset
!= VME_OFFSET(new_entry
)) {
9853 VME_OFFSET_SET(new_entry
, new_offset
);
9856 new_entry
->needs_copy
= new_entry_needs_copy
;
9859 if (result
== KERN_SUCCESS
&&
9860 preserve_purgeable
&&
9861 src_object
->purgable
!= VM_PURGABLE_DENY
) {
9862 vm_object_t new_object
;
9864 new_object
= VME_OBJECT(new_entry
);
9865 assert(new_object
!= src_object
);
9866 vm_object_lock(new_object
);
9867 assert(new_object
->ref_count
== 1);
9868 assert(new_object
->shadow
== VM_OBJECT_NULL
);
9869 assert(new_object
->copy
== VM_OBJECT_NULL
);
9870 assert(new_object
->vo_purgeable_owner
== NULL
);
9872 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
9873 new_object
->true_share
= TRUE
;
9874 /* start as non-volatile with no owner... */
9875 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
9876 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
9877 /* ... and move to src_object's purgeable state */
9878 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
9880 state
= src_object
->purgable
;
9881 vm_object_purgable_control(
9883 VM_PURGABLE_SET_STATE
,
9886 vm_object_unlock(new_object
);
9887 new_object
= VM_OBJECT_NULL
;
9890 if (result
!= KERN_SUCCESS
&&
9891 result
!= KERN_MEMORY_RESTART_COPY
) {
9892 vm_map_lock(src_map
);
9897 * Throw away the extra reference
9900 vm_object_deallocate(src_object
);
9903 * Verify that the map has not substantially
9904 * changed while the copy was being made.
9907 vm_map_lock(src_map
);
9909 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
)
9910 goto VerificationSuccessful
;
9913 * Simple version comparison failed.
9915 * Retry the lookup and verify that the
9916 * same object/offset are still present.
9918 * [Note: a memory manager that colludes with
9919 * the calling task can detect that we have
9920 * cheated. While the map was unlocked, the
9921 * mapping could have been changed and restored.]
9924 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
9925 if (result
!= KERN_MEMORY_RESTART_COPY
) {
9926 vm_object_deallocate(VME_OBJECT(new_entry
));
9927 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
9928 assert(!new_entry
->iokit_acct
);
9929 new_entry
->use_pmap
= TRUE
;
9931 RETURN(KERN_INVALID_ADDRESS
);
9934 src_entry
= tmp_entry
;
9935 vm_map_clip_start(src_map
, src_entry
, src_start
);
9937 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
9939 ((src_entry
->max_protection
& VM_PROT_READ
) == 0))
9940 goto VerificationFailed
;
9942 if (src_entry
->vme_end
< new_entry
->vme_end
) {
9944 * This entry might have been shortened
9945 * (vm_map_clip_end) or been replaced with
9946 * an entry that ends closer to "src_start"
9948 * Adjust "new_entry" accordingly; copying
9949 * less memory would be correct but we also
9950 * redo the copy (see below) if the new entry
9951 * no longer points at the same object/offset.
9953 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
9954 VM_MAP_COPY_PAGE_MASK(copy
)));
9955 new_entry
->vme_end
= src_entry
->vme_end
;
9956 src_size
= new_entry
->vme_end
- src_start
;
9957 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
9959 * This entry might have been extended
9960 * (vm_map_entry_simplify() or coalesce)
9961 * or been replaced with an entry that ends farther
9962 * from "src_start" than before.
9964 * We've called vm_object_copy_*() only on
9965 * the previous <start:end> range, so we can't
9966 * just extend new_entry. We have to re-do
9967 * the copy based on the new entry as if it was
9968 * pointing at a different object/offset (see
9969 * "Verification failed" below).
9973 if ((VME_OBJECT(src_entry
) != src_object
) ||
9974 (VME_OFFSET(src_entry
) != src_offset
) ||
9975 (src_entry
->vme_end
> new_entry
->vme_end
)) {
9978 * Verification failed.
9980 * Start over with this top-level entry.
9983 VerificationFailed
: ;
9985 vm_object_deallocate(VME_OBJECT(new_entry
));
9986 tmp_entry
= src_entry
;
9991 * Verification succeeded.
9994 VerificationSuccessful
: ;
9996 if (result
== KERN_MEMORY_RESTART_COPY
)
10006 * Link in the new copy entry.
10009 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
10013 * Determine whether the entire region
10016 src_base
= src_start
;
10017 src_start
= new_entry
->vme_end
;
10018 new_entry
= VM_MAP_ENTRY_NULL
;
10019 while ((src_start
>= src_end
) && (src_end
!= 0)) {
10022 if (src_map
== base_map
) {
10023 /* back to the top */
10028 assert(ptr
!= NULL
);
10029 parent_maps
= parent_maps
->next
;
10031 /* fix up the damage we did in that submap */
10032 vm_map_simplify_range(src_map
,
10036 vm_map_unlock(src_map
);
10037 vm_map_deallocate(src_map
);
10038 vm_map_lock(ptr
->parent_map
);
10039 src_map
= ptr
->parent_map
;
10040 src_base
= ptr
->base_start
;
10041 src_start
= ptr
->base_start
+ ptr
->base_len
;
10042 src_end
= ptr
->base_end
;
10043 if (!vm_map_lookup_entry(src_map
,
10046 (src_end
> src_start
)) {
10047 RETURN(KERN_INVALID_ADDRESS
);
10049 kfree(ptr
, sizeof(submap_map_t
));
10050 if (parent_maps
== NULL
)
10052 src_entry
= tmp_entry
->vme_prev
;
10055 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
10056 (src_start
>= src_addr
+ len
) &&
10057 (src_addr
+ len
!= 0)) {
10059 * Stop copying now, even though we haven't reached
10060 * "src_end". We'll adjust the end of the last copy
10061 * entry at the end, if needed.
10063 * If src_map's aligment is different from the
10064 * system's page-alignment, there could be
10065 * extra non-map-aligned map entries between
10066 * the original (non-rounded) "src_addr + len"
10067 * and the rounded "src_end".
10068 * We do not want to copy those map entries since
10069 * they're not part of the copied range.
10074 if ((src_start
>= src_end
) && (src_end
!= 0))
10078 * Verify that there are no gaps in the region
10081 tmp_entry
= src_entry
->vme_next
;
10082 if ((tmp_entry
->vme_start
!= src_start
) ||
10083 (tmp_entry
== vm_map_to_entry(src_map
))) {
10084 RETURN(KERN_INVALID_ADDRESS
);
10089 * If the source should be destroyed, do it now, since the
10090 * copy was successful.
10093 (void) vm_map_delete(
10095 vm_map_trunc_page(src_addr
,
10096 VM_MAP_PAGE_MASK(src_map
)),
10098 ((src_map
== kernel_map
) ?
10099 VM_MAP_REMOVE_KUNWIRE
:
10103 /* fix up the damage we did in the base map */
10104 vm_map_simplify_range(
10106 vm_map_trunc_page(src_addr
,
10107 VM_MAP_PAGE_MASK(src_map
)),
10108 vm_map_round_page(src_end
,
10109 VM_MAP_PAGE_MASK(src_map
)));
10112 vm_map_unlock(src_map
);
10114 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) {
10115 vm_map_offset_t original_start
, original_offset
, original_end
;
10117 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
10119 /* adjust alignment of first copy_entry's "vme_start" */
10120 tmp_entry
= vm_map_copy_first_entry(copy
);
10121 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
10122 vm_map_offset_t adjustment
;
10124 original_start
= tmp_entry
->vme_start
;
10125 original_offset
= VME_OFFSET(tmp_entry
);
10127 /* map-align the start of the first copy entry... */
10128 adjustment
= (tmp_entry
->vme_start
-
10130 tmp_entry
->vme_start
,
10131 VM_MAP_PAGE_MASK(src_map
)));
10132 tmp_entry
->vme_start
-= adjustment
;
10133 VME_OFFSET_SET(tmp_entry
,
10134 VME_OFFSET(tmp_entry
) - adjustment
);
10135 copy_addr
-= adjustment
;
10136 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
10137 /* ... adjust for mis-aligned start of copy range */
10139 (vm_map_trunc_page(copy
->offset
,
10141 vm_map_trunc_page(copy
->offset
,
10142 VM_MAP_PAGE_MASK(src_map
)));
10144 assert(page_aligned(adjustment
));
10145 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
10146 tmp_entry
->vme_start
+= adjustment
;
10147 VME_OFFSET_SET(tmp_entry
,
10148 (VME_OFFSET(tmp_entry
) +
10150 copy_addr
+= adjustment
;
10151 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
10155 * Assert that the adjustments haven't exposed
10156 * more than was originally copied...
10158 assert(tmp_entry
->vme_start
>= original_start
);
10159 assert(VME_OFFSET(tmp_entry
) >= original_offset
);
10161 * ... and that it did not adjust outside of a
10162 * a single 16K page.
10164 assert(vm_map_trunc_page(tmp_entry
->vme_start
,
10165 VM_MAP_PAGE_MASK(src_map
)) ==
10166 vm_map_trunc_page(original_start
,
10167 VM_MAP_PAGE_MASK(src_map
)));
10170 /* adjust alignment of last copy_entry's "vme_end" */
10171 tmp_entry
= vm_map_copy_last_entry(copy
);
10172 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
10173 vm_map_offset_t adjustment
;
10175 original_end
= tmp_entry
->vme_end
;
10177 /* map-align the end of the last copy entry... */
10178 tmp_entry
->vme_end
=
10179 vm_map_round_page(tmp_entry
->vme_end
,
10180 VM_MAP_PAGE_MASK(src_map
));
10181 /* ... adjust for mis-aligned end of copy range */
10183 (vm_map_round_page((copy
->offset
+
10185 VM_MAP_PAGE_MASK(src_map
)) -
10186 vm_map_round_page((copy
->offset
+
10190 assert(page_aligned(adjustment
));
10191 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
10192 tmp_entry
->vme_end
-= adjustment
;
10193 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
10197 * Assert that the adjustments haven't exposed
10198 * more than was originally copied...
10200 assert(tmp_entry
->vme_end
<= original_end
);
10202 * ... and that it did not adjust outside of a
10203 * a single 16K page.
10205 assert(vm_map_round_page(tmp_entry
->vme_end
,
10206 VM_MAP_PAGE_MASK(src_map
)) ==
10207 vm_map_round_page(original_end
,
10208 VM_MAP_PAGE_MASK(src_map
)));
10212 /* Fix-up start and end points in copy. This is necessary */
10213 /* when the various entries in the copy object were picked */
10214 /* up from different sub-maps */
10216 tmp_entry
= vm_map_copy_first_entry(copy
);
10217 copy_size
= 0; /* compute actual size */
10218 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
10219 assert(VM_MAP_PAGE_ALIGNED(
10220 copy_addr
+ (tmp_entry
->vme_end
-
10221 tmp_entry
->vme_start
),
10222 VM_MAP_COPY_PAGE_MASK(copy
)));
10223 assert(VM_MAP_PAGE_ALIGNED(
10225 VM_MAP_COPY_PAGE_MASK(copy
)));
10228 * The copy_entries will be injected directly into the
10229 * destination map and might not be "map aligned" there...
10231 tmp_entry
->map_aligned
= FALSE
;
10233 tmp_entry
->vme_end
= copy_addr
+
10234 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
10235 tmp_entry
->vme_start
= copy_addr
;
10236 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
10237 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
10238 copy_size
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
10239 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
10242 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
10243 copy_size
< copy
->size
) {
10245 * The actual size of the VM map copy is smaller than what
10246 * was requested by the caller. This must be because some
10247 * PAGE_SIZE-sized pages are missing at the end of the last
10248 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
10249 * The caller might not have been aware of those missing
10250 * pages and might not want to be aware of it, which is
10251 * fine as long as they don't try to access (and crash on)
10252 * those missing pages.
10253 * Let's adjust the size of the "copy", to avoid failing
10254 * in vm_map_copyout() or vm_map_copy_overwrite().
10256 assert(vm_map_round_page(copy_size
,
10257 VM_MAP_PAGE_MASK(src_map
)) ==
10258 vm_map_round_page(copy
->size
,
10259 VM_MAP_PAGE_MASK(src_map
)));
10260 copy
->size
= copy_size
;
10263 *copy_result
= copy
;
10264 return(KERN_SUCCESS
);
10270 vm_map_copy_extract(
10272 vm_map_address_t src_addr
,
10274 vm_map_copy_t
*copy_result
, /* OUT */
10275 vm_prot_t
*cur_prot
, /* OUT */
10276 vm_prot_t
*max_prot
)
10278 vm_map_offset_t src_start
, src_end
;
10279 vm_map_copy_t copy
;
10283 * Check for copies of zero bytes.
10287 *copy_result
= VM_MAP_COPY_NULL
;
10288 return(KERN_SUCCESS
);
10292 * Check that the end address doesn't overflow
10294 src_end
= src_addr
+ len
;
10295 if (src_end
< src_addr
)
10296 return KERN_INVALID_ADDRESS
;
10299 * Compute (page aligned) start and end of region
10301 src_start
= vm_map_trunc_page(src_addr
, PAGE_MASK
);
10302 src_end
= vm_map_round_page(src_end
, PAGE_MASK
);
10305 * Allocate a header element for the list.
10307 * Use the start and end in the header to
10308 * remember the endpoints prior to rounding.
10311 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
10312 copy
->c_u
.hdr
.rb_head_store
.rbh_root
= (void*)(int)SKIP_RB_TREE
;
10313 vm_map_copy_first_entry(copy
) =
10314 vm_map_copy_last_entry(copy
) = vm_map_copy_to_entry(copy
);
10315 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
10316 copy
->cpy_hdr
.nentries
= 0;
10317 copy
->cpy_hdr
.entries_pageable
= TRUE
;
10319 vm_map_store_init(©
->cpy_hdr
);
10324 kr
= vm_map_remap_extract(src_map
,
10332 TRUE
, /* pageable */
10333 FALSE
); /* same_map */
10334 if (kr
!= KERN_SUCCESS
) {
10335 vm_map_copy_discard(copy
);
10339 *copy_result
= copy
;
10340 return KERN_SUCCESS
;
10344 * vm_map_copyin_object:
10346 * Create a copy object from an object.
10347 * Our caller donates an object reference.
10351 vm_map_copyin_object(
10352 vm_object_t object
,
10353 vm_object_offset_t offset
, /* offset of region in object */
10354 vm_object_size_t size
, /* size of region in object */
10355 vm_map_copy_t
*copy_result
) /* OUT */
10357 vm_map_copy_t copy
; /* Resulting copy */
10360 * We drop the object into a special copy object
10361 * that contains the object directly.
10364 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
10365 copy
->c_u
.hdr
.rb_head_store
.rbh_root
= (void*)(int)SKIP_RB_TREE
;
10366 copy
->type
= VM_MAP_COPY_OBJECT
;
10367 copy
->cpy_object
= object
;
10368 copy
->offset
= offset
;
10371 *copy_result
= copy
;
10372 return(KERN_SUCCESS
);
10378 vm_map_entry_t old_entry
,
10381 vm_object_t object
;
10382 vm_map_entry_t new_entry
;
10385 * New sharing code. New map entry
10386 * references original object. Internal
10387 * objects use asynchronous copy algorithm for
10388 * future copies. First make sure we have
10389 * the right object. If we need a shadow,
10390 * or someone else already has one, then
10391 * make a new shadow and share it.
10394 object
= VME_OBJECT(old_entry
);
10395 if (old_entry
->is_sub_map
) {
10396 assert(old_entry
->wired_count
== 0);
10397 #ifndef NO_NESTED_PMAP
10398 if(old_entry
->use_pmap
) {
10399 kern_return_t result
;
10401 result
= pmap_nest(new_map
->pmap
,
10402 (VME_SUBMAP(old_entry
))->pmap
,
10403 (addr64_t
)old_entry
->vme_start
,
10404 (addr64_t
)old_entry
->vme_start
,
10405 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
10407 panic("vm_map_fork_share: pmap_nest failed!");
10409 #endif /* NO_NESTED_PMAP */
10410 } else if (object
== VM_OBJECT_NULL
) {
10411 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
10412 old_entry
->vme_start
));
10413 VME_OFFSET_SET(old_entry
, 0);
10414 VME_OBJECT_SET(old_entry
, object
);
10415 old_entry
->use_pmap
= TRUE
;
10416 assert(!old_entry
->needs_copy
);
10417 } else if (object
->copy_strategy
!=
10418 MEMORY_OBJECT_COPY_SYMMETRIC
) {
10421 * We are already using an asymmetric
10422 * copy, and therefore we already have
10423 * the right object.
10426 assert(! old_entry
->needs_copy
);
10428 else if (old_entry
->needs_copy
|| /* case 1 */
10429 object
->shadowed
|| /* case 2 */
10430 (!object
->true_share
&& /* case 3 */
10431 !old_entry
->is_shared
&&
10433 (vm_map_size_t
)(old_entry
->vme_end
-
10434 old_entry
->vme_start
)))) {
10437 * We need to create a shadow.
10438 * There are three cases here.
10439 * In the first case, we need to
10440 * complete a deferred symmetrical
10441 * copy that we participated in.
10442 * In the second and third cases,
10443 * we need to create the shadow so
10444 * that changes that we make to the
10445 * object do not interfere with
10446 * any symmetrical copies which
10447 * have occured (case 2) or which
10448 * might occur (case 3).
10450 * The first case is when we had
10451 * deferred shadow object creation
10452 * via the entry->needs_copy mechanism.
10453 * This mechanism only works when
10454 * only one entry points to the source
10455 * object, and we are about to create
10456 * a second entry pointing to the
10457 * same object. The problem is that
10458 * there is no way of mapping from
10459 * an object to the entries pointing
10460 * to it. (Deferred shadow creation
10461 * works with one entry because occurs
10462 * at fault time, and we walk from the
10463 * entry to the object when handling
10466 * The second case is when the object
10467 * to be shared has already been copied
10468 * with a symmetric copy, but we point
10469 * directly to the object without
10470 * needs_copy set in our entry. (This
10471 * can happen because different ranges
10472 * of an object can be pointed to by
10473 * different entries. In particular,
10474 * a single entry pointing to an object
10475 * can be split by a call to vm_inherit,
10476 * which, combined with task_create, can
10477 * result in the different entries
10478 * having different needs_copy values.)
10479 * The shadowed flag in the object allows
10480 * us to detect this case. The problem
10481 * with this case is that if this object
10482 * has or will have shadows, then we
10483 * must not perform an asymmetric copy
10484 * of this object, since such a copy
10485 * allows the object to be changed, which
10486 * will break the previous symmetrical
10487 * copies (which rely upon the object
10488 * not changing). In a sense, the shadowed
10489 * flag says "don't change this object".
10490 * We fix this by creating a shadow
10491 * object for this object, and sharing
10492 * that. This works because we are free
10493 * to change the shadow object (and thus
10494 * to use an asymmetric copy strategy);
10495 * this is also semantically correct,
10496 * since this object is temporary, and
10497 * therefore a copy of the object is
10498 * as good as the object itself. (This
10499 * is not true for permanent objects,
10500 * since the pager needs to see changes,
10501 * which won't happen if the changes
10502 * are made to a copy.)
10504 * The third case is when the object
10505 * to be shared has parts sticking
10506 * outside of the entry we're working
10507 * with, and thus may in the future
10508 * be subject to a symmetrical copy.
10509 * (This is a preemptive version of
10512 VME_OBJECT_SHADOW(old_entry
,
10513 (vm_map_size_t
) (old_entry
->vme_end
-
10514 old_entry
->vme_start
));
10517 * If we're making a shadow for other than
10518 * copy on write reasons, then we have
10519 * to remove write permission.
10522 if (!old_entry
->needs_copy
&&
10523 (old_entry
->protection
& VM_PROT_WRITE
)) {
10526 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
10528 if (override_nx(old_map
, VME_ALIAS(old_entry
)) && prot
)
10529 prot
|= VM_PROT_EXECUTE
;
10531 if (old_map
->mapped_in_other_pmaps
) {
10532 vm_object_pmap_protect(
10533 VME_OBJECT(old_entry
),
10534 VME_OFFSET(old_entry
),
10535 (old_entry
->vme_end
-
10536 old_entry
->vme_start
),
10538 old_entry
->vme_start
,
10541 pmap_protect(old_map
->pmap
,
10542 old_entry
->vme_start
,
10543 old_entry
->vme_end
,
10548 old_entry
->needs_copy
= FALSE
;
10549 object
= VME_OBJECT(old_entry
);
10554 * If object was using a symmetric copy strategy,
10555 * change its copy strategy to the default
10556 * asymmetric copy strategy, which is copy_delay
10557 * in the non-norma case and copy_call in the
10558 * norma case. Bump the reference count for the
10562 if(old_entry
->is_sub_map
) {
10563 vm_map_lock(VME_SUBMAP(old_entry
));
10564 vm_map_reference(VME_SUBMAP(old_entry
));
10565 vm_map_unlock(VME_SUBMAP(old_entry
));
10567 vm_object_lock(object
);
10568 vm_object_reference_locked(object
);
10569 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
10570 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
10572 vm_object_unlock(object
);
10576 * Clone the entry, using object ref from above.
10577 * Mark both entries as shared.
10580 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* Never the kernel
10581 * map or descendants */
10582 vm_map_entry_copy(new_entry
, old_entry
);
10583 old_entry
->is_shared
= TRUE
;
10584 new_entry
->is_shared
= TRUE
;
10587 * If old entry's inheritence is VM_INHERIT_NONE,
10588 * the new entry is for corpse fork, remove the
10589 * write permission from the new entry.
10591 if (old_entry
->inheritance
== VM_INHERIT_NONE
) {
10593 new_entry
->protection
&= ~VM_PROT_WRITE
;
10594 new_entry
->max_protection
&= ~VM_PROT_WRITE
;
10598 * Insert the entry into the new map -- we
10599 * know we're inserting at the end of the new
10603 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
);
10606 * Update the physical map
10609 if (old_entry
->is_sub_map
) {
10610 /* Bill Angell pmap support goes here */
10612 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
10613 old_entry
->vme_end
- old_entry
->vme_start
,
10614 old_entry
->vme_start
);
10621 vm_map_entry_t
*old_entry_p
,
10623 int vm_map_copyin_flags
)
10625 vm_map_entry_t old_entry
= *old_entry_p
;
10626 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
10627 vm_map_offset_t start
= old_entry
->vme_start
;
10628 vm_map_copy_t copy
;
10629 vm_map_entry_t last
= vm_map_last_entry(new_map
);
10631 vm_map_unlock(old_map
);
10633 * Use maxprot version of copyin because we
10634 * care about whether this memory can ever
10635 * be accessed, not just whether it's accessible
10638 vm_map_copyin_flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
10639 if (vm_map_copyin_internal(old_map
, start
, entry_size
,
10640 vm_map_copyin_flags
, ©
)
10643 * The map might have changed while it
10644 * was unlocked, check it again. Skip
10645 * any blank space or permanently
10646 * unreadable region.
10648 vm_map_lock(old_map
);
10649 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
10650 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
10651 last
= last
->vme_next
;
10653 *old_entry_p
= last
;
10656 * XXX For some error returns, want to
10657 * XXX skip to the next element. Note
10658 * that INVALID_ADDRESS and
10659 * PROTECTION_FAILURE are handled above.
10666 * Insert the copy into the new map
10669 vm_map_copy_insert(new_map
, last
, copy
);
10672 * Pick up the traversal at the end of
10673 * the copied region.
10676 vm_map_lock(old_map
);
10677 start
+= entry_size
;
10678 if (! vm_map_lookup_entry(old_map
, start
, &last
)) {
10679 last
= last
->vme_next
;
10681 if (last
->vme_start
== start
) {
10683 * No need to clip here and we don't
10684 * want to cause any unnecessary
10688 vm_map_clip_start(old_map
, last
, start
);
10691 *old_entry_p
= last
;
10699 * Create and return a new map based on the old
10700 * map, according to the inheritance values on the
10701 * regions in that map and the options.
10703 * The source map must not be locked.
10713 vm_map_entry_t old_entry
;
10714 vm_map_size_t new_size
= 0, entry_size
;
10715 vm_map_entry_t new_entry
;
10716 boolean_t src_needs_copy
;
10717 boolean_t new_entry_needs_copy
;
10718 boolean_t pmap_is64bit
;
10719 int vm_map_copyin_flags
;
10721 if (options
& ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE
|
10722 VM_MAP_FORK_PRESERVE_PURGEABLE
)) {
10723 /* unsupported option */
10724 return VM_MAP_NULL
;
10728 #if defined(__i386__) || defined(__x86_64__)
10729 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
;
10731 #error Unknown architecture.
10734 new_pmap
= pmap_create(ledger
, (vm_map_size_t
) 0, pmap_is64bit
);
10736 vm_map_reference_swap(old_map
);
10737 vm_map_lock(old_map
);
10739 new_map
= vm_map_create(new_pmap
,
10740 old_map
->min_offset
,
10741 old_map
->max_offset
,
10742 old_map
->hdr
.entries_pageable
);
10743 vm_commit_pagezero_status(new_map
);
10744 /* inherit the parent map's page size */
10745 vm_map_set_page_shift(new_map
, VM_MAP_PAGE_SHIFT(old_map
));
10747 old_entry
= vm_map_first_entry(old_map
);
10748 old_entry
!= vm_map_to_entry(old_map
);
10751 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
10753 switch (old_entry
->inheritance
) {
10754 case VM_INHERIT_NONE
:
10756 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
10757 * is not passed or it is backed by a device pager.
10759 if ((!(options
& VM_MAP_FORK_SHARE_IF_INHERIT_NONE
)) ||
10760 (!old_entry
->is_sub_map
&&
10761 VME_OBJECT(old_entry
) != NULL
&&
10762 VME_OBJECT(old_entry
)->pager
!= NULL
&&
10763 is_device_pager_ops(VME_OBJECT(old_entry
)->pager
->mo_pager_ops
))) {
10768 case VM_INHERIT_SHARE
:
10769 vm_map_fork_share(old_map
, old_entry
, new_map
);
10770 new_size
+= entry_size
;
10773 case VM_INHERIT_COPY
:
10776 * Inline the copy_quickly case;
10777 * upon failure, fall back on call
10778 * to vm_map_fork_copy.
10781 if(old_entry
->is_sub_map
)
10783 if ((old_entry
->wired_count
!= 0) ||
10784 ((VME_OBJECT(old_entry
) != NULL
) &&
10785 (VME_OBJECT(old_entry
)->true_share
))) {
10786 goto slow_vm_map_fork_copy
;
10789 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* never the kernel map or descendants */
10790 vm_map_entry_copy(new_entry
, old_entry
);
10791 if (new_entry
->is_sub_map
) {
10792 /* clear address space specifics */
10793 new_entry
->use_pmap
= FALSE
;
10796 if (! vm_object_copy_quickly(
10797 &VME_OBJECT(new_entry
),
10798 VME_OFFSET(old_entry
),
10799 (old_entry
->vme_end
-
10800 old_entry
->vme_start
),
10802 &new_entry_needs_copy
)) {
10803 vm_map_entry_dispose(new_map
, new_entry
);
10804 goto slow_vm_map_fork_copy
;
10808 * Handle copy-on-write obligations
10811 if (src_needs_copy
&& !old_entry
->needs_copy
) {
10814 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
10816 if (override_nx(old_map
, VME_ALIAS(old_entry
))
10818 prot
|= VM_PROT_EXECUTE
;
10820 vm_object_pmap_protect(
10821 VME_OBJECT(old_entry
),
10822 VME_OFFSET(old_entry
),
10823 (old_entry
->vme_end
-
10824 old_entry
->vme_start
),
10825 ((old_entry
->is_shared
10826 || old_map
->mapped_in_other_pmaps
)
10829 old_entry
->vme_start
,
10832 assert(old_entry
->wired_count
== 0);
10833 old_entry
->needs_copy
= TRUE
;
10835 new_entry
->needs_copy
= new_entry_needs_copy
;
10838 * Insert the entry at the end
10842 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
),
10844 new_size
+= entry_size
;
10847 slow_vm_map_fork_copy
:
10848 vm_map_copyin_flags
= 0;
10849 if (options
& VM_MAP_FORK_PRESERVE_PURGEABLE
) {
10850 vm_map_copyin_flags
|=
10851 VM_MAP_COPYIN_PRESERVE_PURGEABLE
;
10853 if (vm_map_fork_copy(old_map
,
10856 vm_map_copyin_flags
)) {
10857 new_size
+= entry_size
;
10861 old_entry
= old_entry
->vme_next
;
10865 new_map
->size
= new_size
;
10866 vm_map_unlock(old_map
);
10867 vm_map_deallocate(old_map
);
10875 * Setup the "new_map" with the proper execution environment according
10876 * to the type of executable (platform, 64bit, chroot environment).
10877 * Map the comm page and shared region, etc...
10887 SHARED_REGION_TRACE_DEBUG(
10888 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
10889 (void *)VM_KERNEL_ADDRPERM(current_task()),
10890 (void *)VM_KERNEL_ADDRPERM(new_map
),
10891 (void *)VM_KERNEL_ADDRPERM(task
),
10892 (void *)VM_KERNEL_ADDRPERM(fsroot
),
10894 (void) vm_commpage_enter(new_map
, task
, is64bit
);
10895 (void) vm_shared_region_enter(new_map
, task
, is64bit
, fsroot
, cpu
);
10896 SHARED_REGION_TRACE_DEBUG(
10897 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
10898 (void *)VM_KERNEL_ADDRPERM(current_task()),
10899 (void *)VM_KERNEL_ADDRPERM(new_map
),
10900 (void *)VM_KERNEL_ADDRPERM(task
),
10901 (void *)VM_KERNEL_ADDRPERM(fsroot
),
10903 return KERN_SUCCESS
;
10907 * vm_map_lookup_locked:
10909 * Finds the VM object, offset, and
10910 * protection for a given virtual address in the
10911 * specified map, assuming a page fault of the
10914 * Returns the (object, offset, protection) for
10915 * this address, whether it is wired down, and whether
10916 * this map has the only reference to the data in question.
10917 * In order to later verify this lookup, a "version"
10920 * The map MUST be locked by the caller and WILL be
10921 * locked on exit. In order to guarantee the
10922 * existence of the returned object, it is returned
10925 * If a lookup is requested with "write protection"
10926 * specified, the map may be changed to perform virtual
10927 * copying operations, although the data referenced will
10931 vm_map_lookup_locked(
10932 vm_map_t
*var_map
, /* IN/OUT */
10933 vm_map_offset_t vaddr
,
10934 vm_prot_t fault_type
,
10935 int object_lock_type
,
10936 vm_map_version_t
*out_version
, /* OUT */
10937 vm_object_t
*object
, /* OUT */
10938 vm_object_offset_t
*offset
, /* OUT */
10939 vm_prot_t
*out_prot
, /* OUT */
10940 boolean_t
*wired
, /* OUT */
10941 vm_object_fault_info_t fault_info
, /* OUT */
10942 vm_map_t
*real_map
)
10944 vm_map_entry_t entry
;
10945 vm_map_t map
= *var_map
;
10946 vm_map_t old_map
= *var_map
;
10947 vm_map_t cow_sub_map_parent
= VM_MAP_NULL
;
10948 vm_map_offset_t cow_parent_vaddr
= 0;
10949 vm_map_offset_t old_start
= 0;
10950 vm_map_offset_t old_end
= 0;
10952 boolean_t mask_protections
;
10953 boolean_t force_copy
;
10954 vm_prot_t original_fault_type
;
10957 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10958 * as a mask against the mapping's actual protections, not as an
10961 mask_protections
= (fault_type
& VM_PROT_IS_MASK
) ? TRUE
: FALSE
;
10962 force_copy
= (fault_type
& VM_PROT_COPY
) ? TRUE
: FALSE
;
10963 fault_type
&= VM_PROT_ALL
;
10964 original_fault_type
= fault_type
;
10969 fault_type
= original_fault_type
;
10972 * If the map has an interesting hint, try it before calling
10973 * full blown lookup routine.
10977 if ((entry
== vm_map_to_entry(map
)) ||
10978 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
10979 vm_map_entry_t tmp_entry
;
10982 * Entry was either not a valid hint, or the vaddr
10983 * was not contained in the entry, so do a full lookup.
10985 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
10986 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
))
10987 vm_map_unlock(cow_sub_map_parent
);
10988 if((*real_map
!= map
)
10989 && (*real_map
!= cow_sub_map_parent
))
10990 vm_map_unlock(*real_map
);
10991 return KERN_INVALID_ADDRESS
;
10996 if(map
== old_map
) {
10997 old_start
= entry
->vme_start
;
10998 old_end
= entry
->vme_end
;
11002 * Handle submaps. Drop lock on upper map, submap is
11007 if (entry
->is_sub_map
) {
11008 vm_map_offset_t local_vaddr
;
11009 vm_map_offset_t end_delta
;
11010 vm_map_offset_t start_delta
;
11011 vm_map_entry_t submap_entry
;
11012 boolean_t mapped_needs_copy
=FALSE
;
11014 local_vaddr
= vaddr
;
11016 if ((entry
->use_pmap
&&
11017 ! ((fault_type
& VM_PROT_WRITE
) ||
11019 /* if real_map equals map we unlock below */
11020 if ((*real_map
!= map
) &&
11021 (*real_map
!= cow_sub_map_parent
))
11022 vm_map_unlock(*real_map
);
11023 *real_map
= VME_SUBMAP(entry
);
11026 if(entry
->needs_copy
&&
11027 ((fault_type
& VM_PROT_WRITE
) ||
11029 if (!mapped_needs_copy
) {
11030 if (vm_map_lock_read_to_write(map
)) {
11031 vm_map_lock_read(map
);
11035 vm_map_lock_read(VME_SUBMAP(entry
));
11036 *var_map
= VME_SUBMAP(entry
);
11037 cow_sub_map_parent
= map
;
11038 /* reset base to map before cow object */
11039 /* this is the map which will accept */
11040 /* the new cow object */
11041 old_start
= entry
->vme_start
;
11042 old_end
= entry
->vme_end
;
11043 cow_parent_vaddr
= vaddr
;
11044 mapped_needs_copy
= TRUE
;
11046 vm_map_lock_read(VME_SUBMAP(entry
));
11047 *var_map
= VME_SUBMAP(entry
);
11048 if((cow_sub_map_parent
!= map
) &&
11049 (*real_map
!= map
))
11050 vm_map_unlock(map
);
11053 vm_map_lock_read(VME_SUBMAP(entry
));
11054 *var_map
= VME_SUBMAP(entry
);
11055 /* leave map locked if it is a target */
11056 /* cow sub_map above otherwise, just */
11057 /* follow the maps down to the object */
11058 /* here we unlock knowing we are not */
11059 /* revisiting the map. */
11060 if((*real_map
!= map
) && (map
!= cow_sub_map_parent
))
11061 vm_map_unlock_read(map
);
11066 /* calculate the offset in the submap for vaddr */
11067 local_vaddr
= (local_vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
11070 if(!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
11071 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)){
11072 vm_map_unlock(cow_sub_map_parent
);
11074 if((*real_map
!= map
)
11075 && (*real_map
!= cow_sub_map_parent
)) {
11076 vm_map_unlock(*real_map
);
11079 return KERN_INVALID_ADDRESS
;
11082 /* find the attenuated shadow of the underlying object */
11083 /* on our target map */
11085 /* in english the submap object may extend beyond the */
11086 /* region mapped by the entry or, may only fill a portion */
11087 /* of it. For our purposes, we only care if the object */
11088 /* doesn't fill. In this case the area which will */
11089 /* ultimately be clipped in the top map will only need */
11090 /* to be as big as the portion of the underlying entry */
11091 /* which is mapped */
11092 start_delta
= submap_entry
->vme_start
> VME_OFFSET(entry
) ?
11093 submap_entry
->vme_start
- VME_OFFSET(entry
) : 0;
11096 (VME_OFFSET(entry
) + start_delta
+ (old_end
- old_start
)) <=
11097 submap_entry
->vme_end
?
11098 0 : (VME_OFFSET(entry
) +
11099 (old_end
- old_start
))
11100 - submap_entry
->vme_end
;
11102 old_start
+= start_delta
;
11103 old_end
-= end_delta
;
11105 if(submap_entry
->is_sub_map
) {
11106 entry
= submap_entry
;
11107 vaddr
= local_vaddr
;
11108 goto submap_recurse
;
11111 if (((fault_type
& VM_PROT_WRITE
) ||
11113 && cow_sub_map_parent
) {
11115 vm_object_t sub_object
, copy_object
;
11116 vm_object_offset_t copy_offset
;
11117 vm_map_offset_t local_start
;
11118 vm_map_offset_t local_end
;
11119 boolean_t copied_slowly
= FALSE
;
11121 if (vm_map_lock_read_to_write(map
)) {
11122 vm_map_lock_read(map
);
11123 old_start
-= start_delta
;
11124 old_end
+= end_delta
;
11129 sub_object
= VME_OBJECT(submap_entry
);
11130 if (sub_object
== VM_OBJECT_NULL
) {
11132 vm_object_allocate(
11134 (submap_entry
->vme_end
-
11135 submap_entry
->vme_start
));
11136 VME_OBJECT_SET(submap_entry
, sub_object
);
11137 VME_OFFSET_SET(submap_entry
, 0);
11139 local_start
= local_vaddr
-
11140 (cow_parent_vaddr
- old_start
);
11141 local_end
= local_vaddr
+
11142 (old_end
- cow_parent_vaddr
);
11143 vm_map_clip_start(map
, submap_entry
, local_start
);
11144 vm_map_clip_end(map
, submap_entry
, local_end
);
11145 if (submap_entry
->is_sub_map
) {
11146 /* unnesting was done when clipping */
11147 assert(!submap_entry
->use_pmap
);
11150 /* This is the COW case, lets connect */
11151 /* an entry in our space to the underlying */
11152 /* object in the submap, bypassing the */
11156 if(submap_entry
->wired_count
!= 0 ||
11157 (sub_object
->copy_strategy
==
11158 MEMORY_OBJECT_COPY_NONE
)) {
11159 vm_object_lock(sub_object
);
11160 vm_object_copy_slowly(sub_object
,
11161 VME_OFFSET(submap_entry
),
11162 (submap_entry
->vme_end
-
11163 submap_entry
->vme_start
),
11166 copied_slowly
= TRUE
;
11169 /* set up shadow object */
11170 copy_object
= sub_object
;
11171 vm_object_lock(sub_object
);
11172 vm_object_reference_locked(sub_object
);
11173 sub_object
->shadowed
= TRUE
;
11174 vm_object_unlock(sub_object
);
11176 assert(submap_entry
->wired_count
== 0);
11177 submap_entry
->needs_copy
= TRUE
;
11179 prot
= submap_entry
->protection
& ~VM_PROT_WRITE
;
11181 if (override_nx(old_map
,
11182 VME_ALIAS(submap_entry
))
11184 prot
|= VM_PROT_EXECUTE
;
11186 vm_object_pmap_protect(
11188 VME_OFFSET(submap_entry
),
11189 submap_entry
->vme_end
-
11190 submap_entry
->vme_start
,
11191 (submap_entry
->is_shared
11192 || map
->mapped_in_other_pmaps
) ?
11193 PMAP_NULL
: map
->pmap
,
11194 submap_entry
->vme_start
,
11199 * Adjust the fault offset to the submap entry.
11201 copy_offset
= (local_vaddr
-
11202 submap_entry
->vme_start
+
11203 VME_OFFSET(submap_entry
));
11205 /* This works diffently than the */
11206 /* normal submap case. We go back */
11207 /* to the parent of the cow map and*/
11208 /* clip out the target portion of */
11209 /* the sub_map, substituting the */
11210 /* new copy object, */
11212 vm_map_unlock(map
);
11213 local_start
= old_start
;
11214 local_end
= old_end
;
11215 map
= cow_sub_map_parent
;
11216 *var_map
= cow_sub_map_parent
;
11217 vaddr
= cow_parent_vaddr
;
11218 cow_sub_map_parent
= NULL
;
11220 if(!vm_map_lookup_entry(map
,
11222 vm_object_deallocate(
11224 vm_map_lock_write_to_read(map
);
11225 return KERN_INVALID_ADDRESS
;
11228 /* clip out the portion of space */
11229 /* mapped by the sub map which */
11230 /* corresponds to the underlying */
11234 * Clip (and unnest) the smallest nested chunk
11235 * possible around the faulting address...
11237 local_start
= vaddr
& ~(pmap_nesting_size_min
- 1);
11238 local_end
= local_start
+ pmap_nesting_size_min
;
11240 * ... but don't go beyond the "old_start" to "old_end"
11241 * range, to avoid spanning over another VM region
11242 * with a possibly different VM object and/or offset.
11244 if (local_start
< old_start
) {
11245 local_start
= old_start
;
11247 if (local_end
> old_end
) {
11248 local_end
= old_end
;
11251 * Adjust copy_offset to the start of the range.
11253 copy_offset
-= (vaddr
- local_start
);
11255 vm_map_clip_start(map
, entry
, local_start
);
11256 vm_map_clip_end(map
, entry
, local_end
);
11257 if (entry
->is_sub_map
) {
11258 /* unnesting was done when clipping */
11259 assert(!entry
->use_pmap
);
11262 /* substitute copy object for */
11263 /* shared map entry */
11264 vm_map_deallocate(VME_SUBMAP(entry
));
11265 assert(!entry
->iokit_acct
);
11266 entry
->is_sub_map
= FALSE
;
11267 entry
->use_pmap
= TRUE
;
11268 VME_OBJECT_SET(entry
, copy_object
);
11270 /* propagate the submap entry's protections */
11271 entry
->protection
|= submap_entry
->protection
;
11272 entry
->max_protection
|= submap_entry
->max_protection
;
11274 if(copied_slowly
) {
11275 VME_OFFSET_SET(entry
, local_start
- old_start
);
11276 entry
->needs_copy
= FALSE
;
11277 entry
->is_shared
= FALSE
;
11279 VME_OFFSET_SET(entry
, copy_offset
);
11280 assert(entry
->wired_count
== 0);
11281 entry
->needs_copy
= TRUE
;
11282 if(entry
->inheritance
== VM_INHERIT_SHARE
)
11283 entry
->inheritance
= VM_INHERIT_COPY
;
11284 if (map
!= old_map
)
11285 entry
->is_shared
= TRUE
;
11287 if(entry
->inheritance
== VM_INHERIT_SHARE
)
11288 entry
->inheritance
= VM_INHERIT_COPY
;
11290 vm_map_lock_write_to_read(map
);
11292 if((cow_sub_map_parent
)
11293 && (cow_sub_map_parent
!= *real_map
)
11294 && (cow_sub_map_parent
!= map
)) {
11295 vm_map_unlock(cow_sub_map_parent
);
11297 entry
= submap_entry
;
11298 vaddr
= local_vaddr
;
11303 * Check whether this task is allowed to have
11307 prot
= entry
->protection
;
11309 if (override_nx(old_map
, VME_ALIAS(entry
)) && prot
) {
11311 * HACK -- if not a stack, then allow execution
11313 prot
|= VM_PROT_EXECUTE
;
11316 if (mask_protections
) {
11317 fault_type
&= prot
;
11318 if (fault_type
== VM_PROT_NONE
) {
11319 goto protection_failure
;
11322 if (((fault_type
& prot
) != fault_type
)
11324 protection_failure
:
11325 if (*real_map
!= map
) {
11326 vm_map_unlock(*real_map
);
11330 if ((fault_type
& VM_PROT_EXECUTE
) && prot
)
11331 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
11333 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
11334 return KERN_PROTECTION_FAILURE
;
11338 * If this page is not pageable, we have to get
11339 * it for all possible accesses.
11342 *wired
= (entry
->wired_count
!= 0);
11347 * If the entry was copy-on-write, we either ...
11350 if (entry
->needs_copy
) {
11352 * If we want to write the page, we may as well
11353 * handle that now since we've got the map locked.
11355 * If we don't need to write the page, we just
11356 * demote the permissions allowed.
11359 if ((fault_type
& VM_PROT_WRITE
) || *wired
|| force_copy
) {
11361 * Make a new object, and place it in the
11362 * object chain. Note that no new references
11363 * have appeared -- one just moved from the
11364 * map to the new object.
11367 if (vm_map_lock_read_to_write(map
)) {
11368 vm_map_lock_read(map
);
11372 if (VME_OBJECT(entry
)->shadowed
== FALSE
) {
11373 vm_object_lock(VME_OBJECT(entry
));
11374 VME_OBJECT(entry
)->shadowed
= TRUE
;
11375 vm_object_unlock(VME_OBJECT(entry
));
11377 VME_OBJECT_SHADOW(entry
,
11378 (vm_map_size_t
) (entry
->vme_end
-
11379 entry
->vme_start
));
11380 entry
->needs_copy
= FALSE
;
11382 vm_map_lock_write_to_read(map
);
11384 if ((fault_type
& VM_PROT_WRITE
) == 0 && *wired
== 0) {
11386 * We're attempting to read a copy-on-write
11387 * page -- don't allow writes.
11390 prot
&= (~VM_PROT_WRITE
);
11395 * Create an object if necessary.
11397 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
11399 if (vm_map_lock_read_to_write(map
)) {
11400 vm_map_lock_read(map
);
11404 VME_OBJECT_SET(entry
,
11405 vm_object_allocate(
11406 (vm_map_size_t
)(entry
->vme_end
-
11407 entry
->vme_start
)));
11408 VME_OFFSET_SET(entry
, 0);
11409 vm_map_lock_write_to_read(map
);
11413 * Return the object/offset from this entry. If the entry
11414 * was copy-on-write or empty, it has been fixed up. Also
11415 * return the protection.
11418 *offset
= (vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
11419 *object
= VME_OBJECT(entry
);
11423 fault_info
->interruptible
= THREAD_UNINT
; /* for now... */
11424 /* ... the caller will change "interruptible" if needed */
11425 fault_info
->cluster_size
= 0;
11426 fault_info
->user_tag
= VME_ALIAS(entry
);
11427 fault_info
->pmap_options
= 0;
11428 if (entry
->iokit_acct
||
11429 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
11430 fault_info
->pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
11432 fault_info
->behavior
= entry
->behavior
;
11433 fault_info
->lo_offset
= VME_OFFSET(entry
);
11434 fault_info
->hi_offset
=
11435 (entry
->vme_end
- entry
->vme_start
) + VME_OFFSET(entry
);
11436 fault_info
->no_cache
= entry
->no_cache
;
11437 fault_info
->stealth
= FALSE
;
11438 fault_info
->io_sync
= FALSE
;
11439 if (entry
->used_for_jit
||
11440 entry
->vme_resilient_codesign
) {
11441 fault_info
->cs_bypass
= TRUE
;
11443 fault_info
->cs_bypass
= FALSE
;
11445 fault_info
->mark_zf_absent
= FALSE
;
11446 fault_info
->batch_pmap_op
= FALSE
;
11450 * Lock the object to prevent it from disappearing
11452 if (object_lock_type
== OBJECT_LOCK_EXCLUSIVE
)
11453 vm_object_lock(*object
);
11455 vm_object_lock_shared(*object
);
11458 * Save the version number
11461 out_version
->main_timestamp
= map
->timestamp
;
11463 return KERN_SUCCESS
;
11470 * Verifies that the map in question has not changed
11471 * since the given version. If successful, the map
11472 * will not change until vm_map_verify_done() is called.
11477 vm_map_version_t
*version
) /* REF */
11481 vm_map_lock_read(map
);
11482 result
= (map
->timestamp
== version
->main_timestamp
);
11485 vm_map_unlock_read(map
);
11491 * vm_map_verify_done:
11493 * Releases locks acquired by a vm_map_verify.
11495 * This is now a macro in vm/vm_map.h. It does a
11496 * vm_map_unlock_read on the map.
11501 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11502 * Goes away after regular vm_region_recurse function migrates to
11504 * vm_region_recurse: A form of vm_region which follows the
11505 * submaps in a target map
11509 #if DEVELOPMENT || DEBUG
11510 int vm_region_footprint
= 0;
11511 #endif /* DEVELOPMENT || DEBUG */
11514 vm_map_region_recurse_64(
11516 vm_map_offset_t
*address
, /* IN/OUT */
11517 vm_map_size_t
*size
, /* OUT */
11518 natural_t
*nesting_depth
, /* IN/OUT */
11519 vm_region_submap_info_64_t submap_info
, /* IN/OUT */
11520 mach_msg_type_number_t
*count
) /* IN/OUT */
11522 mach_msg_type_number_t original_count
;
11523 vm_region_extended_info_data_t extended
;
11524 vm_map_entry_t tmp_entry
;
11525 vm_map_offset_t user_address
;
11526 unsigned int user_max_depth
;
11529 * "curr_entry" is the VM map entry preceding or including the
11530 * address we're looking for.
11531 * "curr_map" is the map or sub-map containing "curr_entry".
11532 * "curr_address" is the equivalent of the top map's "user_address"
11533 * in the current map.
11534 * "curr_offset" is the cumulated offset of "curr_map" in the
11535 * target task's address space.
11536 * "curr_depth" is the depth of "curr_map" in the chain of
11539 * "curr_max_below" and "curr_max_above" limit the range (around
11540 * "curr_address") we should take into account in the current (sub)map.
11541 * They limit the range to what's visible through the map entries
11542 * we've traversed from the top map to the current map.
11545 vm_map_entry_t curr_entry
;
11546 vm_map_address_t curr_address
;
11547 vm_map_offset_t curr_offset
;
11549 unsigned int curr_depth
;
11550 vm_map_offset_t curr_max_below
, curr_max_above
;
11551 vm_map_offset_t curr_skip
;
11554 * "next_" is the same as "curr_" but for the VM region immediately
11555 * after the address we're looking for. We need to keep track of this
11556 * too because we want to return info about that region if the
11557 * address we're looking for is not mapped.
11559 vm_map_entry_t next_entry
;
11560 vm_map_offset_t next_offset
;
11561 vm_map_offset_t next_address
;
11563 unsigned int next_depth
;
11564 vm_map_offset_t next_max_below
, next_max_above
;
11565 vm_map_offset_t next_skip
;
11567 boolean_t look_for_pages
;
11568 vm_region_submap_short_info_64_t short_info
;
11570 if (map
== VM_MAP_NULL
) {
11571 /* no address space to work on */
11572 return KERN_INVALID_ARGUMENT
;
11576 if (*count
< VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
) {
11578 * "info" structure is not big enough and
11581 return KERN_INVALID_ARGUMENT
;
11584 original_count
= *count
;
11586 if (original_count
< VM_REGION_SUBMAP_INFO_V0_COUNT_64
) {
11587 *count
= VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
;
11588 look_for_pages
= FALSE
;
11589 short_info
= (vm_region_submap_short_info_64_t
) submap_info
;
11590 submap_info
= NULL
;
11592 look_for_pages
= TRUE
;
11593 *count
= VM_REGION_SUBMAP_INFO_V0_COUNT_64
;
11596 if (original_count
>= VM_REGION_SUBMAP_INFO_V1_COUNT_64
) {
11597 *count
= VM_REGION_SUBMAP_INFO_V1_COUNT_64
;
11601 user_address
= *address
;
11602 user_max_depth
= *nesting_depth
;
11605 vm_map_lock_read(map
);
11611 curr_address
= user_address
;
11615 curr_max_above
= ((vm_map_offset_t
) -1) - curr_address
;
11616 curr_max_below
= curr_address
;
11624 next_max_above
= (vm_map_offset_t
) -1;
11625 next_max_below
= (vm_map_offset_t
) -1;
11628 if (vm_map_lookup_entry(curr_map
,
11631 /* tmp_entry contains the address we're looking for */
11632 curr_entry
= tmp_entry
;
11634 vm_map_offset_t skip
;
11636 * The address is not mapped. "tmp_entry" is the
11637 * map entry preceding the address. We want the next
11638 * one, if it exists.
11640 curr_entry
= tmp_entry
->vme_next
;
11642 if (curr_entry
== vm_map_to_entry(curr_map
) ||
11643 (curr_entry
->vme_start
>=
11644 curr_address
+ curr_max_above
)) {
11645 /* no next entry at this level: stop looking */
11647 vm_map_unlock_read(curr_map
);
11654 curr_max_above
= 0;
11655 curr_max_below
= 0;
11659 /* adjust current address and offset */
11660 skip
= curr_entry
->vme_start
- curr_address
;
11661 curr_address
= curr_entry
->vme_start
;
11663 curr_offset
+= skip
;
11664 curr_max_above
-= skip
;
11665 curr_max_below
= 0;
11669 * Is the next entry at this level closer to the address (or
11670 * deeper in the submap chain) than the one we had
11673 tmp_entry
= curr_entry
->vme_next
;
11674 if (tmp_entry
== vm_map_to_entry(curr_map
)) {
11675 /* no next entry at this level */
11676 } else if (tmp_entry
->vme_start
>=
11677 curr_address
+ curr_max_above
) {
11679 * tmp_entry is beyond the scope of what we mapped of
11680 * this submap in the upper level: ignore it.
11682 } else if ((next_entry
== NULL
) ||
11683 (tmp_entry
->vme_start
+ curr_offset
<=
11684 next_entry
->vme_start
+ next_offset
)) {
11686 * We didn't have a "next_entry" or this one is
11687 * closer to the address we're looking for:
11688 * use this "tmp_entry" as the new "next_entry".
11690 if (next_entry
!= NULL
) {
11691 /* unlock the last "next_map" */
11692 if (next_map
!= curr_map
&& not_in_kdp
) {
11693 vm_map_unlock_read(next_map
);
11696 next_entry
= tmp_entry
;
11697 next_map
= curr_map
;
11698 next_depth
= curr_depth
;
11699 next_address
= next_entry
->vme_start
;
11700 next_skip
= curr_skip
;
11701 next_skip
+= (next_address
- curr_address
);
11702 next_offset
= curr_offset
;
11703 next_offset
+= (next_address
- curr_address
);
11704 next_max_above
= MIN(next_max_above
, curr_max_above
);
11705 next_max_above
= MIN(next_max_above
,
11706 next_entry
->vme_end
- next_address
);
11707 next_max_below
= MIN(next_max_below
, curr_max_below
);
11708 next_max_below
= MIN(next_max_below
,
11709 next_address
- next_entry
->vme_start
);
11713 * "curr_max_{above,below}" allow us to keep track of the
11714 * portion of the submap that is actually mapped at this level:
11715 * the rest of that submap is irrelevant to us, since it's not
11717 * The relevant portion of the map starts at
11718 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
11720 curr_max_above
= MIN(curr_max_above
,
11721 curr_entry
->vme_end
- curr_address
);
11722 curr_max_below
= MIN(curr_max_below
,
11723 curr_address
- curr_entry
->vme_start
);
11725 if (!curr_entry
->is_sub_map
||
11726 curr_depth
>= user_max_depth
) {
11728 * We hit a leaf map or we reached the maximum depth
11729 * we could, so stop looking. Keep the current map
11736 * Get down to the next submap level.
11740 * Lock the next level and unlock the current level,
11741 * unless we need to keep it locked to access the "next_entry"
11745 vm_map_lock_read(VME_SUBMAP(curr_entry
));
11747 if (curr_map
== next_map
) {
11748 /* keep "next_map" locked in case we need it */
11750 /* release this map */
11752 vm_map_unlock_read(curr_map
);
		/*
		 * Adjust the offset.  "curr_entry" maps the submap
		 * at relative address "curr_entry->vme_start" in the
		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
		 * bytes of the submap.
		 * "curr_offset" always represents the offset of a virtual
		 * address in the curr_map relative to the absolute address
		 * space (i.e. the top-level VM map).
		 */
11765 (VME_OFFSET(curr_entry
) - curr_entry
->vme_start
);
11766 curr_address
= user_address
+ curr_offset
;
11767 /* switch to the submap */
11768 curr_map
= VME_SUBMAP(curr_entry
);
11773 if (curr_entry
== NULL
) {
11774 /* no VM region contains the address... */
11775 #if DEVELOPMENT || DEBUG
11776 if (vm_region_footprint
&& /* we want footprint numbers */
11777 look_for_pages
&& /* & we want page counts */
11778 next_entry
== NULL
&& /* & there are no more regions */
11779 /* & we haven't already provided our fake region: */
11780 user_address
== vm_map_last_entry(map
)->vme_end
) {
11781 ledger_amount_t nonvol
, nonvol_compressed
;
			/*
			 * Add a fake memory region to account for
			 * purgeable memory that counts towards this
			 * task's memory footprint, i.e. the resident
			 * compressed pages of non-volatile objects
			 * owned by that task.
			 */
11789 ledger_get_balance(
11791 task_ledgers
.purgeable_nonvolatile
,
11793 ledger_get_balance(
11795 task_ledgers
.purgeable_nonvolatile_compressed
,
11796 &nonvol_compressed
);
11797 if (nonvol
+ nonvol_compressed
== 0) {
11798 /* no purgeable memory usage to report */
11799 return KERN_FAILURE
;
11801 /* fake region to show nonvolatile footprint */
11802 submap_info
->protection
= VM_PROT_DEFAULT
;
11803 submap_info
->max_protection
= VM_PROT_DEFAULT
;
11804 submap_info
->inheritance
= VM_INHERIT_DEFAULT
;
11805 submap_info
->offset
= 0;
11806 submap_info
->user_tag
= 0;
11807 submap_info
->pages_resident
= (unsigned int) (nonvol
/ PAGE_SIZE
);
11808 submap_info
->pages_shared_now_private
= 0;
11809 submap_info
->pages_swapped_out
= (unsigned int) (nonvol_compressed
/ PAGE_SIZE
);
11810 submap_info
->pages_dirtied
= submap_info
->pages_resident
;
11811 submap_info
->ref_count
= 1;
11812 submap_info
->shadow_depth
= 0;
11813 submap_info
->external_pager
= 0;
11814 submap_info
->share_mode
= SM_PRIVATE
;
11815 submap_info
->is_submap
= 0;
11816 submap_info
->behavior
= VM_BEHAVIOR_DEFAULT
;
11817 submap_info
->object_id
= 0x11111111;
11818 submap_info
->user_wired_count
= 0;
11819 submap_info
->pages_reusable
= 0;
11820 *nesting_depth
= 0;
11821 *size
= (vm_map_size_t
) (nonvol
+ nonvol_compressed
);
11822 *address
= user_address
;
11823 return KERN_SUCCESS
;
11825 #endif /* DEVELOPMENT || DEBUG */
11826 if (next_entry
== NULL
) {
11827 /* ... and no VM region follows it either */
11828 return KERN_INVALID_ADDRESS
;
11830 /* ... gather info about the next VM region */
11831 curr_entry
= next_entry
;
11832 curr_map
= next_map
; /* still locked ... */
11833 curr_address
= next_address
;
11834 curr_skip
= next_skip
;
11835 curr_offset
= next_offset
;
11836 curr_depth
= next_depth
;
11837 curr_max_above
= next_max_above
;
11838 curr_max_below
= next_max_below
;
11840 /* we won't need "next_entry" after all */
11841 if (next_entry
!= NULL
) {
11842 /* release "next_map" */
11843 if (next_map
!= curr_map
&& not_in_kdp
) {
11844 vm_map_unlock_read(next_map
);
11853 next_max_below
= -1;
11854 next_max_above
= -1;
11856 if (curr_entry
->is_sub_map
&&
11857 curr_depth
< user_max_depth
) {
11859 * We're not as deep as we could be: we must have
11860 * gone back up after not finding anything mapped
11861 * below the original top-level map entry's.
11862 * Let's move "curr_address" forward and recurse again.
11864 user_address
= curr_address
;
11865 goto recurse_again
;
11868 *nesting_depth
= curr_depth
;
11869 *size
= curr_max_above
+ curr_max_below
;
11870 *address
= user_address
+ curr_skip
- curr_max_below
;
// LP64todo: all the current tools are 32-bit, so this obviously never worked
// for 64-bit and should probably be a real 32-bit ID rather than a pointer.
// Current users just check for equality.
#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
11877 if (look_for_pages
) {
11878 submap_info
->user_tag
= VME_ALIAS(curr_entry
);
11879 submap_info
->offset
= VME_OFFSET(curr_entry
);
11880 submap_info
->protection
= curr_entry
->protection
;
11881 submap_info
->inheritance
= curr_entry
->inheritance
;
11882 submap_info
->max_protection
= curr_entry
->max_protection
;
11883 submap_info
->behavior
= curr_entry
->behavior
;
11884 submap_info
->user_wired_count
= curr_entry
->user_wired_count
;
11885 submap_info
->is_submap
= curr_entry
->is_sub_map
;
11886 submap_info
->object_id
= INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry
));
11888 short_info
->user_tag
= VME_ALIAS(curr_entry
);
11889 short_info
->offset
= VME_OFFSET(curr_entry
);
11890 short_info
->protection
= curr_entry
->protection
;
11891 short_info
->inheritance
= curr_entry
->inheritance
;
11892 short_info
->max_protection
= curr_entry
->max_protection
;
11893 short_info
->behavior
= curr_entry
->behavior
;
11894 short_info
->user_wired_count
= curr_entry
->user_wired_count
;
11895 short_info
->is_submap
= curr_entry
->is_sub_map
;
11896 short_info
->object_id
= INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry
));
11899 extended
.pages_resident
= 0;
11900 extended
.pages_swapped_out
= 0;
11901 extended
.pages_shared_now_private
= 0;
11902 extended
.pages_dirtied
= 0;
11903 extended
.pages_reusable
= 0;
11904 extended
.external_pager
= 0;
11905 extended
.shadow_depth
= 0;
11906 extended
.share_mode
= SM_EMPTY
;
11907 extended
.ref_count
= 0;
11910 if (!curr_entry
->is_sub_map
) {
11911 vm_map_offset_t range_start
, range_end
;
11912 range_start
= MAX((curr_address
- curr_max_below
),
11913 curr_entry
->vme_start
);
11914 range_end
= MIN((curr_address
+ curr_max_above
),
11915 curr_entry
->vme_end
);
11916 vm_map_region_walk(curr_map
,
11919 (VME_OFFSET(curr_entry
) +
11921 curr_entry
->vme_start
)),
11922 range_end
- range_start
,
11924 look_for_pages
, VM_REGION_EXTENDED_INFO_COUNT
);
11925 if (extended
.external_pager
&&
11926 extended
.ref_count
== 2 &&
11927 extended
.share_mode
== SM_SHARED
) {
11928 extended
.share_mode
= SM_PRIVATE
;
11931 if (curr_entry
->use_pmap
) {
11932 extended
.share_mode
= SM_TRUESHARED
;
11934 extended
.share_mode
= SM_PRIVATE
;
11936 extended
.ref_count
= VME_SUBMAP(curr_entry
)->ref_count
;
11940 if (look_for_pages
) {
11941 submap_info
->pages_resident
= extended
.pages_resident
;
11942 submap_info
->pages_swapped_out
= extended
.pages_swapped_out
;
11943 submap_info
->pages_shared_now_private
=
11944 extended
.pages_shared_now_private
;
11945 submap_info
->pages_dirtied
= extended
.pages_dirtied
;
11946 submap_info
->external_pager
= extended
.external_pager
;
11947 submap_info
->shadow_depth
= extended
.shadow_depth
;
11948 submap_info
->share_mode
= extended
.share_mode
;
11949 submap_info
->ref_count
= extended
.ref_count
;
11951 if (original_count
>= VM_REGION_SUBMAP_INFO_V1_COUNT_64
) {
11952 submap_info
->pages_reusable
= extended
.pages_reusable
;
11955 short_info
->external_pager
= extended
.external_pager
;
11956 short_info
->shadow_depth
= extended
.shadow_depth
;
11957 short_info
->share_mode
= extended
.share_mode
;
11958 short_info
->ref_count
= extended
.ref_count
;
11962 vm_map_unlock_read(curr_map
);
11965 return KERN_SUCCESS
;
/*
 *	vm_map_region:
 *
 *	User call to obtain information about a region in
 *	a task's address map.  Currently, only one flavor is
 *	supported.
 *
 *	XXX The reserved and behavior fields cannot be filled
 *	in until the vm merge from the IK is completed, and
 *	vm_reserve is implemented.
 */
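/*
 * Illustrative user-space caller (a minimal sketch, not part of this file):
 * querying the region containing an address with the VM_REGION_BASIC_INFO_64
 * flavor handled below.  The helper name "show_region" is hypothetical.
 *
 *	#include <stdio.h>
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	static void
 *	show_region(mach_vm_address_t addr)
 *	{
 *		mach_vm_address_t address = addr;	// out: start of the region at or after addr
 *		mach_vm_size_t size = 0;
 *		vm_region_basic_info_data_64_t info;
 *		mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *		mach_port_t object_name = MACH_PORT_NULL;	// unused by callers, see below
 *
 *		kern_return_t kr = mach_vm_region(mach_task_self(), &address, &size,
 *						  VM_REGION_BASIC_INFO_64,
 *						  (vm_region_info_t)&info,
 *						  &count, &object_name);
 *		if (kr == KERN_SUCCESS) {
 *			printf("region 0x%llx..0x%llx prot %d/%d\n",
 *			       (unsigned long long)address,
 *			       (unsigned long long)(address + size),
 *			       info.protection, info.max_protection);
 *		}
 *	}
 */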
11983 vm_map_offset_t
*address
, /* IN/OUT */
11984 vm_map_size_t
*size
, /* OUT */
11985 vm_region_flavor_t flavor
, /* IN */
11986 vm_region_info_t info
, /* OUT */
11987 mach_msg_type_number_t
*count
, /* IN/OUT */
11988 mach_port_t
*object_name
) /* OUT */
11990 vm_map_entry_t tmp_entry
;
11991 vm_map_entry_t entry
;
11992 vm_map_offset_t start
;
11994 if (map
== VM_MAP_NULL
)
11995 return(KERN_INVALID_ARGUMENT
);
11999 case VM_REGION_BASIC_INFO
:
12000 /* legacy for old 32-bit objects info */
12002 vm_region_basic_info_t basic
;
12004 if (*count
< VM_REGION_BASIC_INFO_COUNT
)
12005 return(KERN_INVALID_ARGUMENT
);
12007 basic
= (vm_region_basic_info_t
) info
;
12008 *count
= VM_REGION_BASIC_INFO_COUNT
;
12010 vm_map_lock_read(map
);
12013 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
12014 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
12015 vm_map_unlock_read(map
);
12016 return(KERN_INVALID_ADDRESS
);
12022 start
= entry
->vme_start
;
12024 basic
->offset
= (uint32_t)VME_OFFSET(entry
);
12025 basic
->protection
= entry
->protection
;
12026 basic
->inheritance
= entry
->inheritance
;
12027 basic
->max_protection
= entry
->max_protection
;
12028 basic
->behavior
= entry
->behavior
;
12029 basic
->user_wired_count
= entry
->user_wired_count
;
12030 basic
->reserved
= entry
->is_sub_map
;
12032 *size
= (entry
->vme_end
- start
);
12034 if (object_name
) *object_name
= IP_NULL
;
12035 if (entry
->is_sub_map
) {
12036 basic
->shared
= FALSE
;
12038 basic
->shared
= entry
->is_shared
;
12041 vm_map_unlock_read(map
);
12042 return(KERN_SUCCESS
);
12045 case VM_REGION_BASIC_INFO_64
:
12047 vm_region_basic_info_64_t basic
;
12049 if (*count
< VM_REGION_BASIC_INFO_COUNT_64
)
12050 return(KERN_INVALID_ARGUMENT
);
12052 basic
= (vm_region_basic_info_64_t
) info
;
12053 *count
= VM_REGION_BASIC_INFO_COUNT_64
;
12055 vm_map_lock_read(map
);
12058 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
12059 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
12060 vm_map_unlock_read(map
);
12061 return(KERN_INVALID_ADDRESS
);
12067 start
= entry
->vme_start
;
12069 basic
->offset
= VME_OFFSET(entry
);
12070 basic
->protection
= entry
->protection
;
12071 basic
->inheritance
= entry
->inheritance
;
12072 basic
->max_protection
= entry
->max_protection
;
12073 basic
->behavior
= entry
->behavior
;
12074 basic
->user_wired_count
= entry
->user_wired_count
;
12075 basic
->reserved
= entry
->is_sub_map
;
12077 *size
= (entry
->vme_end
- start
);
12079 if (object_name
) *object_name
= IP_NULL
;
12080 if (entry
->is_sub_map
) {
12081 basic
->shared
= FALSE
;
12083 basic
->shared
= entry
->is_shared
;
12086 vm_map_unlock_read(map
);
12087 return(KERN_SUCCESS
);
12089 case VM_REGION_EXTENDED_INFO
:
12090 if (*count
< VM_REGION_EXTENDED_INFO_COUNT
)
12091 return(KERN_INVALID_ARGUMENT
);
12093 case VM_REGION_EXTENDED_INFO__legacy
:
12094 if (*count
< VM_REGION_EXTENDED_INFO_COUNT__legacy
)
12095 return KERN_INVALID_ARGUMENT
;
12098 vm_region_extended_info_t extended
;
12099 mach_msg_type_number_t original_count
;
12101 extended
= (vm_region_extended_info_t
) info
;
12103 vm_map_lock_read(map
);
12106 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
12107 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
12108 vm_map_unlock_read(map
);
12109 return(KERN_INVALID_ADDRESS
);
12114 start
= entry
->vme_start
;
12116 extended
->protection
= entry
->protection
;
12117 extended
->user_tag
= VME_ALIAS(entry
);
12118 extended
->pages_resident
= 0;
12119 extended
->pages_swapped_out
= 0;
12120 extended
->pages_shared_now_private
= 0;
12121 extended
->pages_dirtied
= 0;
12122 extended
->external_pager
= 0;
12123 extended
->shadow_depth
= 0;
12125 original_count
= *count
;
12126 if (flavor
== VM_REGION_EXTENDED_INFO__legacy
) {
12127 *count
= VM_REGION_EXTENDED_INFO_COUNT__legacy
;
12129 extended
->pages_reusable
= 0;
12130 *count
= VM_REGION_EXTENDED_INFO_COUNT
;
12133 vm_map_region_walk(map
, start
, entry
, VME_OFFSET(entry
), entry
->vme_end
- start
, extended
, TRUE
, *count
);
12135 if (extended
->external_pager
&& extended
->ref_count
== 2 && extended
->share_mode
== SM_SHARED
)
12136 extended
->share_mode
= SM_PRIVATE
;
12139 *object_name
= IP_NULL
;
12141 *size
= (entry
->vme_end
- start
);
12143 vm_map_unlock_read(map
);
12144 return(KERN_SUCCESS
);
12146 case VM_REGION_TOP_INFO
:
12148 vm_region_top_info_t top
;
12150 if (*count
< VM_REGION_TOP_INFO_COUNT
)
12151 return(KERN_INVALID_ARGUMENT
);
12153 top
= (vm_region_top_info_t
) info
;
12154 *count
= VM_REGION_TOP_INFO_COUNT
;
12156 vm_map_lock_read(map
);
12159 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
12160 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
12161 vm_map_unlock_read(map
);
12162 return(KERN_INVALID_ADDRESS
);
12168 start
= entry
->vme_start
;
12170 top
->private_pages_resident
= 0;
12171 top
->shared_pages_resident
= 0;
12173 vm_map_region_top_walk(entry
, top
);
12176 *object_name
= IP_NULL
;
12178 *size
= (entry
->vme_end
- start
);
12180 vm_map_unlock_read(map
);
12181 return(KERN_SUCCESS
);
12184 return(KERN_INVALID_ARGUMENT
);
#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
	MIN((entry_size),						\
	    ((obj)->all_reusable ?					\
	     (obj)->wired_page_count :					\
	     (obj)->resident_page_count - (obj)->reusable_page_count))
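/*
 * Worked example (hypothetical counts, for illustration only): for an object
 * with resident_page_count == 100, reusable_page_count == 30 and
 * all_reusable == FALSE, an entry covering 50 pages reports
 * OBJ_RESIDENT_COUNT(obj, 50) == MIN(50, 100 - 30) == 50; the result is
 * clamped to the entry's size and reusable pages are not charged.
 */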
12195 vm_map_region_top_walk(
12196 vm_map_entry_t entry
,
12197 vm_region_top_info_t top
)
12200 if (VME_OBJECT(entry
) == 0 || entry
->is_sub_map
) {
12201 top
->share_mode
= SM_EMPTY
;
12202 top
->ref_count
= 0;
12208 struct vm_object
*obj
, *tmp_obj
;
12210 uint32_t entry_size
;
12212 entry_size
= (uint32_t) ((entry
->vme_end
- entry
->vme_start
) / PAGE_SIZE_64
);
12214 obj
= VME_OBJECT(entry
);
12216 vm_object_lock(obj
);
12218 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
12221 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
12223 if (ref_count
== 1)
12224 top
->private_pages_resident
=
12225 OBJ_RESIDENT_COUNT(obj
, entry_size
);
12227 top
->shared_pages_resident
=
12228 OBJ_RESIDENT_COUNT(obj
, entry_size
);
12229 top
->ref_count
= ref_count
;
12230 top
->share_mode
= SM_COW
;
12232 while ((tmp_obj
= obj
->shadow
)) {
12233 vm_object_lock(tmp_obj
);
12234 vm_object_unlock(obj
);
12237 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
12240 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
12241 top
->shared_pages_resident
+=
12242 OBJ_RESIDENT_COUNT(obj
, entry_size
);
12243 top
->ref_count
+= ref_count
- 1;
12246 if (entry
->superpage_size
) {
12247 top
->share_mode
= SM_LARGE_PAGE
;
12248 top
->shared_pages_resident
= 0;
12249 top
->private_pages_resident
= entry_size
;
12250 } else if (entry
->needs_copy
) {
12251 top
->share_mode
= SM_COW
;
12252 top
->shared_pages_resident
=
12253 OBJ_RESIDENT_COUNT(obj
, entry_size
);
12255 if (ref_count
== 1 ||
12256 (ref_count
== 2 && !(obj
->pager_trusted
) && !(obj
->internal
))) {
12257 top
->share_mode
= SM_PRIVATE
;
12258 top
->private_pages_resident
=
12259 OBJ_RESIDENT_COUNT(obj
,
12262 top
->share_mode
= SM_SHARED
;
12263 top
->shared_pages_resident
=
12264 OBJ_RESIDENT_COUNT(obj
,
12268 top
->ref_count
= ref_count
;
12270 /* XXX K64: obj_id will be truncated */
12271 top
->obj_id
= (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj
);
12273 vm_object_unlock(obj
);
12278 vm_map_region_walk(
12280 vm_map_offset_t va
,
12281 vm_map_entry_t entry
,
12282 vm_object_offset_t offset
,
12283 vm_object_size_t range
,
12284 vm_region_extended_info_t extended
,
12285 boolean_t look_for_pages
,
12286 mach_msg_type_number_t count
)
12288 struct vm_object
*obj
, *tmp_obj
;
12289 vm_map_offset_t last_offset
;
12292 struct vm_object
*shadow_object
;
12295 if ((VME_OBJECT(entry
) == 0) ||
12296 (entry
->is_sub_map
) ||
12297 (VME_OBJECT(entry
)->phys_contiguous
&&
12298 !entry
->superpage_size
)) {
12299 extended
->share_mode
= SM_EMPTY
;
12300 extended
->ref_count
= 0;
12304 if (entry
->superpage_size
) {
12305 extended
->shadow_depth
= 0;
12306 extended
->share_mode
= SM_LARGE_PAGE
;
12307 extended
->ref_count
= 1;
12308 extended
->external_pager
= 0;
12309 extended
->pages_resident
= (unsigned int)(range
>> PAGE_SHIFT
);
12310 extended
->shadow_depth
= 0;
12314 obj
= VME_OBJECT(entry
);
12316 vm_object_lock(obj
);
12318 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
12321 if (look_for_pages
) {
12322 for (last_offset
= offset
+ range
;
12323 offset
< last_offset
;
12324 offset
+= PAGE_SIZE_64
, va
+= PAGE_SIZE
) {
12325 #if DEVELOPMENT || DEBUG
12326 if (vm_region_footprint
) {
12327 if (obj
->purgable
!= VM_PURGABLE_DENY
) {
12328 /* alternate accounting */
12329 } else if (entry
->iokit_acct
) {
12330 /* alternate accounting */
12331 extended
->pages_resident
++;
12332 extended
->pages_dirtied
++;
12337 pmap_query_page_info(map
->pmap
, va
, &disp
);
12338 if (disp
& PMAP_QUERY_PAGE_PRESENT
) {
12339 extended
->pages_resident
++;
12340 if (disp
& PMAP_QUERY_PAGE_REUSABLE
) {
12341 extended
->pages_reusable
++;
12342 } else if (!(disp
& PMAP_QUERY_PAGE_INTERNAL
) ||
12343 (disp
& PMAP_QUERY_PAGE_ALTACCT
)) {
12344 /* alternate accounting */
12346 extended
->pages_dirtied
++;
12348 } else if (disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
12349 if (disp
& PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
) {
12350 /* alternate accounting */
12352 extended
->pages_swapped_out
++;
12358 #endif /* DEVELOPMENT || DEBUG */
12359 vm_map_region_look_for_page(map
, va
, obj
,
12361 0, extended
, count
);
12363 #if DEVELOPMENT || DEBUG
12364 if (vm_region_footprint
) {
12365 goto collect_object_info
;
12367 #endif /* DEVELOPMENT || DEBUG */
12369 #if DEVELOPMENT || DEBUG
12370 collect_object_info
:
12371 #endif /* DEVELOPMENT || DEBUG */
12372 shadow_object
= obj
->shadow
;
12375 if ( !(obj
->pager_trusted
) && !(obj
->internal
))
12376 extended
->external_pager
= 1;
12378 if (shadow_object
!= VM_OBJECT_NULL
) {
12379 vm_object_lock(shadow_object
);
12381 shadow_object
!= VM_OBJECT_NULL
;
12383 vm_object_t next_shadow
;
12385 if ( !(shadow_object
->pager_trusted
) &&
12386 !(shadow_object
->internal
))
12387 extended
->external_pager
= 1;
12389 next_shadow
= shadow_object
->shadow
;
12391 vm_object_lock(next_shadow
);
12393 vm_object_unlock(shadow_object
);
12394 shadow_object
= next_shadow
;
12397 extended
->shadow_depth
= shadow_depth
;
12400 if (extended
->shadow_depth
|| entry
->needs_copy
)
12401 extended
->share_mode
= SM_COW
;
12403 if (ref_count
== 1)
12404 extended
->share_mode
= SM_PRIVATE
;
12406 if (obj
->true_share
)
12407 extended
->share_mode
= SM_TRUESHARED
;
12409 extended
->share_mode
= SM_SHARED
;
12412 extended
->ref_count
= ref_count
- extended
->shadow_depth
;
12414 for (i
= 0; i
< extended
->shadow_depth
; i
++) {
12415 if ((tmp_obj
= obj
->shadow
) == 0)
12417 vm_object_lock(tmp_obj
);
12418 vm_object_unlock(obj
);
12420 if ((ref_count
= tmp_obj
->ref_count
) > 1 && tmp_obj
->paging_in_progress
)
12423 extended
->ref_count
+= ref_count
;
12426 vm_object_unlock(obj
);
12428 if (extended
->share_mode
== SM_SHARED
) {
12429 vm_map_entry_t cur
;
12430 vm_map_entry_t last
;
12433 obj
= VME_OBJECT(entry
);
12434 last
= vm_map_to_entry(map
);
12437 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
12439 for (cur
= vm_map_first_entry(map
); cur
!= last
; cur
= cur
->vme_next
)
12440 my_refs
+= vm_map_region_count_obj_refs(cur
, obj
);
12442 if (my_refs
== ref_count
)
12443 extended
->share_mode
= SM_PRIVATE_ALIASED
;
12444 else if (my_refs
> 1)
12445 extended
->share_mode
= SM_SHARED_ALIASED
;
12450 /* object is locked on entry and locked on return */
12454 vm_map_region_look_for_page(
12455 __unused vm_map_t map
,
12456 __unused vm_map_offset_t va
,
12457 vm_object_t object
,
12458 vm_object_offset_t offset
,
12461 vm_region_extended_info_t extended
,
12462 mach_msg_type_number_t count
)
12465 vm_object_t shadow
;
12467 vm_object_t caller_object
;
12469 shadow
= object
->shadow
;
12470 caller_object
= object
;
12475 if ( !(object
->pager_trusted
) && !(object
->internal
))
12476 extended
->external_pager
= 1;
12478 if ((p
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
12479 if (shadow
&& (max_refcnt
== 1))
12480 extended
->pages_shared_now_private
++;
12482 if (!p
->fictitious
&&
12483 (p
->dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p
))))
12484 extended
->pages_dirtied
++;
12485 else if (count
>= VM_REGION_EXTENDED_INFO_COUNT
) {
12486 if (p
->reusable
|| object
->all_reusable
) {
12487 extended
->pages_reusable
++;
12491 extended
->pages_resident
++;
12493 if(object
!= caller_object
)
12494 vm_object_unlock(object
);
12498 if (object
->internal
&&
12500 !object
->terminating
&&
12501 object
->pager_ready
) {
12503 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
)
12504 == VM_EXTERNAL_STATE_EXISTS
) {
12505 /* the pager has that page */
12506 extended
->pages_swapped_out
++;
12507 if (object
!= caller_object
)
12508 vm_object_unlock(object
);
12514 vm_object_lock(shadow
);
12516 if ((ref_count
= shadow
->ref_count
) > 1 && shadow
->paging_in_progress
)
12519 if (++depth
> extended
->shadow_depth
)
12520 extended
->shadow_depth
= depth
;
12522 if (ref_count
> max_refcnt
)
12523 max_refcnt
= ref_count
;
12525 if(object
!= caller_object
)
12526 vm_object_unlock(object
);
12528 offset
= offset
+ object
->vo_shadow_offset
;
12530 shadow
= object
->shadow
;
12533 if(object
!= caller_object
)
12534 vm_object_unlock(object
);
12540 vm_map_region_count_obj_refs(
12541 vm_map_entry_t entry
,
12542 vm_object_t object
)
12545 vm_object_t chk_obj
;
12546 vm_object_t tmp_obj
;
12548 if (VME_OBJECT(entry
) == 0)
12551 if (entry
->is_sub_map
)
12556 chk_obj
= VME_OBJECT(entry
);
12557 vm_object_lock(chk_obj
);
12560 if (chk_obj
== object
)
12562 tmp_obj
= chk_obj
->shadow
;
12564 vm_object_lock(tmp_obj
);
12565 vm_object_unlock(chk_obj
);
/*
 *	Routine:	vm_map_simplify
 *
 *	Description:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
 */
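/*
 * Minimal sketch of the coalescing invariant checked by
 * vm_map_simplify_entry() below (illustrative only; "entries_mergeable" and
 * the two hypothetical adjacent entries are not part of this file): two
 * entries can be merged only if they are virtually contiguous and map
 * contiguous offsets of the same object with identical attributes.
 *
 *	static boolean_t
 *	entries_mergeable(vm_map_entry_t prev, vm_map_entry_t this)
 *	{
 *		return (prev->vme_end == this->vme_start) &&
 *		       (VME_OBJECT(prev) == VME_OBJECT(this)) &&
 *		       (VME_OFFSET(prev) + (prev->vme_end - prev->vme_start)
 *			== VME_OFFSET(this)) &&
 *		       (prev->protection == this->protection) &&
 *		       (prev->max_protection == this->max_protection);
 *		// ... the real check below compares many more fields.
 *	}
 */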
void
vm_map_simplify_entry(
	vm_map_t	map,
	vm_map_entry_t	this_entry)
{
	vm_map_entry_t	prev_entry;

	counter(c_vm_map_simplify_entry_called++);

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    (prev_entry->vme_end == this_entry->vme_start) &&

	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
					prev_entry->vme_start))
	     == VME_OFFSET(this_entry)) &&

	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->permanent == this_entry->permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    /* from_reserved_zone: OK if that field doesn't match */
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	     this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	     this_entry->vme_resilient_media) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    ((prev_entry->vme_atomic == FALSE) &&
	     (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == FALSE) &&
	    (this_entry->is_shared == FALSE) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)) {
		vm_map_store_entry_unlink(map, prev_entry);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned)
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
						   VM_MAP_PAGE_MASK(map)));
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(map, prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
		counter(c_vm_map_simplified++);
	}
}
void
vm_map_simplify(
	vm_map_t	map,
	vm_map_offset_t	start)
{
	vm_map_entry_t	this_entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &this_entry)) {
		vm_map_simplify_entry(map, this_entry);
		vm_map_simplify_entry(map, this_entry->vme_next);
	}
	counter(c_vm_map_simplify_called++);
	vm_map_unlock(map);
}
void
vm_map_simplify_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	/*
	 * The map should be locked (for "write") by the caller.
	 */

	if (start >= end) {
		/* invalid address range */
		return;
	}

	start = vm_map_trunc_page(start,
				  VM_MAP_PAGE_MASK(map));
	end = vm_map_round_page(end,
				VM_MAP_PAGE_MASK(map));

	if (!vm_map_lookup_entry(map, start, &entry)) {
		/* "start" is not mapped and "entry" ends before "start" */
		if (entry == vm_map_to_entry(map)) {
			/* start with first entry in the map */
			entry = vm_map_first_entry(map);
		} else {
			/* start with next entry */
			entry = entry->vme_next;
		}
	}

	while (entry != vm_map_to_entry(map) &&
	       entry->vme_start <= end) {
		/* try and coalesce "entry" with its previous entry */
		vm_map_simplify_entry(map, entry);
		entry = entry->vme_next;
	}
}
/*
 *	Routine:	vm_map_machine_attribute
 *	Purpose:
 *		Provide machine-specific attributes to mappings,
 *		such as cacheability etc. for machines that provide
 *		them.  NUMA architectures and machines with big/strange
 *		caches will use this.
 *	Note:
 *		Responsibilities for locking and checking are handled here,
 *		everything else in the pmap module.  If any non-volatile
 *		information must be kept, the pmap module should handle
 *		it itself.  [This assumes that attributes do not
 *		need to be inherited, which seems ok to me]
 */
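/*
 * Illustrative user-space caller (a minimal sketch, not part of this file),
 * assuming the Mach vm_machine_attribute() user stub and the MATTR_*
 * constants from <mach/vm_attributes.h>: requesting a cache flush over a
 * buffer, which ends up in this routine.  The helper name and the range are
 * hypothetical.
 *
 *	#include <mach/mach.h>
 *	#include <mach/vm_attributes.h>
 *
 *	static kern_return_t
 *	flush_cache_range(vm_address_t addr, vm_size_t size)
 *	{
 *		vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;
 *		return vm_machine_attribute(mach_task_self(), addr, size,
 *					    MATTR_CACHE, &value);
 *	}
 */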
12737 vm_map_machine_attribute(
12739 vm_map_offset_t start
,
12740 vm_map_offset_t end
,
12741 vm_machine_attribute_t attribute
,
12742 vm_machine_attribute_val_t
* value
) /* IN/OUT */
12745 vm_map_size_t sync_size
;
12746 vm_map_entry_t entry
;
12748 if (start
< vm_map_min(map
) || end
> vm_map_max(map
))
12749 return KERN_INVALID_ADDRESS
;
12751 /* Figure how much memory we need to flush (in page increments) */
12752 sync_size
= end
- start
;
12756 if (attribute
!= MATTR_CACHE
) {
12757 /* If we don't have to find physical addresses, we */
12758 /* don't have to do an explicit traversal here. */
12759 ret
= pmap_attribute(map
->pmap
, start
, end
-start
,
12761 vm_map_unlock(map
);
12765 ret
= KERN_SUCCESS
; /* Assume it all worked */
12768 if (vm_map_lookup_entry(map
, start
, &entry
)) {
12769 vm_map_size_t sub_size
;
12770 if((entry
->vme_end
- start
) > sync_size
) {
12771 sub_size
= sync_size
;
12774 sub_size
= entry
->vme_end
- start
;
12775 sync_size
-= sub_size
;
12777 if(entry
->is_sub_map
) {
12778 vm_map_offset_t sub_start
;
12779 vm_map_offset_t sub_end
;
12781 sub_start
= (start
- entry
->vme_start
)
12782 + VME_OFFSET(entry
);
12783 sub_end
= sub_start
+ sub_size
;
12784 vm_map_machine_attribute(
12790 if (VME_OBJECT(entry
)) {
12792 vm_object_t object
;
12793 vm_object_t base_object
;
12794 vm_object_t last_object
;
12795 vm_object_offset_t offset
;
12796 vm_object_offset_t base_offset
;
12797 vm_map_size_t range
;
12799 offset
= (start
- entry
->vme_start
)
12800 + VME_OFFSET(entry
);
12801 base_offset
= offset
;
12802 object
= VME_OBJECT(entry
);
12803 base_object
= object
;
12804 last_object
= NULL
;
12806 vm_object_lock(object
);
12809 m
= vm_page_lookup(
12812 if (m
&& !m
->fictitious
) {
12814 pmap_attribute_cache_sync(
12815 VM_PAGE_GET_PHYS_PAGE(m
),
12819 } else if (object
->shadow
) {
12820 offset
= offset
+ object
->vo_shadow_offset
;
12821 last_object
= object
;
12822 object
= object
->shadow
;
12823 vm_object_lock(last_object
->shadow
);
12824 vm_object_unlock(last_object
);
12827 range
-= PAGE_SIZE
;
12829 if (base_object
!= object
) {
12830 vm_object_unlock(object
);
12831 vm_object_lock(base_object
);
12832 object
= base_object
;
12834 /* Bump to the next page */
12835 base_offset
+= PAGE_SIZE
;
12836 offset
= base_offset
;
12838 vm_object_unlock(object
);
12843 vm_map_unlock(map
);
12844 return KERN_FAILURE
;
12849 vm_map_unlock(map
);
/*
 *	vm_map_behavior_set:
 *
 *	Sets the paging reference behavior of the specified address
 *	range in the target map.  Paging reference behavior affects
 *	how pagein operations resulting from faults on the map will be
 *	clustered.
 */
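/*
 * Illustrative user-space caller (a minimal sketch, not part of this file),
 * assuming the Mach vm_behavior_set() user stub from <mach/mach.h>: hinting
 * that a mapping will be read sequentially, which reaches this routine.
 * The helper name and the range are hypothetical.
 *
 *	#include <mach/mach.h>
 *
 *	static kern_return_t
 *	hint_sequential(vm_address_t addr, vm_size_t size)
 *	{
 *		return vm_behavior_set(mach_task_self(), addr, size,
 *				       VM_BEHAVIOR_SEQUENTIAL);
 *	}
 */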
12863 vm_map_behavior_set(
12865 vm_map_offset_t start
,
12866 vm_map_offset_t end
,
12867 vm_behavior_t new_behavior
)
12869 vm_map_entry_t entry
;
12870 vm_map_entry_t temp_entry
;
12873 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
12874 map
, start
, end
, new_behavior
, 0);
12877 start
< vm_map_min(map
) ||
12878 end
> vm_map_max(map
)) {
12879 return KERN_NO_SPACE
;
12882 switch (new_behavior
) {
12885 * This first block of behaviors all set a persistent state on the specified
12886 * memory range. All we have to do here is to record the desired behavior
12887 * in the vm_map_entry_t's.
12890 case VM_BEHAVIOR_DEFAULT
:
12891 case VM_BEHAVIOR_RANDOM
:
12892 case VM_BEHAVIOR_SEQUENTIAL
:
12893 case VM_BEHAVIOR_RSEQNTL
:
12894 case VM_BEHAVIOR_ZERO_WIRED_PAGES
:
12898 * The entire address range must be valid for the map.
12899 * Note that vm_map_range_check() does a
12900 * vm_map_lookup_entry() internally and returns the
12901 * entry containing the start of the address range if
12902 * the entire range is valid.
12904 if (vm_map_range_check(map
, start
, end
, &temp_entry
)) {
12905 entry
= temp_entry
;
12906 vm_map_clip_start(map
, entry
, start
);
12909 vm_map_unlock(map
);
12910 return(KERN_INVALID_ADDRESS
);
12913 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
12914 vm_map_clip_end(map
, entry
, end
);
12915 if (entry
->is_sub_map
) {
12916 assert(!entry
->use_pmap
);
12919 if( new_behavior
== VM_BEHAVIOR_ZERO_WIRED_PAGES
) {
12920 entry
->zero_wired_pages
= TRUE
;
12922 entry
->behavior
= new_behavior
;
12924 entry
= entry
->vme_next
;
12927 vm_map_unlock(map
);
12931 * The rest of these are different from the above in that they cause
12932 * an immediate action to take place as opposed to setting a behavior that
12933 * affects future actions.
12936 case VM_BEHAVIOR_WILLNEED
:
12937 return vm_map_willneed(map
, start
, end
);
12939 case VM_BEHAVIOR_DONTNEED
:
12940 return vm_map_msync(map
, start
, end
- start
, VM_SYNC_DEACTIVATE
| VM_SYNC_CONTIGUOUS
);
12942 case VM_BEHAVIOR_FREE
:
12943 return vm_map_msync(map
, start
, end
- start
, VM_SYNC_KILLPAGES
| VM_SYNC_CONTIGUOUS
);
12945 case VM_BEHAVIOR_REUSABLE
:
12946 return vm_map_reusable_pages(map
, start
, end
);
12948 case VM_BEHAVIOR_REUSE
:
12949 return vm_map_reuse_pages(map
, start
, end
);
12951 case VM_BEHAVIOR_CAN_REUSE
:
12952 return vm_map_can_reuse(map
, start
, end
);
12955 case VM_BEHAVIOR_PAGEOUT
:
12956 return vm_map_pageout(map
, start
, end
);
12957 #endif /* MACH_ASSERT */
12960 return(KERN_INVALID_ARGUMENT
);
12963 return(KERN_SUCCESS
);
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The present implementation is to do a read-ahead if the mapping corresponds
 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
 * and basically ignore the "advice" (which we are always free to do).
 */
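/*
 * Illustrative user-space caller (a minimal sketch, not part of this file):
 * madvise(2) with MADV_WILLNEED on an existing file-backed mapping is what
 * ultimately drives this routine.  The helper name is hypothetical.
 *
 *	#include <stdio.h>
 *	#include <sys/mman.h>
 *
 *	static void
 *	prefetch_file_mapping(void *addr, size_t len)
 *	{
 *		// addr/len describe a mapping created earlier with mmap(2)
 *		if (madvise(addr, len, MADV_WILLNEED) != 0)
 *			perror("madvise(MADV_WILLNEED)");
 *	}
 */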
12976 static kern_return_t
12979 vm_map_offset_t start
,
12980 vm_map_offset_t end
12983 vm_map_entry_t entry
;
12984 vm_object_t object
;
12985 memory_object_t pager
;
12986 struct vm_object_fault_info fault_info
;
12988 vm_object_size_t len
;
12989 vm_object_offset_t offset
;
12992 * Fill in static values in fault_info. Several fields get ignored by the code
12993 * we call, but we'll fill them in anyway since uninitialized fields are bad
12994 * when it comes to future backwards compatibility.
12997 fault_info
.interruptible
= THREAD_UNINT
; /* ignored value */
12998 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
12999 fault_info
.no_cache
= FALSE
; /* ignored value */
13000 fault_info
.stealth
= TRUE
;
13001 fault_info
.io_sync
= FALSE
;
13002 fault_info
.cs_bypass
= FALSE
;
13003 fault_info
.mark_zf_absent
= FALSE
;
13004 fault_info
.batch_pmap_op
= FALSE
;
13007 * The MADV_WILLNEED operation doesn't require any changes to the
13008 * vm_map_entry_t's, so the read lock is sufficient.
13011 vm_map_lock_read(map
);
13014 * The madvise semantics require that the address range be fully
13015 * allocated with no holes. Otherwise, we're required to return
13019 if (! vm_map_range_check(map
, start
, end
, &entry
)) {
13020 vm_map_unlock_read(map
);
13021 return KERN_INVALID_ADDRESS
;
13025 * Examine each vm_map_entry_t in the range.
13027 for (; entry
!= vm_map_to_entry(map
) && start
< end
; ) {
13030 * The first time through, the start address could be anywhere
13031 * within the vm_map_entry we found. So adjust the offset to
13032 * correspond. After that, the offset will always be zero to
13033 * correspond to the beginning of the current vm_map_entry.
13035 offset
= (start
- entry
->vme_start
) + VME_OFFSET(entry
);
13038 * Set the length so we don't go beyond the end of the
13039 * map_entry or beyond the end of the range we were given.
13040 * This range could span also multiple map entries all of which
13041 * map different files, so make sure we only do the right amount
13042 * of I/O for each object. Note that it's possible for there
13043 * to be multiple map entries all referring to the same object
13044 * but with different page permissions, but it's not worth
13045 * trying to optimize that case.
13047 len
= MIN(entry
->vme_end
- start
, end
- start
);
13049 if ((vm_size_t
) len
!= len
) {
13050 /* 32-bit overflow */
13051 len
= (vm_size_t
) (0 - PAGE_SIZE
);
13053 fault_info
.cluster_size
= (vm_size_t
) len
;
13054 fault_info
.lo_offset
= offset
;
13055 fault_info
.hi_offset
= offset
+ len
;
13056 fault_info
.user_tag
= VME_ALIAS(entry
);
13057 fault_info
.pmap_options
= 0;
13058 if (entry
->iokit_acct
||
13059 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
13060 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
13064 * If there's no read permission to this mapping, then just
13067 if ((entry
->protection
& VM_PROT_READ
) == 0) {
13068 entry
= entry
->vme_next
;
13069 start
= entry
->vme_start
;
13074 * Find the file object backing this map entry. If there is
13075 * none, then we simply ignore the "will need" advice for this
13076 * entry and go on to the next one.
13078 if ((object
= find_vnode_object(entry
)) == VM_OBJECT_NULL
) {
13079 entry
= entry
->vme_next
;
13080 start
= entry
->vme_start
;
13085 * The data_request() could take a long time, so let's
13086 * release the map lock to avoid blocking other threads.
13088 vm_map_unlock_read(map
);
13090 vm_object_paging_begin(object
);
13091 pager
= object
->pager
;
13092 vm_object_unlock(object
);
13095 * Get the data from the object asynchronously.
13097 * Note that memory_object_data_request() places limits on the
13098 * amount of I/O it will do. Regardless of the len we
13099 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
13100 * silently truncates the len to that size. This isn't
13101 * necessarily bad since madvise shouldn't really be used to
13102 * page in unlimited amounts of data. Other Unix variants
13103 * limit the willneed case as well. If this turns out to be an
13104 * issue for developers, then we can always adjust the policy
13105 * here and still be backwards compatible since this is all
13108 kr
= memory_object_data_request(
13110 offset
+ object
->paging_offset
,
13113 (memory_object_fault_info_t
)&fault_info
);
13115 vm_object_lock(object
);
13116 vm_object_paging_end(object
);
13117 vm_object_unlock(object
);
13120 * If we couldn't do the I/O for some reason, just give up on
13121 * the madvise. We still return success to the user since
13122 * madvise isn't supposed to fail when the advice can't be
13125 if (kr
!= KERN_SUCCESS
) {
13126 return KERN_SUCCESS
;
13130 if (start
>= end
) {
13132 return KERN_SUCCESS
;
13135 /* look up next entry */
13136 vm_map_lock_read(map
);
13137 if (! vm_map_lookup_entry(map
, start
, &entry
)) {
13139 * There's a new hole in the address range.
13141 vm_map_unlock_read(map
);
13142 return KERN_INVALID_ADDRESS
;
13146 vm_map_unlock_read(map
);
13147 return KERN_SUCCESS
;
13151 vm_map_entry_is_reusable(
13152 vm_map_entry_t entry
)
13154 /* Only user map entries */
13156 vm_object_t object
;
13158 if (entry
->is_sub_map
) {
13162 switch (VME_ALIAS(entry
)) {
13163 case VM_MEMORY_MALLOC
:
13164 case VM_MEMORY_MALLOC_SMALL
:
13165 case VM_MEMORY_MALLOC_LARGE
:
13166 case VM_MEMORY_REALLOC
:
13167 case VM_MEMORY_MALLOC_TINY
:
13168 case VM_MEMORY_MALLOC_LARGE_REUSABLE
:
13169 case VM_MEMORY_MALLOC_LARGE_REUSED
:
13171 * This is a malloc() memory region: check if it's still
13172 * in its original state and can be re-used for more
13173 * malloc() allocations.
13178 * Not a malloc() memory region: let the caller decide if
13184 if (entry
->is_shared
||
13185 entry
->is_sub_map
||
13186 entry
->in_transition
||
13187 entry
->protection
!= VM_PROT_DEFAULT
||
13188 entry
->max_protection
!= VM_PROT_ALL
||
13189 entry
->inheritance
!= VM_INHERIT_DEFAULT
||
13191 entry
->permanent
||
13192 entry
->superpage_size
!= FALSE
||
13193 entry
->zero_wired_pages
||
13194 entry
->wired_count
!= 0 ||
13195 entry
->user_wired_count
!= 0) {
13199 object
= VME_OBJECT(entry
);
13200 if (object
== VM_OBJECT_NULL
) {
	/*
	 * Let's proceed even if the VM object is potentially
	 * shared.
	 * We check for this later when processing the actual
	 * VM pages, so the contents will be safe if shared.
	 *
	 * But we can still mark this memory region as "reusable" to
	 * acknowledge that the caller did let us know that the memory
	 * could be re-used and should not be penalized for holding
	 * on to it.  This allows its "resident size" to not include
	 * the reusable range.
	 */
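	/*
	 * Illustrative user-space sequence (a minimal sketch, not part of
	 * this file), assuming the Darwin-specific madvise flags in
	 * <sys/mman.h>: a malloc-style allocator marks an idle region
	 * "reusable" so it stops counting against resident size, then
	 * reclaims it before touching the memory again.  The helper names
	 * are hypothetical.
	 *
	 *	#include <sys/mman.h>
	 *
	 *	static void
	 *	cache_release(void *addr, size_t len)
	 *	{
	 *		(void)madvise(addr, len, MADV_FREE_REUSABLE);
	 *	}
	 *
	 *	static void
	 *	cache_reacquire(void *addr, size_t len)
	 *	{
	 *		(void)madvise(addr, len, MADV_FREE_REUSE);
	 *	}
	 */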
13217 object
->ref_count
== 1 &&
13219 object
->wired_page_count
== 0 &&
13220 object
->copy
== VM_OBJECT_NULL
&&
13221 object
->shadow
== VM_OBJECT_NULL
&&
13222 object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
13223 object
->internal
&&
13224 !object
->true_share
&&
13225 object
->wimg_bits
== VM_WIMG_USE_DEFAULT
&&
13226 !object
->code_signed
) {
13234 static kern_return_t
13235 vm_map_reuse_pages(
13237 vm_map_offset_t start
,
13238 vm_map_offset_t end
)
13240 vm_map_entry_t entry
;
13241 vm_object_t object
;
13242 vm_object_offset_t start_offset
, end_offset
;
13245 * The MADV_REUSE operation doesn't require any changes to the
13246 * vm_map_entry_t's, so the read lock is sufficient.
13249 vm_map_lock_read(map
);
13250 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
13253 * The madvise semantics require that the address range be fully
13254 * allocated with no holes. Otherwise, we're required to return
13258 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
13259 vm_map_unlock_read(map
);
13260 vm_page_stats_reusable
.reuse_pages_failure
++;
13261 return KERN_INVALID_ADDRESS
;
13265 * Examine each vm_map_entry_t in the range.
13267 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
13268 entry
= entry
->vme_next
) {
13270 * Sanity check on the VM map entry.
13272 if (! vm_map_entry_is_reusable(entry
)) {
13273 vm_map_unlock_read(map
);
13274 vm_page_stats_reusable
.reuse_pages_failure
++;
13275 return KERN_INVALID_ADDRESS
;
13279 * The first time through, the start address could be anywhere
13280 * within the vm_map_entry we found. So adjust the offset to
13283 if (entry
->vme_start
< start
) {
13284 start_offset
= start
- entry
->vme_start
;
13288 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
13289 start_offset
+= VME_OFFSET(entry
);
13290 end_offset
+= VME_OFFSET(entry
);
13292 assert(!entry
->is_sub_map
);
13293 object
= VME_OBJECT(entry
);
13294 if (object
!= VM_OBJECT_NULL
) {
13295 vm_object_lock(object
);
13296 vm_object_reuse_pages(object
, start_offset
, end_offset
,
13298 vm_object_unlock(object
);
13301 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSABLE
) {
13304 * We do not hold the VM map exclusively here.
13305 * The "alias" field is not that critical, so it's
13306 * safe to update it here, as long as it is the only
13307 * one that can be modified while holding the VM map
13310 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSED
);
13314 vm_map_unlock_read(map
);
13315 vm_page_stats_reusable
.reuse_pages_success
++;
13316 return KERN_SUCCESS
;
13320 static kern_return_t
13321 vm_map_reusable_pages(
13323 vm_map_offset_t start
,
13324 vm_map_offset_t end
)
13326 vm_map_entry_t entry
;
13327 vm_object_t object
;
13328 vm_object_offset_t start_offset
, end_offset
;
13329 vm_map_offset_t pmap_offset
;
13332 * The MADV_REUSABLE operation doesn't require any changes to the
13333 * vm_map_entry_t's, so the read lock is sufficient.
13336 vm_map_lock_read(map
);
13337 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
13340 * The madvise semantics require that the address range be fully
13341 * allocated with no holes. Otherwise, we're required to return
13345 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
13346 vm_map_unlock_read(map
);
13347 vm_page_stats_reusable
.reusable_pages_failure
++;
13348 return KERN_INVALID_ADDRESS
;
13352 * Examine each vm_map_entry_t in the range.
13354 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
13355 entry
= entry
->vme_next
) {
13356 int kill_pages
= 0;
13359 * Sanity check on the VM map entry.
13361 if (! vm_map_entry_is_reusable(entry
)) {
13362 vm_map_unlock_read(map
);
13363 vm_page_stats_reusable
.reusable_pages_failure
++;
13364 return KERN_INVALID_ADDRESS
;
13367 if (! (entry
->protection
& VM_PROT_WRITE
) && !entry
->used_for_jit
) {
13368 /* not writable: can't discard contents */
13369 vm_map_unlock_read(map
);
13370 vm_page_stats_reusable
.reusable_nonwritable
++;
13371 vm_page_stats_reusable
.reusable_pages_failure
++;
13372 return KERN_PROTECTION_FAILURE
;
13376 * The first time through, the start address could be anywhere
13377 * within the vm_map_entry we found. So adjust the offset to
13380 if (entry
->vme_start
< start
) {
13381 start_offset
= start
- entry
->vme_start
;
13382 pmap_offset
= start
;
13385 pmap_offset
= entry
->vme_start
;
13387 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
13388 start_offset
+= VME_OFFSET(entry
);
13389 end_offset
+= VME_OFFSET(entry
);
13391 assert(!entry
->is_sub_map
);
13392 object
= VME_OBJECT(entry
);
13393 if (object
== VM_OBJECT_NULL
)
13397 vm_object_lock(object
);
13398 if (((object
->ref_count
== 1) ||
13399 (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
&&
13400 object
->copy
== VM_OBJECT_NULL
)) &&
13401 object
->shadow
== VM_OBJECT_NULL
&&
13403 * "iokit_acct" entries are billed for their virtual size
13404 * (rather than for their resident pages only), so they
13405 * wouldn't benefit from making pages reusable, and it
13406 * would be hard to keep track of pages that are both
13407 * "iokit_acct" and "reusable" in the pmap stats and
13410 !(entry
->iokit_acct
||
13411 (!entry
->is_sub_map
&& !entry
->use_pmap
))) {
13412 if (object
->ref_count
!= 1) {
13413 vm_page_stats_reusable
.reusable_shared
++;
13419 if (kill_pages
!= -1) {
13420 vm_object_deactivate_pages(object
,
13422 end_offset
- start_offset
,
13424 TRUE
/*reusable_pages*/,
13428 vm_page_stats_reusable
.reusable_pages_shared
++;
13430 vm_object_unlock(object
);
13432 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE
||
13433 VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSED
) {
13436 * We do not hold the VM map exclusively here.
13437 * The "alias" field is not that critical, so it's
13438 * safe to update it here, as long as it is the only
13439 * one that can be modified while holding the VM map
13442 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSABLE
);
13446 vm_map_unlock_read(map
);
13447 vm_page_stats_reusable
.reusable_pages_success
++;
13448 return KERN_SUCCESS
;
13452 static kern_return_t
13455 vm_map_offset_t start
,
13456 vm_map_offset_t end
)
13458 vm_map_entry_t entry
;
13461 * The MADV_REUSABLE operation doesn't require any changes to the
13462 * vm_map_entry_t's, so the read lock is sufficient.
13465 vm_map_lock_read(map
);
13466 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
13469 * The madvise semantics require that the address range be fully
13470 * allocated with no holes. Otherwise, we're required to return
13474 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
13475 vm_map_unlock_read(map
);
13476 vm_page_stats_reusable
.can_reuse_failure
++;
13477 return KERN_INVALID_ADDRESS
;
13481 * Examine each vm_map_entry_t in the range.
13483 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
13484 entry
= entry
->vme_next
) {
13486 * Sanity check on the VM map entry.
13488 if (! vm_map_entry_is_reusable(entry
)) {
13489 vm_map_unlock_read(map
);
13490 vm_page_stats_reusable
.can_reuse_failure
++;
13491 return KERN_INVALID_ADDRESS
;
13495 vm_map_unlock_read(map
);
13496 vm_page_stats_reusable
.can_reuse_success
++;
13497 return KERN_SUCCESS
;
13502 static kern_return_t
13505 vm_map_offset_t start
,
13506 vm_map_offset_t end
)
13508 vm_map_entry_t entry
;
13511 * The MADV_PAGEOUT operation doesn't require any changes to the
13512 * vm_map_entry_t's, so the read lock is sufficient.
13515 vm_map_lock_read(map
);
13518 * The madvise semantics require that the address range be fully
13519 * allocated with no holes. Otherwise, we're required to return
13523 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
13524 vm_map_unlock_read(map
);
13525 return KERN_INVALID_ADDRESS
;
13529 * Examine each vm_map_entry_t in the range.
13531 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
13532 entry
= entry
->vme_next
) {
13533 vm_object_t object
;
13536 * Sanity check on the VM map entry.
13538 if (entry
->is_sub_map
) {
13540 vm_map_offset_t submap_start
;
13541 vm_map_offset_t submap_end
;
13542 vm_map_entry_t submap_entry
;
13544 submap
= VME_SUBMAP(entry
);
13545 submap_start
= VME_OFFSET(entry
);
13546 submap_end
= submap_start
+ (entry
->vme_end
-
13549 vm_map_lock_read(submap
);
13551 if (! vm_map_range_check(submap
,
13555 vm_map_unlock_read(submap
);
13556 vm_map_unlock_read(map
);
13557 return KERN_INVALID_ADDRESS
;
13560 object
= VME_OBJECT(submap_entry
);
13561 if (submap_entry
->is_sub_map
||
13562 object
== VM_OBJECT_NULL
||
13563 !object
->internal
) {
13564 vm_map_unlock_read(submap
);
13568 vm_object_pageout(object
);
13570 vm_map_unlock_read(submap
);
13571 submap
= VM_MAP_NULL
;
13572 submap_entry
= VM_MAP_ENTRY_NULL
;
13576 object
= VME_OBJECT(entry
);
13577 if (entry
->is_sub_map
||
13578 object
== VM_OBJECT_NULL
||
13579 !object
->internal
) {
13583 vm_object_pageout(object
);
13586 vm_map_unlock_read(map
);
13587 return KERN_SUCCESS
;
13589 #endif /* MACH_ASSERT */
/*
 *	Routine:	vm_map_entry_insert
 *
 *	Description:	This routine inserts a new vm_map_entry in a locked map.
 */
13598 vm_map_entry_insert(
13600 vm_map_entry_t insp_entry
,
13601 vm_map_offset_t start
,
13602 vm_map_offset_t end
,
13603 vm_object_t object
,
13604 vm_object_offset_t offset
,
13605 boolean_t needs_copy
,
13606 boolean_t is_shared
,
13607 boolean_t in_transition
,
13608 vm_prot_t cur_protection
,
13609 vm_prot_t max_protection
,
13610 vm_behavior_t behavior
,
13611 vm_inherit_t inheritance
,
13612 unsigned wired_count
,
13613 boolean_t no_cache
,
13614 boolean_t permanent
,
13615 unsigned int superpage_size
,
13616 boolean_t clear_map_aligned
,
13617 boolean_t is_submap
)
13619 vm_map_entry_t new_entry
;
13621 assert(insp_entry
!= (vm_map_entry_t
)0);
13623 new_entry
= vm_map_entry_create(map
, !map
->hdr
.entries_pageable
);
13625 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
13626 new_entry
->map_aligned
= TRUE
;
13628 new_entry
->map_aligned
= FALSE
;
13630 if (clear_map_aligned
&&
13631 (! VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)) ||
13632 ! VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)))) {
13633 new_entry
->map_aligned
= FALSE
;
13636 new_entry
->vme_start
= start
;
13637 new_entry
->vme_end
= end
;
13638 assert(page_aligned(new_entry
->vme_start
));
13639 assert(page_aligned(new_entry
->vme_end
));
13640 if (new_entry
->map_aligned
) {
13641 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
13642 VM_MAP_PAGE_MASK(map
)));
13643 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
13644 VM_MAP_PAGE_MASK(map
)));
13646 assert(new_entry
->vme_start
< new_entry
->vme_end
);
13648 VME_OBJECT_SET(new_entry
, object
);
13649 VME_OFFSET_SET(new_entry
, offset
);
13650 new_entry
->is_shared
= is_shared
;
13651 new_entry
->is_sub_map
= is_submap
;
13652 new_entry
->needs_copy
= needs_copy
;
13653 new_entry
->in_transition
= in_transition
;
13654 new_entry
->needs_wakeup
= FALSE
;
13655 new_entry
->inheritance
= inheritance
;
13656 new_entry
->protection
= cur_protection
;
13657 new_entry
->max_protection
= max_protection
;
13658 new_entry
->behavior
= behavior
;
13659 new_entry
->wired_count
= wired_count
;
13660 new_entry
->user_wired_count
= 0;
13663 * submap: "use_pmap" means "nested".
13666 new_entry
->use_pmap
= FALSE
;
13669 * object: "use_pmap" means "use pmap accounting" for footprint.
13672 new_entry
->use_pmap
= TRUE
;
13674 VME_ALIAS_SET(new_entry
, 0);
13675 new_entry
->zero_wired_pages
= FALSE
;
13676 new_entry
->no_cache
= no_cache
;
13677 new_entry
->permanent
= permanent
;
13678 if (superpage_size
)
13679 new_entry
->superpage_size
= TRUE
;
13681 new_entry
->superpage_size
= FALSE
;
13682 new_entry
->used_for_jit
= FALSE
;
13683 new_entry
->iokit_acct
= FALSE
;
13684 new_entry
->vme_resilient_codesign
= FALSE
;
13685 new_entry
->vme_resilient_media
= FALSE
;
13686 new_entry
->vme_atomic
= FALSE
;
13689 * Insert the new entry into the list.
13692 vm_map_store_entry_link(map
, insp_entry
, new_entry
);
13693 map
->size
+= end
- start
;
13696 * Update the free space hint and the lookup hint.
13699 SAVE_HINT_MAP_WRITE(map
, new_entry
);
/*
 *	Routine:	vm_map_remap_extract
 *
 *	Description:	This routine returns a vm_map_entry list from a map.
 */
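/*
 * Illustrative user-space caller (a minimal sketch, not part of this file):
 * this routine is the kernel-side worker behind remap requests such as
 * mach_vm_remap(), which mirrors an existing range at a new address in a
 * target task.  The helper name and the source range are hypothetical.
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	static kern_return_t
 *	mirror_range(mach_vm_address_t src, mach_vm_size_t size,
 *		     mach_vm_address_t *out)
 *	{
 *		vm_prot_t cur_prot, max_prot;
 *		*out = 0;
 *		return mach_vm_remap(mach_task_self(), out, size, 0,
 *				     VM_FLAGS_ANYWHERE,
 *				     mach_task_self(), src,
 *				     FALSE,		// share, don't copy
 *				     &cur_prot, &max_prot,
 *				     VM_INHERIT_SHARE);
 *	}
 */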
13708 static kern_return_t
13709 vm_map_remap_extract(
13711 vm_map_offset_t addr
,
13712 vm_map_size_t size
,
13714 struct vm_map_header
*map_header
,
13715 vm_prot_t
*cur_protection
,
13716 vm_prot_t
*max_protection
,
13717 /* What, no behavior? */
13718 vm_inherit_t inheritance
,
13719 boolean_t pageable
,
13720 boolean_t same_map
)
13722 kern_return_t result
;
13723 vm_map_size_t mapped_size
;
13724 vm_map_size_t tmp_size
;
13725 vm_map_entry_t src_entry
; /* result of last map lookup */
13726 vm_map_entry_t new_entry
;
13727 vm_object_offset_t offset
;
13728 vm_map_offset_t map_address
;
13729 vm_map_offset_t src_start
; /* start of entry to map */
13730 vm_map_offset_t src_end
; /* end of region to be mapped */
13731 vm_object_t object
;
13732 vm_map_version_t version
;
13733 boolean_t src_needs_copy
;
13734 boolean_t new_entry_needs_copy
;
13736 assert(map
!= VM_MAP_NULL
);
13738 assert(size
== vm_map_round_page(size
, PAGE_MASK
));
13739 assert(inheritance
== VM_INHERIT_NONE
||
13740 inheritance
== VM_INHERIT_COPY
||
13741 inheritance
== VM_INHERIT_SHARE
);
13744 * Compute start and end of region.
13746 src_start
= vm_map_trunc_page(addr
, PAGE_MASK
);
13747 src_end
= vm_map_round_page(src_start
+ size
, PAGE_MASK
);
13751 * Initialize map_header.
13753 map_header
->links
.next
= (struct vm_map_entry
*)&map_header
->links
;
13754 map_header
->links
.prev
= (struct vm_map_entry
*)&map_header
->links
;
13755 map_header
->nentries = 0;
    map_header->entries_pageable = pageable;
    map_header->page_shift = PAGE_SHIFT;

    vm_map_store_init(map_header);

    *cur_protection = VM_PROT_ALL;
    *max_protection = VM_PROT_ALL;

    result = KERN_SUCCESS;

    /*
     * The specified source virtual space might correspond to
     * multiple map entries, need to loop on them.
     */
    while (mapped_size != size) {
        vm_map_size_t entry_size;

        /*
         * Find the beginning of the region.
         */
        if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
            result = KERN_INVALID_ADDRESS;
            break;
        }

        if (src_start < src_entry->vme_start ||
            (mapped_size && src_start != src_entry->vme_start)) {
            result = KERN_INVALID_ADDRESS;
            break;
        }

        tmp_size = size - mapped_size;
        if (src_end > src_entry->vme_end)
            tmp_size -= (src_end - src_entry->vme_end);

        entry_size = (vm_map_size_t)(src_entry->vme_end -
                                     src_entry->vme_start);

        if (src_entry->is_sub_map) {
            vm_map_reference(VME_SUBMAP(src_entry));
            object = VM_OBJECT_NULL;
        } else {
            object = VME_OBJECT(src_entry);
            if (src_entry->iokit_acct) {
                /*
                 * This entry uses "IOKit accounting".
                 */
            } else if (object != VM_OBJECT_NULL &&
                       object->purgable != VM_PURGABLE_DENY) {
                /*
                 * Purgeable objects have their own accounting:
                 * no pmap accounting for them.
                 */
                assert(!src_entry->use_pmap);
            } else {
                /*
                 * Not IOKit or purgeable:
                 * must be accounted by pmap stats.
                 */
                assert(src_entry->use_pmap);
            }

            if (object == VM_OBJECT_NULL) {
                object = vm_object_allocate(entry_size);
                VME_OFFSET_SET(src_entry, 0);
                VME_OBJECT_SET(src_entry, object);
            } else if (object->copy_strategy !=
                       MEMORY_OBJECT_COPY_SYMMETRIC) {
                /*
                 * We are already using an asymmetric
                 * copy, and therefore we already have
                 * the right object.
                 */
                assert(!src_entry->needs_copy);
            } else if (src_entry->needs_copy || object->shadowed ||
                       (object->internal && !object->true_share &&
                        !src_entry->is_shared &&
                        object->vo_size > entry_size)) {

                VME_OBJECT_SHADOW(src_entry, entry_size);

                if (!src_entry->needs_copy &&
                    (src_entry->protection & VM_PROT_WRITE)) {
                    vm_prot_t prot;

                    prot = src_entry->protection & ~VM_PROT_WRITE;

                    if (override_nx(map,
                                    VME_ALIAS(src_entry))
                        && prot)
                        prot |= VM_PROT_EXECUTE;

                    if (map->mapped_in_other_pmaps) {
                        vm_object_pmap_protect(
                            VME_OBJECT(src_entry),
                            VME_OFFSET(src_entry),
                            entry_size,
                            PMAP_NULL,
                            src_entry->vme_start,
                            prot);
                    } else {
                        pmap_protect(vm_map_pmap(map),
                                     src_entry->vme_start,
                                     src_entry->vme_end,
                                     prot);
                    }
                }

                object = VME_OBJECT(src_entry);
                src_entry->needs_copy = FALSE;
            }

            vm_object_lock(object);
            vm_object_reference_locked(object); /* object ref. for new entry */
            if (object->copy_strategy ==
                MEMORY_OBJECT_COPY_SYMMETRIC) {
                object->copy_strategy =
                    MEMORY_OBJECT_COPY_DELAY;
            }
            vm_object_unlock(object);
        }

        offset = (VME_OFFSET(src_entry) +
                  (src_start - src_entry->vme_start));

        new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
        vm_map_entry_copy(new_entry, src_entry);
        if (new_entry->is_sub_map) {
            /* clr address space specifics */
            new_entry->use_pmap = FALSE;
        }

        new_entry->map_aligned = FALSE;

        new_entry->vme_start = map_address;
        new_entry->vme_end = map_address + tmp_size;
        assert(new_entry->vme_start < new_entry->vme_end);
        new_entry->inheritance = inheritance;
        VME_OFFSET_SET(new_entry, offset);

        /*
         * The new region has to be copied now if required.
         */
    RestartCopy:
        if (!copy) {
            /*
             * Cannot allow an entry describing a JIT
             * region to be shared across address spaces.
             */
            if (src_entry->used_for_jit == TRUE && !same_map) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }
            src_entry->is_shared = TRUE;
            new_entry->is_shared = TRUE;
            if (!(new_entry->is_sub_map))
                new_entry->needs_copy = FALSE;

        } else if (src_entry->is_sub_map) {
            /* make this a COW sub_map if not already */
            assert(new_entry->wired_count == 0);
            new_entry->needs_copy = TRUE;
            object = VM_OBJECT_NULL;
        } else if (src_entry->wired_count == 0 &&
                   vm_object_copy_quickly(&VME_OBJECT(new_entry),
                                          VME_OFFSET(new_entry),
                                          (new_entry->vme_end -
                                           new_entry->vme_start),
                                          &src_needs_copy,
                                          &new_entry_needs_copy)) {

            new_entry->needs_copy = new_entry_needs_copy;
            new_entry->is_shared = FALSE;

            /*
             * Handle copy_on_write semantics.
             */
            if (src_needs_copy && !src_entry->needs_copy) {
                vm_prot_t prot;

                prot = src_entry->protection & ~VM_PROT_WRITE;

                if (override_nx(map,
                                VME_ALIAS(src_entry))
                    && prot)
                    prot |= VM_PROT_EXECUTE;

                vm_object_pmap_protect(object,
                                       offset,
                                       entry_size,
                                       ((src_entry->is_shared
                                         || map->mapped_in_other_pmaps) ?
                                        PMAP_NULL : map->pmap),
                                       src_entry->vme_start,
                                       prot);

                assert(src_entry->wired_count == 0);
                src_entry->needs_copy = TRUE;
            }
            /*
             * Throw away the old object reference of the new entry.
             */
            vm_object_deallocate(object);

        } else {
            new_entry->is_shared = FALSE;

            /*
             * The map can be safely unlocked since we
             * already hold a reference on the object.
             *
             * Record the timestamp of the map for later
             * verification, and unlock the map.
             */
            version.main_timestamp = map->timestamp;
            vm_map_unlock(map);     /* Increments timestamp once! */

            /*
             * Perform the copy.
             */
            if (src_entry->wired_count > 0) {
                vm_object_lock(object);
                result = vm_object_copy_slowly(
                    object,
                    offset,
                    entry_size,
                    THREAD_UNINT,
                    &VME_OBJECT(new_entry));

                VME_OFFSET_SET(new_entry, 0);
                new_entry->needs_copy = FALSE;
            } else {
                vm_object_offset_t new_offset;

                new_offset = VME_OFFSET(new_entry);
                result = vm_object_copy_strategically(
                    object,
                    offset,
                    entry_size,
                    &VME_OBJECT(new_entry),
                    &new_offset,
                    &new_entry_needs_copy);
                if (new_offset != VME_OFFSET(new_entry)) {
                    VME_OFFSET_SET(new_entry, new_offset);
                }

                new_entry->needs_copy = new_entry_needs_copy;
            }

            /*
             * Throw away the old object reference of the new entry.
             */
            vm_object_deallocate(object);

            if (result != KERN_SUCCESS &&
                result != KERN_MEMORY_RESTART_COPY) {
                _vm_map_entry_dispose(map_header, new_entry);
                vm_map_lock(map);
                break;
            }

            /*
             * Verify that the map has not substantially
             * changed while the copy was being made.
             */
            vm_map_lock(map);
            if (version.main_timestamp + 1 != map->timestamp) {
                /*
                 * Simple version comparison failed.
                 *
                 * Retry the lookup and verify that the
                 * same object/offset are still present.
                 */
                vm_object_deallocate(VME_OBJECT(new_entry));
                _vm_map_entry_dispose(map_header, new_entry);
                if (result == KERN_MEMORY_RESTART_COPY)
                    result = KERN_SUCCESS;
                continue;
            }

            if (result == KERN_MEMORY_RESTART_COPY) {
                vm_object_reference(object);
                goto RestartCopy;
            }
        }

        _vm_map_store_entry_link(map_header,
                                 map_header->links.prev, new_entry);

        /* Protections for submap mapping are irrelevant here */
        if (!src_entry->is_sub_map) {
            *cur_protection &= src_entry->protection;
            *max_protection &= src_entry->max_protection;
        }
        map_address += tmp_size;
        mapped_size += tmp_size;
        src_start += tmp_size;

    } /* end while */

    vm_map_unlock(map);
    if (result != KERN_SUCCESS) {
        /*
         * Free all allocated elements.
         */
        for (src_entry = map_header->links.next;
             src_entry != (struct vm_map_entry *)&map_header->links;
             src_entry = new_entry) {
            new_entry = src_entry->vme_next;
            _vm_map_store_entry_unlink(map_header, src_entry);
            if (src_entry->is_sub_map) {
                vm_map_deallocate(VME_SUBMAP(src_entry));
            } else {
                vm_object_deallocate(VME_OBJECT(src_entry));
            }
            _vm_map_entry_dispose(map_header, src_entry);
        }
    }
    return result;
}
/*
 *	Routine:	vm_remap
 *
 *			Map portion of a task's address space.
 *			Mapped region must not overlap more than
 *			one vm memory object. Protections and
 *			inheritance attributes remain the same
 *			as in the original task and are	out parameters.
 *			Source and Target task can be identical
 *			Other attributes are identical as for vm_map()
 */
kern_return_t
vm_map_remap(
    vm_map_t            target_map,
    vm_map_address_t    *address,
    vm_map_size_t       size,
    vm_map_offset_t     mask,
    int                 flags,
    vm_map_t            src_map,
    vm_map_offset_t     memory_address,
    boolean_t           copy,
    vm_prot_t           *cur_protection,
    vm_prot_t           *max_protection,
    vm_inherit_t        inheritance)
{
    kern_return_t       result;
    vm_map_entry_t      entry;
    vm_map_entry_t      insp_entry = VM_MAP_ENTRY_NULL;
    vm_map_entry_t      new_entry;
    struct vm_map_header map_header;
    vm_map_offset_t     offset_in_mapping;

    if (target_map == VM_MAP_NULL)
        return KERN_INVALID_ARGUMENT;

    switch (inheritance) {
    case VM_INHERIT_NONE:
    case VM_INHERIT_COPY:
    case VM_INHERIT_SHARE:
        if (size != 0 && src_map != VM_MAP_NULL)
            break;
        /*FALL THRU*/
    default:
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * If the user is requesting that we return the address of the
     * first byte of the data (rather than the base of the page),
     * then we use different rounding semantics: specifically,
     * we assume that (memory_address, size) describes a region
     * all of whose pages we must cover, rather than a base to be truncated
     * down and a size to be added to that base.  So we figure out
     * the highest page that the requested region includes and make
     * sure that the size will cover it.
     *
     * The key example we're worried about it is of the form:
     *
     *		memory_address = 0x1ff0, size = 0x20
     *
     * With the old semantics, we round down the memory_address to 0x1000
     * and round up the size to 0x1000, resulting in our covering *only*
     * page 0x1000.  With the new semantics, we'd realize that the region covers
     * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
     * 0x1000 and page 0x2000 in the region we remap.
     */
    if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
        offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
        size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
    } else {
        size = vm_map_round_page(size, PAGE_MASK);
    }

    result = vm_map_remap_extract(src_map, memory_address,
                                  size, copy, &map_header,
                                  cur_protection,
                                  max_protection,
                                  inheritance,
                                  target_map->hdr.entries_pageable,
                                  src_map == target_map);

    if (result != KERN_SUCCESS) {
        return result;
    }

    /*
     * Allocate/check a range of free virtual address
     * space for the target
     */
    *address = vm_map_trunc_page(*address,
                                 VM_MAP_PAGE_MASK(target_map));
    vm_map_lock(target_map);
    result = vm_map_remap_range_allocate(target_map, address, size,
                                         mask, flags, &insp_entry);

    for (entry = map_header.links.next;
         entry != (struct vm_map_entry *)&map_header.links;
         entry = new_entry) {
        new_entry = entry->vme_next;
        _vm_map_store_entry_unlink(&map_header, entry);
        if (result == KERN_SUCCESS) {
            if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
                /* no codesigning -> read-only access */
                assert(!entry->used_for_jit);
                entry->max_protection = VM_PROT_READ;
                entry->protection = VM_PROT_READ;
                entry->vme_resilient_codesign = TRUE;
            }
            entry->vme_start += *address;
            entry->vme_end += *address;
            assert(!entry->map_aligned);
            vm_map_store_entry_link(target_map, insp_entry, entry);
            insp_entry = entry;
        } else {
            if (!entry->is_sub_map) {
                vm_object_deallocate(VME_OBJECT(entry));
            } else {
                vm_map_deallocate(VME_SUBMAP(entry));
            }
            _vm_map_entry_dispose(&map_header, entry);
        }
    }

    if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
        *cur_protection = VM_PROT_READ;
        *max_protection = VM_PROT_READ;
    }

    if (target_map->disable_vmentry_reuse == TRUE) {
        assert(!target_map->is_nested_map);
        if (target_map->highest_entry_end < insp_entry->vme_end) {
            target_map->highest_entry_end = insp_entry->vme_end;
        }
    }

    if (result == KERN_SUCCESS) {
        target_map->size += size;
        SAVE_HINT_MAP_WRITE(target_map, insp_entry);
    }
    vm_map_unlock(target_map);

    if (result == KERN_SUCCESS && target_map->wiring_required)
        result = vm_map_wire(target_map, *address,
                             *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
                             TRUE);

    /*
     * If requested, return the address of the data pointed to by the
     * request, rather than the base of the resulting page.
     */
    if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
        *address += offset_in_mapping;
    }

    return result;
}
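/*
 * Illustrative sketch (hypothetical caller, not part of this file's
 * interfaces): remapping a range of src_map into target_map and
 * sharing it with the source.  With VM_FLAGS_RETURN_DATA_ADDR and
 * memory_address = 0x1ff0, size = 0x20, the code above truncates the
 * base to 0x1000, grows the size to 0x2000 so both pages are covered,
 * and finally adds offset_in_mapping = 0xff0 back into *address.
 */
#if 0 /* example only */
static kern_return_t
example_remap_shared(vm_map_t target_map, vm_map_t src_map,
                     vm_map_offset_t memory_address, vm_map_size_t size,
                     vm_map_address_t *out_addr)
{
    vm_prot_t cur_prot, max_prot;

    *out_addr = 0;
    return vm_map_remap(target_map, out_addr, size,
                        (vm_map_offset_t)0,     /* mask: no extra alignment */
                        VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
                        src_map, memory_address,
                        FALSE,                  /* copy: share with the source */
                        &cur_prot, &max_prot, VM_INHERIT_NONE);
}
#endif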
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		returns the address and the map entry just before the allocated
 *		range
 *
 *	Map must be locked.
 */

static kern_return_t
vm_map_remap_range_allocate(
    vm_map_t            map,
    vm_map_address_t    *address,	/* IN/OUT */
    vm_map_size_t       size,
    vm_map_offset_t     mask,
    int                 flags,
    vm_map_entry_t      *map_entry)	/* OUT */
{
    vm_map_entry_t  entry;
    vm_map_offset_t start;
    vm_map_offset_t end;
    kern_return_t   kr;
    vm_map_entry_t  hole_entry;

 StartAgain: ;

    start = *address;

    if (flags & VM_FLAGS_ANYWHERE)
    {
        if (flags & VM_FLAGS_RANDOM_ADDR)
        {
            /*
             * Get a random start address.
             */
            kr = vm_map_random_address_for_size(map, address, size);
            if (kr != KERN_SUCCESS) {
                return(kr);
            }
            start = *address;
        }

        /*
         * Calculate the first possible address.
         */

        if (start < map->min_offset)
            start = map->min_offset;
        if (start > map->max_offset)
            return(KERN_NO_SPACE);

        /*
         * Look for the first possible address;
         * if there's already something at this
         * address, we have to start after it.
         */

        if (map->disable_vmentry_reuse == TRUE) {
            VM_MAP_HIGHEST_ENTRY(map, entry, start);
        } else {

            if (map->holelistenabled) {
                hole_entry = (vm_map_entry_t)map->holes_list;

                if (hole_entry == NULL) {
                    /*
                     * No more space in the map?
                     */
                    return(KERN_NO_SPACE);
                } else {

                    boolean_t found_hole = FALSE;

                    do {
                        if (hole_entry->vme_start >= start) {
                            start = hole_entry->vme_start;
                            found_hole = TRUE;
                            break;
                        }

                        if (hole_entry->vme_end > start) {
                            found_hole = TRUE;
                            break;
                        }
                        hole_entry = hole_entry->vme_next;

                    } while (hole_entry != (vm_map_entry_t) map->holes_list);

                    if (found_hole == FALSE) {
                        return (KERN_NO_SPACE);
                    }

                    entry = hole_entry;
                }
            } else {
                assert(first_free_is_valid(map));
                if (start == map->min_offset) {
                    if ((entry = map->first_free) != vm_map_to_entry(map))
                        start = entry->vme_end;
                } else {
                    vm_map_entry_t  tmp_entry;
                    if (vm_map_lookup_entry(map, start, &tmp_entry))
                        start = tmp_entry->vme_end;
                    entry = tmp_entry;
                }
            }
            start = vm_map_round_page(start,
                                      VM_MAP_PAGE_MASK(map));
        }

        /*
         * In any case, the "entry" always precedes
         * the proposed new region throughout the
         * loop:
         */

        while (TRUE) {
            vm_map_entry_t  next;

            /*
             * Find the end of the proposed new region.
             * Be sure we didn't go beyond the end, or
             * wrap around the address.
             */

            end = ((start + mask) & ~mask);
            end = vm_map_round_page(end,
                                    VM_MAP_PAGE_MASK(map));
            if (end < start)
                return(KERN_NO_SPACE);
            start = end;
            end += size;

            if ((end > map->max_offset) || (end < start)) {
                if (map->wait_for_space) {
                    if (size <= (map->max_offset -
                                 map->min_offset)) {
                        assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
                        vm_map_unlock(map);
                        thread_block(THREAD_CONTINUE_NULL);
                        vm_map_lock(map);
                        goto StartAgain;
                    }
                }

                return(KERN_NO_SPACE);
            }

            next = entry->vme_next;

            if (map->holelistenabled) {
                if (entry->vme_end >= end)
                    break;
            } else {
                /*
                 * If there are no more entries, we must win.
                 *
                 * OR
                 *
                 * If there is another entry, it must be
                 * after the end of the potential new region.
                 */

                if (next == vm_map_to_entry(map))
                    break;

                if (next->vme_start >= end)
                    break;
            }

            /*
             * Didn't fit -- move to the next entry.
             */

            entry = next;

            if (map->holelistenabled) {
                if (entry == (vm_map_entry_t) map->holes_list) {
                    /*
                     * Wrapped around
                     */
                    return(KERN_NO_SPACE);
                }
                start = entry->vme_start;
            } else {
                start = entry->vme_end;
            }
        }

        if (map->holelistenabled) {
            if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
                panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
            }
        }

        *address = start;
    } else {
        vm_map_entry_t  temp_entry;

        /*
         * Verify that:
         *	the address doesn't itself violate
         *	the mask requirement.
         */

        if ((start & mask) != 0)
            return(KERN_NO_SPACE);

        /*
         * ...	the address is within bounds
         */

        end = start + size;

        if ((start < map->min_offset) ||
            (end > map->max_offset) ||
            (start >= end)) {
            return(KERN_INVALID_ADDRESS);
        }

        /*
         * If we're asked to overwrite whatever was mapped in that
         * range, first deallocate that range.
         */
        if (flags & VM_FLAGS_OVERWRITE) {
            vm_map_t zap_map;

            /*
             * We use a "zap_map" to avoid having to unlock
             * the "map" in vm_map_delete(), which would compromise
             * the atomicity of the "deallocate" and then "remap"
             * operations.
             */
            zap_map = vm_map_create(PMAP_NULL,
                                    start,
                                    end,
                                    map->hdr.entries_pageable);
            if (zap_map == VM_MAP_NULL) {
                return KERN_RESOURCE_SHORTAGE;
            }
            vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
            vm_map_disable_hole_optimization(zap_map);

            kr = vm_map_delete(map, start, end,
                               (VM_MAP_REMOVE_SAVE_ENTRIES |
                                VM_MAP_REMOVE_NO_MAP_ALIGN),
                               zap_map);
            if (kr == KERN_SUCCESS) {
                vm_map_destroy(zap_map,
                               VM_MAP_REMOVE_NO_PMAP_CLEANUP);
                zap_map = VM_MAP_NULL;
            }
        }

        /*
         * ...	the starting address isn't allocated
         */

        if (vm_map_lookup_entry(map, start, &temp_entry))
            return(KERN_NO_SPACE);

        entry = temp_entry;

        /*
         * ...	the next region doesn't overlap the
         *	end point.
         */

        if ((entry->vme_next != vm_map_to_entry(map)) &&
            (entry->vme_next->vme_start < end))
            return(KERN_NO_SPACE);
    }
    *map_entry = entry;
    return(KERN_SUCCESS);
}
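/*
 * Illustrative sketch of the alignment step above (example only,
 * hypothetical numbers): with an alignment mask of 0xfff (4K) and
 * start = 0x12345, ((start + mask) & ~mask) == 0x13000, i.e. the next
 * boundary at or above "start".
 */
#if 0 /* example only */
static vm_map_offset_t
example_align_up(vm_map_offset_t start, vm_map_offset_t mask)
{
    /* same expression the allocator uses to honor "mask" */
    return (start + mask) & ~mask;
}
#endif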
/*
 *	vm_map_switch:
 *
 *	Set the address map for the current thread to the specified map
 */

vm_map_t
vm_map_switch(
    vm_map_t    map)
{
    int         mycpu;
    thread_t    thread = current_thread();
    vm_map_t    oldmap = thread->map;

    mp_disable_preemption();
    mycpu = cpu_number();

    /*
     *	Deactivate the current map and activate the requested map
     */
    PMAP_SWITCH_USER(thread, map, mycpu);

    mp_enable_preemption();
    return(oldmap);
}
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from a kernel space into space in the
 *		destination map. The space must already exist in the
 *		destination map.
 *	NOTE:  This routine should only be called by threads
 *	which can block on a page fault. i.e. kernel mode user
 *	threads.
 *
 */
kern_return_t
vm_map_write_user(
    vm_map_t            map,
    void                *src_p,
    vm_map_address_t    dst_addr,
    vm_size_t           size)
{
    kern_return_t   kr = KERN_SUCCESS;

    if (current_map() == map) {
        if (copyout(src_p, dst_addr, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_t    oldmap;

        /* take on the identity of the target map while doing */
        /* the transfer */

        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyout(src_p, dst_addr, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map.  The space must already exist in the
 *		kernel map.
 *	NOTE:  This routine should only be called by threads
 *	which can block on a page fault.  i.e. kernel mode user
 *	threads.
 *
 */
kern_return_t
vm_map_read_user(
    vm_map_t            map,
    vm_map_address_t    src_addr,
    void                *dst_p,
    vm_size_t           size)
{
    kern_return_t   kr = KERN_SUCCESS;

    if (current_map() == map) {
        if (copyin(src_addr, dst_p, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_t    oldmap;

        /* take on the identity of the target map while doing */
        /* the transfer */

        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyin(src_addr, dst_p, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
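/*
 * Illustrative sketch (hypothetical helper, example only): writing a
 * small value into another task's map with vm_map_write_user() and
 * reading it back with vm_map_read_user().  Both calls may fault, so
 * they must only be made from a context that can block.
 */
#if 0 /* example only */
static kern_return_t
example_user_copy_roundtrip(vm_map_t user_map, vm_map_address_t uaddr)
{
    uint64_t out_value = 0x1234, in_value = 0;
    kern_return_t kr;

    kr = vm_map_write_user(user_map, &out_value, uaddr, sizeof(out_value));
    if (kr != KERN_SUCCESS)
        return kr;
    return vm_map_read_user(user_map, uaddr, &in_value, sizeof(in_value));
}
#endif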
/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
                        vm_map_offset_t end, vm_prot_t protection)
{
    vm_map_entry_t entry;
    vm_map_entry_t tmp_entry;

    vm_map_lock(map);

    if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
    {
        vm_map_unlock(map);
        return (FALSE);
    }

    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
        vm_map_unlock(map);
        return (FALSE);
    }

    entry = tmp_entry;

    while (start < end) {
        if (entry == vm_map_to_entry(map)) {
            vm_map_unlock(map);
            return (FALSE);
        }

        /*
         *	No holes allowed!
         */

        if (start < entry->vme_start) {
            vm_map_unlock(map);
            return (FALSE);
        }

        /*
         * Check protection associated with entry.
         */

        if ((entry->protection & protection) != protection) {
            vm_map_unlock(map);
            return (FALSE);
        }

        /* go to next entry */

        start = entry->vme_end;
        entry = entry->vme_next;
    }
    vm_map_unlock(map);
    return (TRUE);
}
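/*
 * Illustrative sketch (example only): asserting that a whole range is
 * mapped readable.  The check is advisory; the map can change again as
 * soon as vm_map_check_protection() drops the map lock.
 */
#if 0 /* example only */
static boolean_t
example_range_is_readable(vm_map_t map, vm_map_offset_t start,
                          vm_map_offset_t end)
{
    return vm_map_check_protection(map, start, end, VM_PROT_READ);
}
#endif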
kern_return_t
vm_map_purgable_control(
    vm_map_t        map,
    vm_map_offset_t address,
    vm_purgable_t   control,
    int             *state)
{
    vm_map_entry_t  entry;
    vm_object_t     object;
    kern_return_t   kr;
    boolean_t       was_nonvolatile;

    /*
     * Vet all the input parameters and current type and state of the
     * underlaying object.  Return with an error if anything is amiss.
     */
    if (map == VM_MAP_NULL)
        return(KERN_INVALID_ARGUMENT);

    if (control != VM_PURGABLE_SET_STATE &&
        control != VM_PURGABLE_GET_STATE &&
        control != VM_PURGABLE_PURGE_ALL)
        return(KERN_INVALID_ARGUMENT);

    if (control == VM_PURGABLE_PURGE_ALL) {
        vm_purgeable_object_purge_all();
        return KERN_SUCCESS;
    }

    if (control == VM_PURGABLE_SET_STATE &&
        (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
         ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
        return(KERN_INVALID_ARGUMENT);

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {

        /*
         * Must pass a valid non-submap address.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ADDRESS);
    }

    if ((entry->protection & VM_PROT_WRITE) == 0) {
        /*
         * Can't apply purgable controls to something you can't write.
         */
        vm_map_unlock_read(map);
        return(KERN_PROTECTION_FAILURE);
    }

    object = VME_OBJECT(entry);
    if (object == VM_OBJECT_NULL ||
        object->purgable == VM_PURGABLE_DENY) {
        /*
         * Object must already be present and be purgeable.
         */
        vm_map_unlock_read(map);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_lock(object);

    if (VME_OFFSET(entry) != 0 ||
        entry->vme_end - entry->vme_start != object->vo_size) {
        /*
         * Can only apply purgable controls to the whole (existing)
         * object at once.
         */
        vm_map_unlock_read(map);
        vm_object_unlock(object);
        return KERN_INVALID_ARGUMENT;
    }

    assert(!entry->is_sub_map);
    assert(!entry->use_pmap); /* purgeable has its own accounting */

    vm_map_unlock_read(map);

    was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

    kr = vm_object_purgable_control(object, control, state);

    if (was_nonvolatile &&
        object->purgable != VM_PURGABLE_NONVOLATILE &&
        map->pmap == kernel_pmap) {
        object->vo_purgeable_volatilizer = kernel_task;
    }

    vm_object_unlock(object);
    return kr;
}
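/*
 * Illustrative sketch (example only): marking a purgeable region
 * volatile and then querying its state.  "address" is assumed to fall
 * inside a writable entry that covers a whole purgeable object.
 */
#if 0 /* example only */
static kern_return_t
example_make_volatile(vm_map_t map, vm_map_offset_t address)
{
    int state = VM_PURGABLE_VOLATILE;
    kern_return_t kr;

    kr = vm_map_purgable_control(map, address, VM_PURGABLE_SET_STATE, &state);
    if (kr != KERN_SUCCESS)
        return kr;
    /* read the current state back explicitly */
    return vm_map_purgable_control(map, address, VM_PURGABLE_GET_STATE, &state);
}
#endif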
kern_return_t
vm_map_page_query_internal(
    vm_map_t        target_map,
    vm_map_offset_t offset,
    int             *disposition,
    int             *ref_count)
{
    kern_return_t               kr;
    vm_page_info_basic_data_t   info;
    mach_msg_type_number_t      count;

    count = VM_PAGE_INFO_BASIC_COUNT;
    kr = vm_map_page_info(target_map,
                          offset,
                          VM_PAGE_INFO_BASIC,
                          (vm_page_info_t) &info,
                          &count);
    if (kr == KERN_SUCCESS) {
        *disposition = info.disposition;
        *ref_count = info.ref_count;
    } else {
        *disposition = 0;
        *ref_count = 0;
    }

    return kr;
}
kern_return_t
vm_map_page_info(
    vm_map_t                map,
    vm_map_offset_t         offset,
    vm_page_info_flavor_t   flavor,
    vm_page_info_t          info,
    mach_msg_type_number_t  *count)
{
    vm_map_entry_t          map_entry;
    vm_object_t             object;
    vm_page_t               m;
    kern_return_t           retval = KERN_SUCCESS;
    boolean_t               top_object;
    int                     disposition;
    int                     ref_count;
    vm_page_info_basic_t    basic_info;
    int                     depth;
    vm_map_offset_t         offset_in_page;

    switch (flavor) {
    case VM_PAGE_INFO_BASIC:
        if (*count != VM_PAGE_INFO_BASIC_COUNT) {
            /*
             * The "vm_page_info_basic_data" structure was not
             * properly padded, so allow the size to be off by
             * one to maintain backwards binary compatibility...
             */
            if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
                return KERN_INVALID_ARGUMENT;
        }
        break;
    default:
        return KERN_INVALID_ARGUMENT;
    }

    disposition = 0;
    ref_count = 0;
    depth = 0;
    top_object = TRUE;

    retval = KERN_SUCCESS;
    offset_in_page = offset & PAGE_MASK;
    offset = vm_map_trunc_page(offset, PAGE_MASK);

    vm_map_lock_read(map);

    /*
     * First, find the map entry covering "offset", going down
     * submaps if necessary.
     */
    for (;;) {
        if (!vm_map_lookup_entry(map, offset, &map_entry)) {
            vm_map_unlock_read(map);
            return KERN_INVALID_ADDRESS;
        }
        /* compute offset from this map entry's start */
        offset -= map_entry->vme_start;
        /* compute offset into this map entry's object (or submap) */
        offset += VME_OFFSET(map_entry);

        if (map_entry->is_sub_map) {
            vm_map_t sub_map;

            sub_map = VME_SUBMAP(map_entry);
            vm_map_lock_read(sub_map);
            vm_map_unlock_read(map);

            map = sub_map;

            ref_count = MAX(ref_count, map->ref_count);
            continue;
        }
        break;
    }

    object = VME_OBJECT(map_entry);
    if (object == VM_OBJECT_NULL) {
        /* no object -> no page */
        vm_map_unlock_read(map);
        goto done;
    }

    vm_object_lock(object);
    vm_map_unlock_read(map);

    /*
     * Go down the VM object shadow chain until we find the page
     * we're looking for.
     */
    for (;;) {
        ref_count = MAX(ref_count, object->ref_count);

        m = vm_page_lookup(object, offset);

        if (m != VM_PAGE_NULL) {
            disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
            break;
        } else {
            if (object->internal &&
                !object->terminating &&
                object->pager_ready) {

                if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
                    == VM_EXTERNAL_STATE_EXISTS) {
                    /* the pager has that page */
                    disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
                    break;
                }
            }

            if (object->shadow != VM_OBJECT_NULL) {
                vm_object_t shadow;

                offset += object->vo_shadow_offset;
                shadow = object->shadow;

                vm_object_lock(shadow);
                vm_object_unlock(object);

                object = shadow;
                top_object = FALSE;
                depth++;
            } else {
//              if (!object->internal)
//                  break;
//              retval = KERN_FAILURE;
//              goto done_with_object;
                break;
            }
        }
    }

    /* The ref_count is not strictly accurate, it measures the number   */
    /* of entities holding a ref on the object, they may not be mapping */
    /* the object or may not be mapping the section holding the         */
    /* target page but its still a ball park number and though an over- */
    /* count, it picks up the copy-on-write cases                       */

    /* We could also get a picture of page sharing from pmap_attributes */
    /* but this would under count as only faulted-in mappings would     */
    /* show up.                                                          */

    if (top_object == TRUE && object->shadow)
        disposition |= VM_PAGE_QUERY_PAGE_COPIED;

    if (! object->internal)
        disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;

    if (m == VM_PAGE_NULL)
        goto done_with_object;

    if (m->fictitious) {
        disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
        goto done_with_object;
    }
    if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
        disposition |= VM_PAGE_QUERY_PAGE_DIRTY;

    if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
        disposition |= VM_PAGE_QUERY_PAGE_REF;

    if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
        disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;

    if (m->cs_validated)
        disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
    if (m->cs_tainted)
        disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
    if (m->cs_nx)
        disposition |= VM_PAGE_QUERY_PAGE_CS_NX;

done_with_object:
    vm_object_unlock(object);
done:

    switch (flavor) {
    case VM_PAGE_INFO_BASIC:
        basic_info = (vm_page_info_basic_t) info;
        basic_info->disposition = disposition;
        basic_info->ref_count = ref_count;
        basic_info->object_id = (vm_object_id_t) (uintptr_t)
            VM_KERNEL_ADDRPERM(object);
        basic_info->offset =
            (memory_object_offset_t) offset + offset_in_page;
        basic_info->depth = depth;
        break;
    }

    return retval;
}
/*
 *	vm_map_msync
 *
 *	Synchronises the memory range specified with its backing store
 *	image by either flushing or cleaning the contents to the appropriate
 *	memory manager engaging in a memory object synchronize dialog with
 *	the manager.  The client doesn't return until the manager issues
 *	m_o_s_completed message.  MIG Magically converts user task parameter
 *	to the task's address map.
 *
 *	interpretation of sync_flags
 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
 *				  pages to manager.
 *
 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *				- discard pages, write dirty or precious
 *				  pages back to memory manager.
 *
 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *				- write dirty or precious pages back to
 *				  the memory manager.
 *
 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
 *				  is a hole in the region, and we would
 *				  have returned KERN_SUCCESS, return
 *				  KERN_INVALID_ADDRESS instead.
 *
 *	NOTE
 *	The memory object attributes have not yet been implemented, this
 *	function will have to deal with the invalidate attribute
 *
 *	RETURNS
 *	KERN_INVALID_TASK		Bad task parameter
 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
 *	KERN_SUCCESS			The usual.
 *	KERN_INVALID_ADDRESS		There was a hole in the region.
 */

kern_return_t
vm_map_msync(
    vm_map_t            map,
    vm_map_address_t    address,
    vm_map_size_t       size,
    vm_sync_t           sync_flags)
{
    msync_req_t         msr;
    msync_req_t         new_msr;
    queue_chain_t       req_q;	/* queue of requests for this msync */
    vm_map_entry_t      entry;
    vm_map_size_t       amount_left;
    vm_object_offset_t  offset;
    boolean_t           do_sync_req;
    boolean_t           had_hole = FALSE;
    memory_object_t     pager;
    vm_map_offset_t     pmap_offset;

    if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
        (sync_flags & VM_SYNC_SYNCHRONOUS))
        return(KERN_INVALID_ARGUMENT);

    /*
     * align address and size on page boundaries
     */
    size = (vm_map_round_page(address + size,
                              VM_MAP_PAGE_MASK(map)) -
            vm_map_trunc_page(address,
                              VM_MAP_PAGE_MASK(map)));
    address = vm_map_trunc_page(address,
                                VM_MAP_PAGE_MASK(map));

    if (map == VM_MAP_NULL)
        return(KERN_INVALID_TASK);

    if (size == 0)
        return(KERN_SUCCESS);

    queue_init(&req_q);
    amount_left = size;

    while (amount_left > 0) {
        vm_object_size_t    flush_size;
        vm_object_t         object;

        vm_map_lock(map);
        if (!vm_map_lookup_entry(map,
                                 address,
                                 &entry)) {

            vm_map_size_t   skip;

            /*
             * hole in the address map.
             */
            had_hole = TRUE;

            if (sync_flags & VM_SYNC_KILLPAGES) {
                /*
                 * For VM_SYNC_KILLPAGES, there should be
                 * no holes in the range, since we couldn't
                 * prevent someone else from allocating in
                 * that hole and we wouldn't want to "kill"
                 * their pages.
                 */
                vm_map_unlock(map);
                break;
            }

            /*
             * Check for empty map.
             */
            if (entry == vm_map_to_entry(map) &&
                entry->vme_next == entry) {
                vm_map_unlock(map);
                break;
            }
            /*
             * Check that we don't wrap and that
             * we have at least one real map entry.
             */
            if ((map->hdr.nentries == 0) ||
                (entry->vme_next->vme_start < address)) {
                vm_map_unlock(map);
                break;
            }
            /*
             * Move up to the next entry if needed
             */
            skip = (entry->vme_next->vme_start - address);
            if (skip >= amount_left)
                amount_left = 0;
            else
                amount_left -= skip;
            address = entry->vme_next->vme_start;
            vm_map_unlock(map);
            continue;
        }

        offset = address - entry->vme_start;
        pmap_offset = address;

        /*
         * do we have more to flush than is contained in this
         * entry ?
         */
        if (amount_left + entry->vme_start + offset > entry->vme_end) {
            flush_size = entry->vme_end -
                (entry->vme_start + offset);
        } else {
            flush_size = amount_left;
        }
        amount_left -= flush_size;
        address += flush_size;

        if (entry->is_sub_map == TRUE) {
            vm_map_t        local_map;
            vm_map_offset_t local_offset;

            local_map = VME_SUBMAP(entry);
            local_offset = VME_OFFSET(entry);
            vm_map_unlock(map);
            if (vm_map_msync(
                    local_map,
                    local_offset,
                    flush_size,
                    sync_flags) == KERN_INVALID_ADDRESS) {
                had_hole = TRUE;
            }
            continue;
        }
        object = VME_OBJECT(entry);

        /*
         * We can't sync this object if the object has not been
         * created yet
         */
        if (object == VM_OBJECT_NULL) {
            vm_map_unlock(map);
            continue;
        }
        offset += VME_OFFSET(entry);

        vm_object_lock(object);

        if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
            int kill_pages = 0;
            boolean_t reusable_pages = FALSE;

            if (sync_flags & VM_SYNC_KILLPAGES) {
                if (((object->ref_count == 1) ||
                     ((object->copy_strategy !=
                       MEMORY_OBJECT_COPY_SYMMETRIC) &&
                      (object->copy == VM_OBJECT_NULL))) &&
                    (object->shadow == VM_OBJECT_NULL)) {
                    if (object->ref_count != 1) {
                        vm_page_stats_reusable.free_shared++;
                    }
                    kill_pages = 1;
                } else {
                    kill_pages = -1;
                }
            }
            if (kill_pages != -1)
                vm_object_deactivate_pages(
                    object,
                    offset,
                    (vm_object_size_t) flush_size,
                    kill_pages,
                    reusable_pages,
                    map->pmap,
                    pmap_offset);
            vm_object_unlock(object);
            vm_map_unlock(map);
            continue;
        }
        /*
         * We can't sync this object if there isn't a pager.
         * Don't bother to sync internal objects, since there can't
         * be any "permanent" storage for these objects anyway.
         */
        if ((object->pager == MEMORY_OBJECT_NULL) ||
            (object->internal) || (object->private)) {
            vm_object_unlock(object);
            vm_map_unlock(map);
            continue;
        }
        /*
         * keep reference on the object until syncing is done
         */
        vm_object_reference_locked(object);
        vm_object_unlock(object);

        vm_map_unlock(map);

        do_sync_req = vm_object_sync(object,
                                     offset,
                                     flush_size,
                                     sync_flags & VM_SYNC_INVALIDATE,
                                     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
                                      (sync_flags & VM_SYNC_ASYNCHRONOUS)),
                                     sync_flags & VM_SYNC_SYNCHRONOUS);
        /*
         * only send a m_o_s if we returned pages or if the entry
         * is writable (ie dirty pages may have already been sent back)
         */
        if (!do_sync_req) {
            if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
                /*
                 * clear out the clustering and read-ahead hints
                 */
                vm_object_lock(object);

                object->pages_created = 0;
                object->pages_used = 0;
                object->sequential = 0;
                object->last_alloc = 0;

                vm_object_unlock(object);
            }
            vm_object_deallocate(object);
            continue;
        }
        msync_req_alloc(new_msr);

        vm_object_lock(object);
        offset += object->paging_offset;

        new_msr->offset = offset;
        new_msr->length = flush_size;
        new_msr->object = object;
        new_msr->flag = VM_MSYNC_SYNCHRONIZING;
    re_iterate:

        /*
         * We can't sync this object if there isn't a pager.  The
         * pager can disappear anytime we're not holding the object
         * lock.  So this has to be checked anytime we goto re_iterate.
         */

        pager = object->pager;

        if (pager == MEMORY_OBJECT_NULL) {
            vm_object_unlock(object);
            vm_object_deallocate(object);
            msync_req_free(new_msr);
            continue;
        }

        queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
            /*
             * need to check for overlapping entry, if found, wait
             * on overlapping msr to be done, then reiterate
             */
            if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
                ((offset >= msr->offset &&
                  offset < (msr->offset + msr->length)) ||
                 (msr->offset >= offset &&
                  msr->offset < (offset + flush_size))))
            {
                assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
                vm_object_unlock(object);
                thread_block(THREAD_CONTINUE_NULL);
                vm_object_lock(object);
                goto re_iterate;
            }
        }/* queue_iterate */

        queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);

        vm_object_paging_begin(object);
        vm_object_unlock(object);

        queue_enter(&req_q, new_msr, msync_req_t, req_q);

        (void) memory_object_synchronize(
            pager,
            offset,
            flush_size,
            sync_flags & ~VM_SYNC_CONTIGUOUS);

        vm_object_lock(object);
        vm_object_paging_end(object);
        vm_object_unlock(object);
    }/* while */

    /*
     * wait for memory_object_sychronize_completed messages from pager(s)
     */

    while (!queue_empty(&req_q)) {
        msr = (msync_req_t)queue_first(&req_q);
        while (msr->flag != VM_MSYNC_DONE) {
            assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
            thread_block(THREAD_CONTINUE_NULL);
        }/* while */
        queue_remove(&req_q, msr, msync_req_t, req_q);
        vm_object_deallocate(msr->object);
        msync_req_free(msr);
    }/* queue_iterate */

    /* for proper msync() behaviour */
    if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
        return(KERN_INVALID_ADDRESS);

    return(KERN_SUCCESS);
}/* vm_map_msync */
15370 * Convert from a port specifying an entry or a task
15371 * to a map. Doesn't consume the port ref; produces a map ref,
15372 * which may be null. Unlike convert_port_to_map, the
15373 * port may be task or a named entry backed.
15380 convert_port_entry_to_map(
15384 vm_named_entry_t named_entry
;
15385 uint32_t try_failed_count
= 0;
15387 if(IP_VALID(port
) && (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
15390 if(ip_active(port
) && (ip_kotype(port
)
15391 == IKOT_NAMED_ENTRY
)) {
15393 (vm_named_entry_t
)port
->ip_kobject
;
15394 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
15397 try_failed_count
++;
15398 mutex_pause(try_failed_count
);
15401 named_entry
->ref_count
++;
15402 lck_mtx_unlock(&(named_entry
)->Lock
);
15404 if ((named_entry
->is_sub_map
) &&
15405 (named_entry
->protection
15406 & VM_PROT_WRITE
)) {
15407 map
= named_entry
->backing
.map
;
15409 mach_destroy_memory_entry(port
);
15410 return VM_MAP_NULL
;
15412 vm_map_reference_swap(map
);
15413 mach_destroy_memory_entry(port
);
15417 return VM_MAP_NULL
;
15421 map
= convert_port_to_map(port
);
/*
 *	Routine:	convert_port_entry_to_object
 *	Purpose:
 *		Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces a map ref,
 *		which may be null.
 *	Conditions:
 *		Nothing locked.
 */

vm_object_t
convert_port_entry_to_object(
    ipc_port_t  port)
{
    vm_object_t         object = VM_OBJECT_NULL;
    vm_named_entry_t    named_entry;
    uint32_t            try_failed_count = 0;

    if (IP_VALID(port) &&
        (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
    try_again:
        ip_lock(port);
        if (ip_active(port) &&
            (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
            named_entry = (vm_named_entry_t)port->ip_kobject;
            if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
                ip_unlock(port);
                try_failed_count++;
                mutex_pause(try_failed_count);
                goto try_again;
            }
            named_entry->ref_count++;
            lck_mtx_unlock(&(named_entry)->Lock);
            ip_unlock(port);
            if (!(named_entry->is_sub_map) &&
                !(named_entry->is_pager) &&
                !(named_entry->is_copy) &&
                (named_entry->protection & VM_PROT_WRITE)) {
                object = named_entry->backing.object;
                vm_object_reference(object);
            }
            mach_destroy_memory_entry(port);
        }
    }

    return object;
}
/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
#undef current_map
vm_map_t
current_map(void)
{
    return (current_map_fast());
}

/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
    vm_map_t    map)
{
    if (map == VM_MAP_NULL)
        return;

    lck_mtx_lock(&map->s_lock);
#if TASK_SWAPPER
    assert(map->res_count > 0);
    assert(map->ref_count >= map->res_count);
    map->res_count++;
#endif
    map->ref_count++;
    lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
    vm_map_t    map)
{
    unsigned int    ref;

    if (map == VM_MAP_NULL)
        return;

    lck_mtx_lock(&map->s_lock);
    ref = --map->ref_count;
    if (ref > 0) {
        vm_map_res_deallocate(map);
        lck_mtx_unlock(&map->s_lock);
        return;
    }
    assert(map->ref_count == 0);
    lck_mtx_unlock(&map->s_lock);

    /*
     * The map residence count isn't decremented here because
     * the vm_map_delete below will traverse the entire map,
     * deleting entries, and the residence counts on objects
     * and sharing maps will go away then.
     */

    vm_map_destroy(map, VM_MAP_NO_FLAGS);
}
void
vm_map_disable_NX(vm_map_t map)
{
    if (map == NULL)
        return;
    if (map->pmap == NULL)
        return;

    pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
    if (map == NULL)
        return;

    map->map_disallow_data_exec = TRUE;
}

/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
    map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
}

void
vm_map_set_64bit(vm_map_t map)
{
    map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
}

/*
 * Expand the maximum size of an existing map.
 */
void
vm_map_set_jumbo(vm_map_t map)
{
    (void) map;
}

vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
    return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
}

vm_map_offset_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
    return (1 << (vm_map_is_64bit(map) ? 16 : 8));
}

boolean_t
vm_map_is_64bit(
    vm_map_t map)
{
    return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
boolean_t
vm_map_has_hard_pagezero(
    vm_map_t        map,
    vm_map_offset_t pagezero_size)
{
    /*
     * We should lock the VM map (for read) here but we can get away
     * with it for now because there can't really be any race condition:
     * the VM map's min_offset is changed only when the VM map is created
     * and when the zero page is established (when the binary gets loaded),
     * and this routine gets called only when the task terminates and the
     * VM map is being torn down, and when a new map is created via
     * load_machfile()/execve().
     */
    return (map->min_offset >= pagezero_size);
}

/*
 * Raise a VM map's maximun offset.
 */
kern_return_t
vm_map_raise_max_offset(
    vm_map_t        map,
    vm_map_offset_t new_max_offset)
{
    kern_return_t   ret;

    vm_map_lock(map);
    ret = KERN_INVALID_ADDRESS;

    if (new_max_offset >= map->max_offset) {
        if (!vm_map_is_64bit(map)) {
            if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
                map->max_offset = new_max_offset;
                ret = KERN_SUCCESS;
            }
        } else {
            if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
                map->max_offset = new_max_offset;
                ret = KERN_SUCCESS;
            }
        }
    }

    vm_map_unlock(map);
    return ret;
}

/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
    vm_map_t        map,
    vm_map_offset_t new_min_offset)
{
    vm_map_entry_t  first_entry;

    new_min_offset = vm_map_round_page(new_min_offset,
                                       VM_MAP_PAGE_MASK(map));

    vm_map_lock(map);

    if (new_min_offset < map->min_offset) {
        /*
         * Can't move min_offset backwards, as that would expose
         * a part of the address space that was previously, and for
         * possibly good reasons, inaccessible.
         */
        vm_map_unlock(map);
        return KERN_INVALID_ADDRESS;
    }
    if (new_min_offset >= map->max_offset) {
        /* can't go beyond the end of the address space */
        vm_map_unlock(map);
        return KERN_INVALID_ADDRESS;
    }

    first_entry = vm_map_first_entry(map);
    if (first_entry != vm_map_to_entry(map) &&
        first_entry->vme_start < new_min_offset) {
        /*
         * Some memory was already allocated below the new
         * minimun offset.  It's too late to change it now...
         */
        vm_map_unlock(map);
        return KERN_NO_SPACE;
    }

    map->min_offset = new_min_offset;

    assert(map->holes_list);
    map->holes_list->start = new_min_offset;
    assert(new_min_offset < map->holes_list->end);

    vm_map_unlock(map);

    return KERN_SUCCESS;
}
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t     map,
                           vm_size_t    limit)
{
    map->user_wire_limit = limit;
}


void vm_map_switch_protect(vm_map_t     map,
                           boolean_t    val)
{
    vm_map_lock(map);
    map->switch_protect = val;
    vm_map_unlock(map);
}

/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
    pmap_t pmap = vm_map_pmap(map);

    ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
    ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
    pmap_t pmap = vm_map_pmap(map);

    ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
    ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
                          vm_map_offset_t start,
                          vm_map_offset_t end)
{
    vm_map_entry_t entry;
    vm_page_t      m;
    vm_object_t    object;

    /*
     * Vet all the input parameters and current type and state of the
     * underlaying object.  Return with an error if anything is amiss.
     */
    if (map == VM_MAP_NULL)
        return(KERN_INVALID_ARGUMENT);

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
        /*
         * Must pass a valid non-submap address.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ADDRESS);
    }

    if ((entry->vme_start > start) || (entry->vme_end < end)) {
        /*
         * Map entry doesn't cover the requested range. Not handling
         * this situation currently.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ARGUMENT);
    }

    object = VME_OBJECT(entry);
    if (object == VM_OBJECT_NULL) {
        /*
         * Object must already be present or we can't sign.
         */
        vm_map_unlock_read(map);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_lock(object);
    vm_map_unlock_read(map);

    while (start < end) {
        int refmod;

        m = vm_page_lookup(object,
                           start - entry->vme_start + VME_OFFSET(entry));
        if (m == VM_PAGE_NULL) {
            /* shoud we try to fault a page here? we can probably
             * demand it exists and is locked for this request */
            vm_object_unlock(object);
            return KERN_FAILURE;
        }
        /* deal with special page status */
        if (m->busy ||
            (m->unusual && (m->error || m->restart || m->private || m->absent))) {
            vm_object_unlock(object);
            return KERN_FAILURE;
        }

        /* Page is OK... now "validate" it */
        /* This is the place where we'll call out to create a code
         * directory, later */
        m->cs_validated = TRUE;

        /* The page is now "clean" for codesigning purposes. That means
         * we don't consider it as modified (wpmapped) anymore. But
         * we'll disconnect the page so we note any future modification
         * attempts. */
        m->wpmapped = FALSE;
        refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

        /* Pull the dirty status from the pmap, since we cleared the
         * wpmapped bit */
        if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
            SET_PAGE_DIRTY(m, FALSE);
        }

        /* On to the next page */
        start += PAGE_SIZE;
    }
    vm_object_unlock(object);

    return KERN_SUCCESS;
}
#endif /* CONFIG_DYNAMIC_CODE_SIGNING */
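/*
 * Illustrative sketch (example only, CONFIG_DYNAMIC_CODE_SIGNING): a
 * hypothetical caller signing one page worth of a range it owns.  The
 * range must be resident and covered by a single map entry.
 */
#if 0 /* example only */
static kern_return_t
example_sign_page(vm_map_t map, vm_map_offset_t page_start)
{
    return vm_map_sign(map, page_start, page_start + PAGE_SIZE);
}
#endif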
kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
{
    vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
    vm_map_entry_t  next_entry;
    kern_return_t   kr = KERN_SUCCESS;
    vm_map_t        zap_map;

    vm_map_lock(map);

    /*
     * We use a "zap_map" to avoid having to unlock
     * the "map" in vm_map_delete().
     */
    zap_map = vm_map_create(PMAP_NULL,
                            map->min_offset,
                            map->max_offset,
                            map->hdr.entries_pageable);

    if (zap_map == VM_MAP_NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }

    vm_map_set_page_shift(zap_map,
                          VM_MAP_PAGE_SHIFT(map));
    vm_map_disable_hole_optimization(zap_map);

    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = next_entry) {
        next_entry = entry->vme_next;

        if (VME_OBJECT(entry) &&
            !entry->is_sub_map &&
            (VME_OBJECT(entry)->internal == TRUE) &&
            (VME_OBJECT(entry)->ref_count == 1)) {

            *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
            *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);

            (void)vm_map_delete(map,
                                entry->vme_start,
                                entry->vme_end,
                                VM_MAP_REMOVE_SAVE_ENTRIES,
                                zap_map);
        }
    }

    vm_map_unlock(map);

    /*
     * Get rid of the "zap_maps" and all the map entries that
     * they may still contain.
     */
    if (zap_map != VM_MAP_NULL) {
        vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_map = VM_MAP_NULL;
    }

    return kr;
}
#if DEVELOPMENT || DEBUG

int
vm_map_disconnect_page_mappings(
    vm_map_t    map,
    boolean_t   do_unnest)
{
    vm_map_entry_t  entry;
    int             page_count = 0;

    if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
        vm_map_lock(map);

        for (entry = vm_map_first_entry(map);
             entry != vm_map_to_entry(map);
             entry = entry->vme_next) {

            if (entry->is_sub_map && entry->use_pmap) {
                /*
                 * Make sure the range between the start of this entry and
                 * the end of this entry is no longer nested, so that
                 * we will only remove mappings from the pmap in use by this
                 * this task
                 */
                vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
            }
        }
        vm_map_unlock(map);
#endif
    }
    vm_map_lock_read(map);

    page_count = map->pmap->stats.resident_count;

    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = entry->vme_next) {

        if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
                                   (VME_OBJECT(entry)->phys_contiguous))) {
            continue;
        }
        if (entry->is_sub_map)
            assert(!entry->use_pmap);

        pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
    }
    vm_map_unlock_read(map);

    return page_count;
}

#endif /* DEVELOPMENT || DEBUG */
int c_freezer_swapout_count;
int c_freezer_compression_count = 0;
AbsoluteTime c_freezer_last_yield_ts = 0;

kern_return_t vm_map_freeze(
    vm_map_t     map,
    unsigned int *purgeable_count,
    unsigned int *wired_count,
    unsigned int *clean_count,
    unsigned int *dirty_count,
    __unused unsigned int dirty_budget,
    boolean_t    *has_shared)
{
    vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
    kern_return_t   kr = KERN_SUCCESS;

    *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
    *has_shared = FALSE;

    /*
     * We need the exclusive lock here so that we can
     * block any page faults or lookups while we are
     * in the middle of freezing this vm map.
     */
    vm_map_lock(map);

    assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

    if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
        kr = KERN_NO_SPACE;
        goto done;
    }

    c_freezer_compression_count = 0;
    clock_get_uptime(&c_freezer_last_yield_ts);

    for (entry2 = vm_map_first_entry(map);
         entry2 != vm_map_to_entry(map);
         entry2 = entry2->vme_next) {

        vm_object_t src_object = VME_OBJECT(entry2);

        if (src_object &&
            !entry2->is_sub_map &&
            !src_object->phys_contiguous) {
            /* If eligible, scan the entry, moving eligible pages over to our parent object */

            if (src_object->internal == TRUE) {

                if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
                    /*
                     * Pages belonging to this object could be swapped to disk.
                     * Make sure it's not a shared object because we could end
                     * up just bringing it back in again.
                     */
                    if (src_object->ref_count > 1) {
                        continue;
                    }
                }
                vm_object_compressed_freezer_pageout(src_object);

                if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
                    kr = KERN_NO_SPACE;
                    break;
                }
            }
        }
    }
done:
    vm_map_unlock(map);

    vm_object_compressed_freezer_done();

    if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
        /*
         * reset the counter tracking the # of swapped c_segs
         * because we are now done with this freeze session and task.
         */
        c_freezer_swapout_count = 0;
    }
    return kr;
}
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and setup for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *	- alias == VM_MEMORY_MALLOC
 * 	- wired_count == 0
 * 	- !needs_copy
 * and a VM object with:
 * 	- internal
 * 	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 * 	- !true_share
 * 	- vo_size == ANON_CHUNK_SIZE
 *
 * Only non-kernel map entries.
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
    vm_map_entry_t  entry)
{
    vm_object_t     object;

    if (entry->is_sub_map) {
        /* entry does not point at a VM object */
        return FALSE;
    }

    if (entry->needs_copy) {
        /* already set for copy_on_write: done! */
        return FALSE;
    }

    if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
        VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
        /* not a malloc heap or Obj-C Garbage Collector heap */
        return FALSE;
    }

    if (entry->wired_count) {
        /* wired: can't change the map entry... */
        vm_counters.should_cow_but_wired++;
        return FALSE;
    }

    object = VME_OBJECT(entry);

    if (object == VM_OBJECT_NULL) {
        /* no object yet... */
        return FALSE;
    }

    if (!object->internal) {
        /* not an internal object */
        return FALSE;
    }

    if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
        /* not the default copy strategy */
        return FALSE;
    }

    if (object->true_share) {
        /* already true_share: too late to avoid it */
        return FALSE;
    }

    if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
        object->vo_size != ANON_CHUNK_SIZE) {
        /* ... not an object created for the ObjC Garbage Collector */
        return FALSE;
    }

    if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
        object->vo_size != 2048 * 4096) {
        /* ... not a "MALLOC_SMALL" heap */
        return FALSE;
    }

    /*
     * All the criteria match: we have a large object being targeted for "true_share".
     * To limit the adverse side-effects linked with "true_share", tell the caller to
     * try and avoid setting up the entire object for "true_share" by clipping the
     * targeted range and setting it up for copy-on-write.
     */
    return TRUE;
}
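/*
 * Illustrative sketch (example only): how a caller holding the map
 * lock might use the predicate above, clipping the entry to the range
 * of interest instead of letting the whole object go "true_share".
 * (vm_map_clip_start/_end are this file's entry clippers.)
 */
#if 0 /* example only */
static void
example_clip_for_cow(vm_map_t map, vm_map_entry_t entry,
                     vm_map_offset_t start, vm_map_offset_t end)
{
    if (vm_map_entry_should_cow_for_true_share(entry)) {
        vm_map_clip_start(map, entry,
                          vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
        vm_map_clip_end(map, entry,
                        vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
    }
}
#endif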
vm_map_offset_t
vm_map_round_page_mask(
    vm_map_offset_t offset,
    vm_map_offset_t mask)
{
    return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
    vm_map_offset_t offset,
    vm_map_offset_t mask)
{
    return VM_MAP_TRUNC_PAGE(offset, mask);
}

boolean_t
vm_map_page_aligned(
    vm_map_offset_t offset,
    vm_map_offset_t mask)
{
    return ((offset) & mask) == 0;
}

int
vm_map_page_shift(
    vm_map_t map)
{
    return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
    vm_map_t map)
{
    return VM_MAP_PAGE_SIZE(map);
}

vm_map_offset_t
vm_map_page_mask(
    vm_map_t map)
{
    return VM_MAP_PAGE_MASK(map);
}

kern_return_t
vm_map_set_page_shift(
    vm_map_t    map,
    int         pageshift)
{
    if (map->hdr.nentries != 0) {
        /* too late to change page size */
        return KERN_FAILURE;
    }

    map->hdr.page_shift = pageshift;

    return KERN_SUCCESS;
}
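/*
 * Illustrative sketch (example only, hypothetical numbers): for a 16K
 * map (page mask 0x3fff), vm_map_trunc_page_mask(0x6123, 0x3fff) is
 * 0x4000 and vm_map_round_page_mask(0x6123, 0x3fff) is 0x8000; both
 * results satisfy vm_map_page_aligned().
 */
#if 0 /* example only */
static void
example_page_rounding(vm_map_t map)
{
    vm_map_offset_t mask = vm_map_page_mask(map);
    vm_map_offset_t lo = vm_map_trunc_page_mask(0x6123, mask);
    vm_map_offset_t hi = vm_map_round_page_mask(0x6123, mask);

    assert(vm_map_page_aligned(lo, mask));
    assert(vm_map_page_aligned(hi, mask));
}
#endif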
kern_return_t
vm_map_query_volatile(
    vm_map_t        map,
    mach_vm_size_t  *volatile_virtual_size_p,
    mach_vm_size_t  *volatile_resident_size_p,
    mach_vm_size_t  *volatile_compressed_size_p,
    mach_vm_size_t  *volatile_pmap_size_p,
    mach_vm_size_t  *volatile_compressed_pmap_size_p)
{
    mach_vm_size_t  volatile_virtual_size;
    mach_vm_size_t  volatile_resident_count;
    mach_vm_size_t  volatile_compressed_count;
    mach_vm_size_t  volatile_pmap_count;
    mach_vm_size_t  volatile_compressed_pmap_count;
    mach_vm_size_t  resident_count;
    vm_map_entry_t  entry;
    vm_object_t     object;

    /* map should be locked by caller */

    volatile_virtual_size = 0;
    volatile_resident_count = 0;
    volatile_compressed_count = 0;
    volatile_pmap_count = 0;
    volatile_compressed_pmap_count = 0;

    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;

        if (entry->is_sub_map) {
            continue;
        }
        if (! (entry->protection & VM_PROT_WRITE)) {
            continue;
        }
        object = VME_OBJECT(entry);
        if (object == VM_OBJECT_NULL) {
            continue;
        }
        if (object->purgable != VM_PURGABLE_VOLATILE &&
            object->purgable != VM_PURGABLE_EMPTY) {
            continue;
        }
        if (VME_OFFSET(entry)) {
            /*
             * If the map entry has been split and the object now
             * appears several times in the VM map, we don't want
             * to count the object's resident_page_count more than
             * once.  We count it only for the first one, starting
             * at offset 0 and ignore the other VM map entries.
             */
            continue;
        }
        resident_count = object->resident_page_count;
        if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
            resident_count = 0;
        } else {
            resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
        }

        volatile_virtual_size += entry->vme_end - entry->vme_start;
        volatile_resident_count += resident_count;
        if (object->pager) {
            volatile_compressed_count +=
                vm_compressor_pager_get_count(object->pager);
        }
        pmap_compressed_bytes = 0;
        pmap_resident_bytes =
            pmap_query_resident(map->pmap,
                                entry->vme_start,
                                entry->vme_end,
                                &pmap_compressed_bytes);
        volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
        volatile_compressed_pmap_count += (pmap_compressed_bytes
                                           / PAGE_SIZE);
    }

    /* map is still locked on return */

    *volatile_virtual_size_p = volatile_virtual_size;
    *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
    *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
    *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
    *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

    return KERN_SUCCESS;
}
void
vm_map_sizes(vm_map_t map,
             vm_map_size_t * psize,
             vm_map_size_t * pfree,
             vm_map_size_t * plargest_free)
{
    vm_map_entry_t  entry;
    vm_map_offset_t prev;
    vm_map_size_t   free, total_free, largest_free;
    boolean_t       end;

    if (!map)
    {
        *psize = *pfree = *plargest_free = 0;
        return;
    }
    total_free = largest_free = 0;

    vm_map_lock_read(map);
    if (psize) *psize = map->max_offset - map->min_offset;

    prev = map->min_offset;
    for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
    {
        end = (entry == vm_map_to_entry(map));

        if (end) free = entry->vme_end   - prev;
        else     free = entry->vme_start - prev;

        total_free += free;
        if (free > largest_free) largest_free = free;

        if (end) break;
        prev = entry->vme_end;
    }
    vm_map_unlock_read(map);
    if (pfree)         *pfree = total_free;
    if (plargest_free) *plargest_free = largest_free;
}
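/*
 * Illustrative sketch (example only): summarizing a map's address
 * space usage with vm_map_sizes().
 */
#if 0 /* example only */
static void
example_report_sizes(vm_map_t map)
{
    vm_map_size_t size, free_total, free_largest;

    vm_map_sizes(map, &size, &free_total, &free_largest);
    printf("map %p: size 0x%llx free 0x%llx largest free 0x%llx\n",
           map, (uint64_t)size, (uint64_t)free_total, (uint64_t)free_largest);
}
#endif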
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);
int vm_map_shadow_max(
    vm_map_t map)
{
    int             shadows, shadows_max;
    vm_map_entry_t  entry;
    vm_object_t     object, next_object;

    if (map == NULL)
        return 0;

    shadows_max = 0;

    vm_map_lock_read(map);

    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        if (entry->is_sub_map) {
            continue;
        }
        object = VME_OBJECT(entry);
        if (object == NULL) {
            continue;
        }
        vm_object_lock_shared(object);
        for (shadows = 0;
             object->shadow != NULL;
             shadows++, object = next_object) {
            next_object = object->shadow;
            vm_object_lock_shared(next_object);
            vm_object_unlock(object);
        }
        vm_object_unlock(object);
        if (shadows > shadows_max) {
            shadows_max = shadows;
        }
    }

    vm_map_unlock_read(map);

    return shadows_max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

void vm_commit_pagezero_status(vm_map_t lmap) {
    pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}