1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/counters.h>
86 #include <kern/kalloc.h>
87 #include <kern/zalloc.h>
88
89 #include <vm/cpm.h>
90 #include <vm/vm_compressor_pager.h>
91 #include <vm/vm_init.h>
92 #include <vm/vm_fault.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_object.h>
95 #include <vm/vm_page.h>
96 #include <vm/vm_pageout.h>
97 #include <vm/vm_kern.h>
98 #include <ipc/ipc_port.h>
99 #include <kern/sched_prim.h>
100 #include <kern/misc_protos.h>
101 #include <kern/xpr.h>
102
103 #include <mach/vm_map_server.h>
104 #include <mach/mach_host_server.h>
105 #include <vm/vm_protos.h>
106 #include <vm/vm_purgeable_internal.h>
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110 #include <vm/vm_map_store.h>
111
112
113 extern u_int32_t random(void); /* from <libkern/libkern.h> */
114 /* Internal prototypes */
115
116
117 static void vm_map_simplify_range(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end); /* forward */
121
122 static boolean_t vm_map_range_check(
123 vm_map_t map,
124 vm_map_offset_t start,
125 vm_map_offset_t end,
126 vm_map_entry_t *entry);
127
128 static vm_map_entry_t _vm_map_entry_create(
129 struct vm_map_header *map_header, boolean_t map_locked);
130
131 static void _vm_map_entry_dispose(
132 struct vm_map_header *map_header,
133 vm_map_entry_t entry);
134
135 static void vm_map_pmap_enter(
136 vm_map_t map,
137 vm_map_offset_t addr,
138 vm_map_offset_t end_addr,
139 vm_object_t object,
140 vm_object_offset_t offset,
141 vm_prot_t protection);
142
143 static void _vm_map_clip_end(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t end);
147
148 static void _vm_map_clip_start(
149 struct vm_map_header *map_header,
150 vm_map_entry_t entry,
151 vm_map_offset_t start);
152
153 static void vm_map_entry_delete(
154 vm_map_t map,
155 vm_map_entry_t entry);
156
157 static kern_return_t vm_map_delete(
158 vm_map_t map,
159 vm_map_offset_t start,
160 vm_map_offset_t end,
161 int flags,
162 vm_map_t zap_map);
163
164 static kern_return_t vm_map_copy_overwrite_unaligned(
165 vm_map_t dst_map,
166 vm_map_entry_t entry,
167 vm_map_copy_t copy,
168 vm_map_address_t start,
169 boolean_t discard_on_success);
170
171 static kern_return_t vm_map_copy_overwrite_aligned(
172 vm_map_t dst_map,
173 vm_map_entry_t tmp_entry,
174 vm_map_copy_t copy,
175 vm_map_offset_t start,
176 pmap_t pmap);
177
178 static kern_return_t vm_map_copyin_kernel_buffer(
179 vm_map_t src_map,
180 vm_map_address_t src_addr,
181 vm_map_size_t len,
182 boolean_t src_destroy,
183 vm_map_copy_t *copy_result); /* OUT */
184
185 static kern_return_t vm_map_copyout_kernel_buffer(
186 vm_map_t map,
187 vm_map_address_t *addr, /* IN/OUT */
188 vm_map_copy_t copy,
189 boolean_t overwrite,
190 boolean_t consume_on_success);
191
192 static void vm_map_fork_share(
193 vm_map_t old_map,
194 vm_map_entry_t old_entry,
195 vm_map_t new_map);
196
197 static boolean_t vm_map_fork_copy(
198 vm_map_t old_map,
199 vm_map_entry_t *old_entry_p,
200 vm_map_t new_map);
201
202 void vm_map_region_top_walk(
203 vm_map_entry_t entry,
204 vm_region_top_info_t top);
205
206 void vm_map_region_walk(
207 vm_map_t map,
208 vm_map_offset_t va,
209 vm_map_entry_t entry,
210 vm_object_offset_t offset,
211 vm_object_size_t range,
212 vm_region_extended_info_t extended,
213 boolean_t look_for_pages,
214 mach_msg_type_number_t count);
215
216 static kern_return_t vm_map_wire_nested(
217 vm_map_t map,
218 vm_map_offset_t start,
219 vm_map_offset_t end,
220 vm_prot_t caller_prot,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr,
224 ppnum_t *physpage_p);
225
226 static kern_return_t vm_map_unwire_nested(
227 vm_map_t map,
228 vm_map_offset_t start,
229 vm_map_offset_t end,
230 boolean_t user_wire,
231 pmap_t map_pmap,
232 vm_map_offset_t pmap_addr);
233
234 static kern_return_t vm_map_overwrite_submap_recurse(
235 vm_map_t dst_map,
236 vm_map_offset_t dst_addr,
237 vm_map_size_t dst_size);
238
239 static kern_return_t vm_map_copy_overwrite_nested(
240 vm_map_t dst_map,
241 vm_map_offset_t dst_addr,
242 vm_map_copy_t copy,
243 boolean_t interruptible,
244 pmap_t pmap,
245 boolean_t discard_on_success);
246
247 static kern_return_t vm_map_remap_extract(
248 vm_map_t map,
249 vm_map_offset_t addr,
250 vm_map_size_t size,
251 boolean_t copy,
252 struct vm_map_header *map_header,
253 vm_prot_t *cur_protection,
254 vm_prot_t *max_protection,
255 vm_inherit_t inheritance,
256 boolean_t pageable);
257
258 static kern_return_t vm_map_remap_range_allocate(
259 vm_map_t map,
260 vm_map_address_t *address,
261 vm_map_size_t size,
262 vm_map_offset_t mask,
263 int flags,
264 vm_map_entry_t *map_entry);
265
266 static void vm_map_region_look_for_page(
267 vm_map_t map,
268 vm_map_offset_t va,
269 vm_object_t object,
270 vm_object_offset_t offset,
271 int max_refcnt,
272 int depth,
273 vm_region_extended_info_t extended,
274 mach_msg_type_number_t count);
275
276 static int vm_map_region_count_obj_refs(
277 vm_map_entry_t entry,
278 vm_object_t object);
279
280
281 static kern_return_t vm_map_willneed(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_reuse_pages(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 static kern_return_t vm_map_reusable_pages(
292 vm_map_t map,
293 vm_map_offset_t start,
294 vm_map_offset_t end);
295
296 static kern_return_t vm_map_can_reuse(
297 vm_map_t map,
298 vm_map_offset_t start,
299 vm_map_offset_t end);
300
301 #if MACH_ASSERT
302 static kern_return_t vm_map_pageout(
303 vm_map_t map,
304 vm_map_offset_t start,
305 vm_map_offset_t end);
306 #endif /* MACH_ASSERT */
307
308 /*
309 * Macros to copy a vm_map_entry. We must be careful to correctly
310 * manage the wired page count. vm_map_entry_copy() creates a new
311 * map entry referring to the same memory - the wired count in the new entry
312 * must be set to zero. vm_map_entry_copy_full() creates a new
313 * entry that is identical to the old entry. This preserves the
314 * wire count; it's used for map splitting and zone changing in
315 * vm_map_copyout.
316 */
317
318 #define vm_map_entry_copy(NEW,OLD) \
319 MACRO_BEGIN \
320 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
321 *(NEW) = *(OLD); \
322 (NEW)->is_shared = FALSE; \
323 (NEW)->needs_wakeup = FALSE; \
324 (NEW)->in_transition = FALSE; \
325 (NEW)->wired_count = 0; \
326 (NEW)->user_wired_count = 0; \
327 (NEW)->permanent = FALSE; \
328 (NEW)->used_for_jit = FALSE; \
329 (NEW)->from_reserved_zone = _vmec_reserved; \
330 (NEW)->iokit_acct = FALSE; \
331 (NEW)->vme_resilient_codesign = FALSE; \
332 (NEW)->vme_resilient_media = FALSE; \
333 MACRO_END
334
335 #define vm_map_entry_copy_full(NEW,OLD) \
336 MACRO_BEGIN \
337 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
338 (*(NEW) = *(OLD)); \
339 (NEW)->from_reserved_zone = _vmecf_reserved; \
340 MACRO_END
341
342 /*
343 * Decide if we want to allow processes to execute from their data or stack areas.
344 * override_nx() returns true if we do. Data/stack execution can be enabled independently
345 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
346 * or allow_stack_exec to enable data execution for that type of data area for that particular
347 * ABI (or both by or'ing the flags together). These are initialized in the architecture
348 * specific pmap files since the default behavior varies according to architecture. The
349 * main reason it varies is because of the need to provide binary compatibility with old
350 * applications that were written before these restrictions came into being. In the old
351 * days, an app could execute anything it could read, but this has slowly been tightened
352 * up over time. The default behavior is:
353 *
354 * 32-bit PPC apps may execute from both stack and data areas
355 * 32-bit Intel apps may execute from data areas but not stack
356 * 64-bit PPC/Intel apps may not execute from either data or stack
357 *
358 * An application on any architecture may override these defaults by explicitly
359 * adding PROT_EXEC permission to the page in question with the mprotect(2)
360 * system call. This code here just determines what happens when an app tries to
361 * execute from a page that lacks execute permission.
362 *
363 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
364 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
365 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
366 * execution from data areas for a particular binary even if the arch normally permits it. As
367 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
368 * to support some complicated use cases, notably browsers with out-of-process plugins that
369 * are not all NX-safe.
370 */
371
372 extern int allow_data_exec, allow_stack_exec;
373
374 int
375 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
376 {
377 int current_abi;
378
379 if (map->pmap == kernel_pmap) return FALSE;
380
381 /*
382 * Determine if the app is running in 32 or 64 bit mode.
383 */
384
385 if (vm_map_is_64bit(map))
386 current_abi = VM_ABI_64;
387 else
388 current_abi = VM_ABI_32;
389
390 /*
391 * Determine if we should allow the execution based on whether it's a
392 * stack or data area and the current architecture.
393 */
394
395 if (user_tag == VM_MEMORY_STACK)
396 return allow_stack_exec & current_abi;
397
398 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
399 }
400
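
/*
 * Editor's example (not part of the original source): a sketch of how a
 * fault path might consult override_nx() when execute permission is
 * requested on a page that was not mapped executable.  The variables
 * "fault_map" and "fault_entry" are hypothetical.
 */
#if 0	/* illustrative only */
	uint32_t user_tag = VME_ALIAS(fault_entry);

	if (!(fault_entry->protection & VM_PROT_EXECUTE) &&
	    override_nx(fault_map, user_tag)) {
		/* data/stack execution is allowed for this ABI: grant it */
	}
#endif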
401
402 /*
403 * Virtual memory maps provide for the mapping, protection,
404 * and sharing of virtual memory objects. In addition,
405 * this module provides for an efficient virtual copy of
406 * memory from one map to another.
407 *
408 * Synchronization is required prior to most operations.
409 *
410 * Maps consist of an ordered doubly-linked list of simple
411 * entries; a single hint is used to speed up lookups.
412 *
413 * Sharing maps have been deleted from this version of Mach.
414 * All shared objects are now mapped directly into the respective
415 * maps. This requires a change in the copy on write strategy;
416 * the asymmetric (delayed) strategy is used for shared temporary
417 * objects instead of the symmetric (shadow) strategy. All maps
418 * are now "top level" maps (either task map, kernel map or submap
419 * of the kernel map).
420 *
421 * Since portions of maps are specified by start/end addresses,
422 * which may not align with existing map entries, all
423 * routines merely "clip" entries to these start/end values.
424 * [That is, an entry is split into two, bordering at a
425 * start or end value.] Note that these clippings may not
426 * always be necessary (as the two resulting entries are then
427 * not changed); however, the clipping is done for convenience.
428 * No attempt is currently made to "glue back together" two
429 * abutting entries.
430 *
431 * The symmetric (shadow) copy strategy implements virtual copy
432 * by copying VM object references from one map to
433 * another, and then marking both regions as copy-on-write.
434 * It is important to note that only one writeable reference
435 * to a VM object region exists in any map when this strategy
436 * is used -- this means that shadow object creation can be
437 * delayed until a write operation occurs. The asymmetric (delayed)
438 * strategy allows multiple maps to have writeable references to
439 * the same region of a vm object, and hence cannot delay creating
440 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
441 * Copying of permanent objects is completely different; see
442 * vm_object_copy_strategically() in vm_object.c.
443 */
444
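/*
 * Editor's note -- a small worked example of the clipping described above
 * (illustrative only): if an entry spans [0x1000, 0x5000) and an operation
 * targets [0x2000, 0x4000), clipping at the start and end leaves three
 * abutting entries, [0x1000, 0x2000), [0x2000, 0x4000) and [0x4000, 0x5000),
 * with the middle entry exactly covering the requested range.  The two
 * outer entries are not glued back together afterwards.
 */
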
445 static zone_t vm_map_zone; /* zone for vm_map structures */
446 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
447 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
448 * allocations */
449 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
450 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
451
452
453 /*
454 * Placeholder object for submap operations. This object is dropped
455 * into the range by a call to vm_map_find, and removed when
456 * vm_map_submap creates the submap.
457 */
458
459 vm_object_t vm_submap_object;
460
461 static void *map_data;
462 static vm_size_t map_data_size;
463 static void *kentry_data;
464 static vm_size_t kentry_data_size;
465 static void *map_holes_data;
466 static vm_size_t map_holes_data_size;
467
468 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
469
470 /* Skip acquiring locks if we're in the midst of a kernel core dump */
471 unsigned int not_in_kdp = 1;
472
473 unsigned int vm_map_set_cache_attr_count = 0;
474
475 kern_return_t
476 vm_map_set_cache_attr(
477 vm_map_t map,
478 vm_map_offset_t va)
479 {
480 vm_map_entry_t map_entry;
481 vm_object_t object;
482 kern_return_t kr = KERN_SUCCESS;
483
484 vm_map_lock_read(map);
485
486 if (!vm_map_lookup_entry(map, va, &map_entry) ||
487 map_entry->is_sub_map) {
488 /*
489 * that memory is not properly mapped
490 */
491 kr = KERN_INVALID_ARGUMENT;
492 goto done;
493 }
494 object = VME_OBJECT(map_entry);
495
496 if (object == VM_OBJECT_NULL) {
497 /*
498 * there should be a VM object here at this point
499 */
500 kr = KERN_INVALID_ARGUMENT;
501 goto done;
502 }
503 vm_object_lock(object);
504 object->set_cache_attr = TRUE;
505 vm_object_unlock(object);
506
507 vm_map_set_cache_attr_count++;
508 done:
509 vm_map_unlock_read(map);
510
511 return kr;
512 }
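
/*
 * Editor's example (not part of the original source): a caller that has
 * just created a mapping at "va" in "map" could mark the backing VM object
 * so that cache attributes are honored.  Variable names are hypothetical.
 */
#if 0	/* illustrative only */
	kern_return_t kr;

	kr = vm_map_set_cache_attr(map, va);
	if (kr != KERN_SUCCESS) {
		/* "va" is not mapped, or is backed by a submap / no object */
	}
#endif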
513
514
515 #if CONFIG_CODE_DECRYPTION
516 /*
517 * vm_map_apple_protected:
518 * This remaps the requested part of the object with an object backed by
519 * the decrypting pager.
520 * crypt_info contains entry points and session data for the crypt module.
521 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
522 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
523 */
524 kern_return_t
525 vm_map_apple_protected(
526 vm_map_t map,
527 vm_map_offset_t start,
528 vm_map_offset_t end,
529 vm_object_offset_t crypto_backing_offset,
530 struct pager_crypt_info *crypt_info)
531 {
532 boolean_t map_locked;
533 kern_return_t kr;
534 vm_map_entry_t map_entry;
535 struct vm_map_entry tmp_entry;
536 memory_object_t unprotected_mem_obj;
537 vm_object_t protected_object;
538 vm_map_offset_t map_addr;
539 vm_map_offset_t start_aligned, end_aligned;
540 vm_object_offset_t crypto_start, crypto_end;
541 int vm_flags;
542
543 map_locked = FALSE;
544 unprotected_mem_obj = MEMORY_OBJECT_NULL;
545
546 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
547 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
548 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
549 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
550
551 assert(start_aligned == start);
552 assert(end_aligned == end);
553
554 map_addr = start_aligned;
555 for (map_addr = start_aligned;
556 map_addr < end;
557 map_addr = tmp_entry.vme_end) {
558 vm_map_lock(map);
559 map_locked = TRUE;
560
561 /* lookup the protected VM object */
562 if (!vm_map_lookup_entry(map,
563 map_addr,
564 &map_entry) ||
565 map_entry->is_sub_map ||
566 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
567 !(map_entry->protection & VM_PROT_EXECUTE)) {
568 /* that memory is not properly mapped */
569 kr = KERN_INVALID_ARGUMENT;
570 goto done;
571 }
572
573 /* get the protected object to be decrypted */
574 protected_object = VME_OBJECT(map_entry);
575 if (protected_object == VM_OBJECT_NULL) {
576 /* there should be a VM object here at this point */
577 kr = KERN_INVALID_ARGUMENT;
578 goto done;
579 }
580 /* ensure protected object stays alive while map is unlocked */
581 vm_object_reference(protected_object);
582
583 /* limit the map entry to the area we want to cover */
584 vm_map_clip_start(map, map_entry, start_aligned);
585 vm_map_clip_end(map, map_entry, end_aligned);
586
587 tmp_entry = *map_entry;
588 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
589 vm_map_unlock(map);
590 map_locked = FALSE;
591
592 /*
593 * This map entry might be only partially encrypted
594 * (if not fully "page-aligned").
595 */
596 crypto_start = 0;
597 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
598 if (tmp_entry.vme_start < start) {
599 if (tmp_entry.vme_start != start_aligned) {
600 kr = KERN_INVALID_ADDRESS;
601 }
602 crypto_start += (start - tmp_entry.vme_start);
603 }
604 if (tmp_entry.vme_end > end) {
605 if (tmp_entry.vme_end != end_aligned) {
606 kr = KERN_INVALID_ADDRESS;
607 }
608 crypto_end -= (tmp_entry.vme_end - end);
609 }
610
611 /*
612 * This "extra backing offset" is needed to get the decryption
613 * routine to use the right key. It adjusts for the possibly
614 * relative offset of an interposed "4K" pager...
615 */
616 if (crypto_backing_offset == (vm_object_offset_t) -1) {
617 crypto_backing_offset = VME_OFFSET(&tmp_entry);
618 }
619
620 /*
621 * Lookup (and create if necessary) the protected memory object
622 * matching that VM object.
623 * If successful, this also grabs a reference on the memory object,
624 * to guarantee that it doesn't go away before we get a chance to map
625 * it.
626 */
627 unprotected_mem_obj = apple_protect_pager_setup(
628 protected_object,
629 VME_OFFSET(&tmp_entry),
630 crypto_backing_offset,
631 crypt_info,
632 crypto_start,
633 crypto_end);
634
635 /* release extra ref on protected object */
636 vm_object_deallocate(protected_object);
637
638 if (unprotected_mem_obj == NULL) {
639 kr = KERN_FAILURE;
640 goto done;
641 }
642
643 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
644
645 /* map this memory object in place of the current one */
646 map_addr = tmp_entry.vme_start;
647 kr = vm_map_enter_mem_object(map,
648 &map_addr,
649 (tmp_entry.vme_end -
650 tmp_entry.vme_start),
651 (mach_vm_offset_t) 0,
652 vm_flags,
653 (ipc_port_t) unprotected_mem_obj,
654 0,
655 TRUE,
656 tmp_entry.protection,
657 tmp_entry.max_protection,
658 tmp_entry.inheritance);
659 assert(kr == KERN_SUCCESS);
660 assert(map_addr == tmp_entry.vme_start);
661
662 #if VM_MAP_DEBUG_APPLE_PROTECT
663 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: "
664 "backing:[object:%p,offset:0x%llx,"
665 "crypto_backing_offset:0x%llx,"
666 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
667 map,
668 (uint64_t) map_addr,
669 (uint64_t) (map_addr + (tmp_entry.vme_end -
670 tmp_entry.vme_start)),
671 unprotected_mem_obj,
672 protected_object,
673 VME_OFFSET(&tmp_entry),
674 crypto_backing_offset,
675 crypto_start,
676 crypto_end);
677 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
678
679 /*
680 * Release the reference obtained by
681 * apple_protect_pager_setup().
682 * The mapping (if it succeeded) is now holding a reference on
683 * the memory object.
684 */
685 memory_object_deallocate(unprotected_mem_obj);
686 unprotected_mem_obj = MEMORY_OBJECT_NULL;
687
688 /* continue with next map entry */
689 crypto_backing_offset += (tmp_entry.vme_end -
690 tmp_entry.vme_start);
691 crypto_backing_offset -= crypto_start;
692 }
693 kr = KERN_SUCCESS;
694
695 done:
696 if (map_locked) {
697 vm_map_unlock(map);
698 }
699 return kr;
700 }
701 #endif /* CONFIG_CODE_DECRYPTION */
702
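/*
 * Editor's example (not part of the original source): a hypothetical
 * caller of vm_map_apple_protected().  It assumes the encrypted range has
 * already been mapped executable and that "crypt_info" was populated by
 * the crypt module beforehand (its entry points must stay valid until
 * crypt_end() is invoked, as noted above).
 */
#if 0	/* illustrative only */
	struct pager_crypt_info crypt_info;	/* filled in by the crypt module */
	kern_return_t kr;

	kr = vm_map_apple_protected(map,
				    start,	/* must be map-page aligned */
				    end,	/* must be map-page aligned */
				    (vm_object_offset_t) -1, /* use each entry's own offset */
				    &crypt_info);
#endif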
703
704 lck_grp_t vm_map_lck_grp;
705 lck_grp_attr_t vm_map_lck_grp_attr;
706 lck_attr_t vm_map_lck_attr;
707 lck_attr_t vm_map_lck_rw_attr;
708
709
710 /*
711 * vm_map_init:
712 *
713 * Initialize the vm_map module. Must be called before
714 * any other vm_map routines.
715 *
716 * Map and entry structures are allocated from zones -- we must
717 * initialize those zones.
718 *
719 * There are three zones of interest:
720 *
721 * vm_map_zone: used to allocate maps.
722 * vm_map_entry_zone: used to allocate map entries.
723 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
724 *
725 * The kernel allocates map entries from a special zone that is initially
726 * "crammed" with memory. It would be difficult (perhaps impossible) for
727 * the kernel to allocate more memory to an entry zone when it became
728 * empty since the very act of allocating memory implies the creation
729 * of a new entry.
730 */
731 void
732 vm_map_init(
733 void)
734 {
735 vm_size_t entry_zone_alloc_size;
736 const char *mez_name = "VM map entries";
737
738 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
739 PAGE_SIZE, "maps");
740 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
741 #if defined(__LP64__)
742 entry_zone_alloc_size = PAGE_SIZE * 5;
743 #else
744 entry_zone_alloc_size = PAGE_SIZE * 6;
745 #endif
746 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
747 1024*1024, entry_zone_alloc_size,
748 mez_name);
749 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
750 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
751 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
752
753 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
754 kentry_data_size * 64, kentry_data_size,
755 "Reserved VM map entries");
756 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
757
758 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
759 16*1024, PAGE_SIZE, "VM map copies");
760 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
761
762 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
763 16*1024, PAGE_SIZE, "VM map holes");
764 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
765
766 /*
767 * Cram the map and kentry zones with initial data.
768 * Set reserved_zone non-collectible to aid zone_gc().
769 */
770 zone_change(vm_map_zone, Z_COLLECT, FALSE);
771
772 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
773 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
774 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
775 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
776 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
777 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
778 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
779
780 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
781 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
782 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
783 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
784 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
785 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
786
787 /*
788 * Add the stolen memory to zones, adjust zone size and stolen counts.
789 */
790 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
791 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
792 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
793 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
794
795 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
796 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
797 lck_attr_setdefault(&vm_map_lck_attr);
798
799 lck_attr_setdefault(&vm_map_lck_rw_attr);
800 lck_attr_cleardebug(&vm_map_lck_rw_attr);
801
802 #if CONFIG_FREEZE
803 default_freezer_init();
804 #endif /* CONFIG_FREEZE */
805 }
806
807 void
808 vm_map_steal_memory(
809 void)
810 {
811 uint32_t kentry_initial_pages;
812
813 map_data_size = round_page(10 * sizeof(struct _vm_map));
814 map_data = pmap_steal_memory(map_data_size);
815
816 /*
817 * kentry_initial_pages corresponds to the number of kernel map entries
818 * required during bootstrap until the asynchronous replenishment
819 * scheme is activated and/or entries are available from the general
820 * map entry pool.
821 */
822 #if defined(__LP64__)
823 kentry_initial_pages = 10;
824 #else
825 kentry_initial_pages = 6;
826 #endif
827
828 #if CONFIG_GZALLOC
829 /* If using the guard allocator, reserve more memory for the kernel
830 * reserved map entry pool.
831 */
832 if (gzalloc_enabled())
833 kentry_initial_pages *= 1024;
834 #endif
835
836 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
837 kentry_data = pmap_steal_memory(kentry_data_size);
838
839 map_holes_data_size = kentry_data_size;
840 map_holes_data = pmap_steal_memory(map_holes_data_size);
841 }
842
843 void
844 vm_kernel_reserved_entry_init(void) {
845 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
846 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
847 }
848
849 void
850 vm_map_disable_hole_optimization(vm_map_t map)
851 {
852 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
853
854 if (map->holelistenabled) {
855
856 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
857
858 while (hole_entry != NULL) {
859
860 next_hole_entry = hole_entry->vme_next;
861
862 hole_entry->vme_next = NULL;
863 hole_entry->vme_prev = NULL;
864 zfree(vm_map_holes_zone, hole_entry);
865
866 if (next_hole_entry == head_entry) {
867 hole_entry = NULL;
868 } else {
869 hole_entry = next_hole_entry;
870 }
871 }
872
873 map->holes_list = NULL;
874 map->holelistenabled = FALSE;
875
876 map->first_free = vm_map_first_entry(map);
877 SAVE_HINT_HOLE_WRITE(map, NULL);
878 }
879 }
880
881 boolean_t
882 vm_kernel_map_is_kernel(vm_map_t map) {
883 return (map->pmap == kernel_pmap);
884 }
885
886 /*
887 * vm_map_create:
888 *
889 * Creates and returns a new empty VM map with
890 * the given physical map structure, and having
891 * the given lower and upper address bounds.
892 */
893
894 boolean_t vm_map_supports_hole_optimization = TRUE;
895
896 vm_map_t
897 vm_map_create(
898 pmap_t pmap,
899 vm_map_offset_t min,
900 vm_map_offset_t max,
901 boolean_t pageable)
902 {
903 static int color_seed = 0;
904 register vm_map_t result;
905 struct vm_map_links *hole_entry = NULL;
906
907 result = (vm_map_t) zalloc(vm_map_zone);
908 if (result == VM_MAP_NULL)
909 panic("vm_map_create");
910
911 vm_map_first_entry(result) = vm_map_to_entry(result);
912 vm_map_last_entry(result) = vm_map_to_entry(result);
913 result->hdr.nentries = 0;
914 result->hdr.entries_pageable = pageable;
915
916 vm_map_store_init( &(result->hdr) );
917
918 result->hdr.page_shift = PAGE_SHIFT;
919
920 result->size = 0;
921 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
922 result->user_wire_size = 0;
923 result->ref_count = 1;
924 #if TASK_SWAPPER
925 result->res_count = 1;
926 result->sw_state = MAP_SW_IN;
927 #endif /* TASK_SWAPPER */
928 result->pmap = pmap;
929 result->min_offset = min;
930 result->max_offset = max;
931 result->wiring_required = FALSE;
932 result->no_zero_fill = FALSE;
933 result->mapped_in_other_pmaps = FALSE;
934 result->wait_for_space = FALSE;
935 result->switch_protect = FALSE;
936 result->disable_vmentry_reuse = FALSE;
937 result->map_disallow_data_exec = FALSE;
938 result->highest_entry_end = 0;
939 result->first_free = vm_map_to_entry(result);
940 result->hint = vm_map_to_entry(result);
941 result->color_rr = (color_seed++) & vm_color_mask;
942 result->jit_entry_exists = FALSE;
943
944 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
945 hole_entry = zalloc(vm_map_holes_zone);
946
947 hole_entry->start = min;
948 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
949 result->holes_list = result->hole_hint = hole_entry;
950 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
951 result->holelistenabled = TRUE;
952
953 } else {
954
955 result->holelistenabled = FALSE;
956 }
957
958 #if CONFIG_FREEZE
959 result->default_freezer_handle = NULL;
960 #endif
961 vm_map_lock_init(result);
962 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
963
964 return(result);
965 }
966
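/*
 * Editor's example (not part of the original source): creating a pageable
 * map for a new user address space.  The call to pmap_create() and the
 * exact bounds are hypothetical; real callers derive them from the task
 * being created.
 */
#if 0	/* illustrative only */
	vm_map_t new_map;

	new_map = vm_map_create(pmap_create(ledger, 0, TRUE),	/* 64-bit pmap */
				(vm_map_offset_t) MACH_VM_MIN_ADDRESS,
				(vm_map_offset_t) MACH_VM_MAX_ADDRESS,
				TRUE);	/* entries are pageable */
#endif
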
967 /*
968 * vm_map_entry_create: [ internal use only ]
969 *
970 * Allocates a VM map entry for insertion in the
971 * given map (or map copy). No fields are filled.
972 */
973 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
974
975 #define vm_map_copy_entry_create(copy, map_locked) \
976 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
977 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
978
979 static vm_map_entry_t
980 _vm_map_entry_create(
981 struct vm_map_header *map_header, boolean_t __unused map_locked)
982 {
983 zone_t zone;
984 vm_map_entry_t entry;
985
986 zone = vm_map_entry_zone;
987
988 assert(map_header->entries_pageable ? !map_locked : TRUE);
989
990 if (map_header->entries_pageable) {
991 entry = (vm_map_entry_t) zalloc(zone);
992 }
993 else {
994 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
995
996 if (entry == VM_MAP_ENTRY_NULL) {
997 zone = vm_map_entry_reserved_zone;
998 entry = (vm_map_entry_t) zalloc(zone);
999 OSAddAtomic(1, &reserved_zalloc_count);
1000 } else
1001 OSAddAtomic(1, &nonreserved_zalloc_count);
1002 }
1003
1004 if (entry == VM_MAP_ENTRY_NULL)
1005 panic("vm_map_entry_create");
1006 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1007
1008 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1009 #if MAP_ENTRY_CREATION_DEBUG
1010 entry->vme_creation_maphdr = map_header;
1011 fastbacktrace(&entry->vme_creation_bt[0],
1012 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1013 #endif
1014 return(entry);
1015 }
1016
1017 /*
1018 * vm_map_entry_dispose: [ internal use only ]
1019 *
1020 * Inverse of vm_map_entry_create.
1021 *
1022 * The write map lock is held, so nothing special is
1023 * needed to ensure the correctness
1024 * of the stores.
1025 */
1026 #define vm_map_entry_dispose(map, entry) \
1027 _vm_map_entry_dispose(&(map)->hdr, (entry))
1028
1029 #define vm_map_copy_entry_dispose(map, entry) \
1030 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1031
1032 static void
1033 _vm_map_entry_dispose(
1034 register struct vm_map_header *map_header,
1035 register vm_map_entry_t entry)
1036 {
1037 register zone_t zone;
1038
1039 if (map_header->entries_pageable || !(entry->from_reserved_zone))
1040 zone = vm_map_entry_zone;
1041 else
1042 zone = vm_map_entry_reserved_zone;
1043
1044 if (!map_header->entries_pageable) {
1045 if (zone == vm_map_entry_zone)
1046 OSAddAtomic(-1, &nonreserved_zalloc_count);
1047 else
1048 OSAddAtomic(-1, &reserved_zalloc_count);
1049 }
1050
1051 zfree(zone, entry);
1052 }
1053
1054 #if MACH_ASSERT
1055 static boolean_t first_free_check = FALSE;
1056 boolean_t
1057 first_free_is_valid(
1058 vm_map_t map)
1059 {
1060 if (!first_free_check)
1061 return TRUE;
1062
1063 return( first_free_is_valid_store( map ));
1064 }
1065 #endif /* MACH_ASSERT */
1066
1067
1068 #define vm_map_copy_entry_link(copy, after_where, entry) \
1069 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1070
1071 #define vm_map_copy_entry_unlink(copy, entry) \
1072 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1073
1074 #if MACH_ASSERT && TASK_SWAPPER
1075 /*
1076 * vm_map_res_reference:
1077 *
1078 * Adds another valid residence count to the given map.
1079 *
1080 * Map is locked so this function can be called from
1081 * vm_map_swapin.
1082 *
1083 */
1084 void vm_map_res_reference(register vm_map_t map)
1085 {
1086 /* assert map is locked */
1087 assert(map->res_count >= 0);
1088 assert(map->ref_count >= map->res_count);
1089 if (map->res_count == 0) {
1090 lck_mtx_unlock(&map->s_lock);
1091 vm_map_lock(map);
1092 vm_map_swapin(map);
1093 lck_mtx_lock(&map->s_lock);
1094 ++map->res_count;
1095 vm_map_unlock(map);
1096 } else
1097 ++map->res_count;
1098 }
1099
1100 /*
1101 * vm_map_reference_swap:
1102 *
1103 * Adds valid reference and residence counts to the given map.
1104 *
1105 * The map may not be in memory (i.e. zero residence count).
1106 *
1107 */
1108 void vm_map_reference_swap(register vm_map_t map)
1109 {
1110 assert(map != VM_MAP_NULL);
1111 lck_mtx_lock(&map->s_lock);
1112 assert(map->res_count >= 0);
1113 assert(map->ref_count >= map->res_count);
1114 map->ref_count++;
1115 vm_map_res_reference(map);
1116 lck_mtx_unlock(&map->s_lock);
1117 }
1118
1119 /*
1120 * vm_map_res_deallocate:
1121 *
1122 * Decrement residence count on a map; possibly causing swapout.
1123 *
1124 * The map must be in memory (i.e. non-zero residence count).
1125 *
1126 * The map is locked, so this function is callable from vm_map_deallocate.
1127 *
1128 */
1129 void vm_map_res_deallocate(register vm_map_t map)
1130 {
1131 assert(map->res_count > 0);
1132 if (--map->res_count == 0) {
1133 lck_mtx_unlock(&map->s_lock);
1134 vm_map_lock(map);
1135 vm_map_swapout(map);
1136 vm_map_unlock(map);
1137 lck_mtx_lock(&map->s_lock);
1138 }
1139 assert(map->ref_count >= map->res_count);
1140 }
1141 #endif /* MACH_ASSERT && TASK_SWAPPER */
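
/*
 * Editor's example (not part of the original source): under TASK_SWAPPER,
 * the residence-count protocol above pairs a swap reference with a later
 * deallocation.  A sketch of the expected usage; the surrounding code is
 * hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_reference_swap(map);	/* ref_count++, res_count++ (may swap the map in) */
	/* ... operate on the now-resident map ... */
	vm_map_deallocate(map);		/* drops the reference; may lead to swapout */
#endif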
1142
1143 /*
1144 * vm_map_destroy:
1145 *
1146 * Actually destroy a map.
1147 */
1148 void
1149 vm_map_destroy(
1150 vm_map_t map,
1151 int flags)
1152 {
1153 vm_map_lock(map);
1154
1155 /* final cleanup: no need to unnest shared region */
1156 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1157
1158 /* clean up regular map entries */
1159 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1160 flags, VM_MAP_NULL);
1161 /* clean up leftover special mappings (commpage, etc...) */
1162 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1163 flags, VM_MAP_NULL);
1164
1165 #if CONFIG_FREEZE
1166 if (map->default_freezer_handle) {
1167 default_freezer_handle_deallocate(map->default_freezer_handle);
1168 map->default_freezer_handle = NULL;
1169 }
1170 #endif
1171 vm_map_disable_hole_optimization(map);
1172 vm_map_unlock(map);
1173
1174 assert(map->hdr.nentries == 0);
1175
1176 if(map->pmap)
1177 pmap_destroy(map->pmap);
1178
1179 zfree(vm_map_zone, map);
1180 }
1181
1182 #if TASK_SWAPPER
1183 /*
1184 * vm_map_swapin/vm_map_swapout
1185 *
1186 * Swap a map in and out, either referencing or releasing its resources.
1187 * These functions are internal use only; however, they must be exported
1188 * because they may be called from macros, which are exported.
1189 *
1190 * In the case of swapout, there could be races on the residence count,
1191 * so if the residence count is up, we return, assuming that a
1192 * vm_map_deallocate() call in the near future will bring us back.
1193 *
1194 * Locking:
1195 * -- We use the map write lock for synchronization among races.
1196 * -- The map write lock, and not the simple s_lock, protects the
1197 * swap state of the map.
1198 * -- If a map entry is a share map, then we hold both locks, in
1199 * hierarchical order.
1200 *
1201 * Synchronization Notes:
1202 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1203 * will block on the map lock and proceed when swapout is through.
1204 * 2) A vm_map_reference() call at this time is illegal, and will
1205 * cause a panic. vm_map_reference() is only allowed on resident
1206 * maps, since it refuses to block.
1207 * 3) A vm_map_swapin() call during a swapin will block, and
1208 * proceed when the first swapin is done, turning into a nop.
1209 * This is the reason the res_count is not incremented until
1210 * after the swapin is complete.
1211 * 4) There is a timing hole after the checks of the res_count, before
1212 * the map lock is taken, during which a swapin may get the lock
1213 * before a swapout about to happen. If this happens, the swapin
1214 * will detect the state and increment the reference count, causing
1215 * the swapout to be a nop, thereby delaying it until a later
1216 * vm_map_deallocate. If the swapout gets the lock first, then
1217 * the swapin will simply block until the swapout is done, and
1218 * then proceed.
1219 *
1220 * Because vm_map_swapin() is potentially an expensive operation, it
1221 * should be used with caution.
1222 *
1223 * Invariants:
1224 * 1) A map with a residence count of zero is either swapped, or
1225 * being swapped.
1226 * 2) A map with a non-zero residence count is either resident,
1227 * or being swapped in.
1228 */
1229
1230 int vm_map_swap_enable = 1;
1231
1232 void vm_map_swapin (vm_map_t map)
1233 {
1234 register vm_map_entry_t entry;
1235
1236 if (!vm_map_swap_enable) /* debug */
1237 return;
1238
1239 /*
1240 * Map is locked
1241 * First deal with various races.
1242 */
1243 if (map->sw_state == MAP_SW_IN)
1244 /*
1245 * we raced with swapout and won. Returning will increment
1246 * the res_count, turning the swapout into a nop.
1247 */
1248 return;
1249
1250 /*
1251 * The residence count must be zero. If we raced with another
1252 * swapin, the state would have been IN; if we raced with a
1253 * swapout (after another competing swapin), we must have lost
1254 * the race to get here (see above comment), in which case
1255 * res_count is still 0.
1256 */
1257 assert(map->res_count == 0);
1258
1259 /*
1260 * There are no intermediate states of a map going out or
1261 * coming in, since the map is locked during the transition.
1262 */
1263 assert(map->sw_state == MAP_SW_OUT);
1264
1265 /*
1266 * We now operate upon each map entry. If the entry is a sub-
1267 * or share-map, we call vm_map_res_reference upon it.
1268 * If the entry is an object, we call vm_object_res_reference
1269 * (this may iterate through the shadow chain).
1270 * Note that we hold the map locked the entire time,
1271 * even if we get back here via a recursive call in
1272 * vm_map_res_reference.
1273 */
1274 entry = vm_map_first_entry(map);
1275
1276 while (entry != vm_map_to_entry(map)) {
1277 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1278 if (entry->is_sub_map) {
1279 vm_map_t lmap = VME_SUBMAP(entry);
1280 lck_mtx_lock(&lmap->s_lock);
1281 vm_map_res_reference(lmap);
1282 lck_mtx_unlock(&lmap->s_lock);
1283 } else {
1284 vm_object_t object = VME_OBJECT(entry);
1285 vm_object_lock(object);
1286 /*
1287 * This call may iterate through the
1288 * shadow chain.
1289 */
1290 vm_object_res_reference(object);
1291 vm_object_unlock(object);
1292 }
1293 }
1294 entry = entry->vme_next;
1295 }
1296 assert(map->sw_state == MAP_SW_OUT);
1297 map->sw_state = MAP_SW_IN;
1298 }
1299
1300 void vm_map_swapout(vm_map_t map)
1301 {
1302 register vm_map_entry_t entry;
1303
1304 /*
1305 * Map is locked
1306 * First deal with various races.
1307 * If we raced with a swapin and lost, the residence count
1308 * will have been incremented to 1, and we simply return.
1309 */
1310 lck_mtx_lock(&map->s_lock);
1311 if (map->res_count != 0) {
1312 lck_mtx_unlock(&map->s_lock);
1313 return;
1314 }
1315 lck_mtx_unlock(&map->s_lock);
1316
1317 /*
1318 * There are no intermediate states of a map going out or
1319 * coming in, since the map is locked during the transition.
1320 */
1321 assert(map->sw_state == MAP_SW_IN);
1322
1323 if (!vm_map_swap_enable)
1324 return;
1325
1326 /*
1327 * We now operate upon each map entry. If the entry is a sub-
1328 * or share-map, we call vm_map_res_deallocate upon it.
1329 * If the entry is an object, we call vm_object_res_deallocate
1330 * (this may iterate through the shadow chain).
1331 * Note that we hold the map locked the entire time,
1332 * even if we get back here via a recursive call in
1333 * vm_map_res_deallocate.
1334 */
1335 entry = vm_map_first_entry(map);
1336
1337 while (entry != vm_map_to_entry(map)) {
1338 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1339 if (entry->is_sub_map) {
1340 vm_map_t lmap = VME_SUBMAP(entry);
1341 lck_mtx_lock(&lmap->s_lock);
1342 vm_map_res_deallocate(lmap);
1343 lck_mtx_unlock(&lmap->s_lock);
1344 } else {
1345 vm_object_t object = VME_OBJECT(entry);
1346 vm_object_lock(object);
1347 /*
1348 * This call may take a long time,
1349 * since it could actively push
1350 * out pages (if we implement it
1351 * that way).
1352 */
1353 vm_object_res_deallocate(object);
1354 vm_object_unlock(object);
1355 }
1356 }
1357 entry = entry->vme_next;
1358 }
1359 assert(map->sw_state == MAP_SW_IN);
1360 map->sw_state = MAP_SW_OUT;
1361 }
1362
1363 #endif /* TASK_SWAPPER */
1364
1365 /*
1366 * vm_map_lookup_entry: [ internal use only ]
1367 *
1368 * Calls into the vm map store layer to find the map
1369 * entry containing (or immediately preceding) the
1370 * specified address in the given map; the entry is returned
1371 * in the "entry" parameter. The boolean
1372 * result indicates whether the address is
1373 * actually contained in the map.
1374 */
1375 boolean_t
1376 vm_map_lookup_entry(
1377 register vm_map_t map,
1378 register vm_map_offset_t address,
1379 vm_map_entry_t *entry) /* OUT */
1380 {
1381 return ( vm_map_store_lookup_entry( map, address, entry ));
1382 }
1383
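/*
 * Editor's example (not part of the original source): the common lookup
 * pattern used throughout this file.  A FALSE return means the address is
 * not mapped and "entry" refers to the entry immediately preceding the
 * hole.  Variable names are hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_entry_t entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* "addr" lies inside [entry->vme_start, entry->vme_end) */
	} else {
		/* "addr" is unmapped; "entry" precedes the hole containing it */
	}
	vm_map_unlock_read(map);
#endif
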
1384 /*
1385 * Routine: vm_map_find_space
1386 * Purpose:
1387 * Allocate a range in the specified virtual address map,
1388 * returning the entry allocated for that range.
1389 * Used by kmem_alloc, etc.
1390 *
1391 * The map must NOT be locked. It will be returned locked
1392 * on KERN_SUCCESS, unlocked on failure.
1393 *
1394 * If an entry is allocated, the object/offset fields
1395 * are initialized to zero.
1396 */
1397 kern_return_t
1398 vm_map_find_space(
1399 register vm_map_t map,
1400 vm_map_offset_t *address, /* OUT */
1401 vm_map_size_t size,
1402 vm_map_offset_t mask,
1403 int flags,
1404 vm_map_entry_t *o_entry) /* OUT */
1405 {
1406 vm_map_entry_t entry, new_entry;
1407 register vm_map_offset_t start;
1408 register vm_map_offset_t end;
1409 vm_map_entry_t hole_entry;
1410
1411 if (size == 0) {
1412 *address = 0;
1413 return KERN_INVALID_ARGUMENT;
1414 }
1415
1416 if (flags & VM_FLAGS_GUARD_AFTER) {
1417 /* account for the back guard page in the size */
1418 size += VM_MAP_PAGE_SIZE(map);
1419 }
1420
1421 new_entry = vm_map_entry_create(map, FALSE);
1422
1423 /*
1424 * Look for the first possible address; if there's already
1425 * something at this address, we have to start after it.
1426 */
1427
1428 vm_map_lock(map);
1429
1430 if( map->disable_vmentry_reuse == TRUE) {
1431 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1432 } else {
1433 if (map->holelistenabled) {
1434 hole_entry = (vm_map_entry_t)map->holes_list;
1435
1436 if (hole_entry == NULL) {
1437 /*
1438 * No more space in the map?
1439 */
1440 vm_map_entry_dispose(map, new_entry);
1441 vm_map_unlock(map);
1442 return(KERN_NO_SPACE);
1443 }
1444
1445 entry = hole_entry;
1446 start = entry->vme_start;
1447 } else {
1448 assert(first_free_is_valid(map));
1449 if ((entry = map->first_free) == vm_map_to_entry(map))
1450 start = map->min_offset;
1451 else
1452 start = entry->vme_end;
1453 }
1454 }
1455
1456 /*
1457 * In any case, the "entry" always precedes
1458 * the proposed new region throughout the loop:
1459 */
1460
1461 while (TRUE) {
1462 register vm_map_entry_t next;
1463
1464 /*
1465 * Find the end of the proposed new region.
1466 * Be sure we didn't go beyond the end, or
1467 * wrap around the address.
1468 */
1469
1470 if (flags & VM_FLAGS_GUARD_BEFORE) {
1471 /* reserve space for the front guard page */
1472 start += VM_MAP_PAGE_SIZE(map);
1473 }
1474 end = ((start + mask) & ~mask);
1475
1476 if (end < start) {
1477 vm_map_entry_dispose(map, new_entry);
1478 vm_map_unlock(map);
1479 return(KERN_NO_SPACE);
1480 }
1481 start = end;
1482 end += size;
1483
1484 if ((end > map->max_offset) || (end < start)) {
1485 vm_map_entry_dispose(map, new_entry);
1486 vm_map_unlock(map);
1487 return(KERN_NO_SPACE);
1488 }
1489
1490 next = entry->vme_next;
1491
1492 if (map->holelistenabled) {
1493 if (entry->vme_end >= end)
1494 break;
1495 } else {
1496 /*
1497 * If there are no more entries, we must win.
1498 *
1499 * OR
1500 *
1501 * If there is another entry, it must be
1502 * after the end of the potential new region.
1503 */
1504
1505 if (next == vm_map_to_entry(map))
1506 break;
1507
1508 if (next->vme_start >= end)
1509 break;
1510 }
1511
1512 /*
1513 * Didn't fit -- move to the next entry.
1514 */
1515
1516 entry = next;
1517
1518 if (map->holelistenabled) {
1519 if (entry == (vm_map_entry_t) map->holes_list) {
1520 /*
1521 * Wrapped around
1522 */
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527 start = entry->vme_start;
1528 } else {
1529 start = entry->vme_end;
1530 }
1531 }
1532
1533 if (map->holelistenabled) {
1534 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1535 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1536 }
1537 }
1538
1539 /*
1540 * At this point,
1541 * "start" and "end" should define the endpoints of the
1542 * available new range, and
1543 * "entry" should refer to the region before the new
1544 * range, and
1545 *
1546 * the map should be locked.
1547 */
1548
1549 if (flags & VM_FLAGS_GUARD_BEFORE) {
1550 /* go back for the front guard page */
1551 start -= VM_MAP_PAGE_SIZE(map);
1552 }
1553 *address = start;
1554
1555 assert(start < end);
1556 new_entry->vme_start = start;
1557 new_entry->vme_end = end;
1558 assert(page_aligned(new_entry->vme_start));
1559 assert(page_aligned(new_entry->vme_end));
1560 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1561 VM_MAP_PAGE_MASK(map)));
1562 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1563 VM_MAP_PAGE_MASK(map)));
1564
1565 new_entry->is_shared = FALSE;
1566 new_entry->is_sub_map = FALSE;
1567 new_entry->use_pmap = TRUE;
1568 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1569 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1570
1571 new_entry->needs_copy = FALSE;
1572
1573 new_entry->inheritance = VM_INHERIT_DEFAULT;
1574 new_entry->protection = VM_PROT_DEFAULT;
1575 new_entry->max_protection = VM_PROT_ALL;
1576 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1577 new_entry->wired_count = 0;
1578 new_entry->user_wired_count = 0;
1579
1580 new_entry->in_transition = FALSE;
1581 new_entry->needs_wakeup = FALSE;
1582 new_entry->no_cache = FALSE;
1583 new_entry->permanent = FALSE;
1584 new_entry->superpage_size = FALSE;
1585 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1586 new_entry->map_aligned = TRUE;
1587 } else {
1588 new_entry->map_aligned = FALSE;
1589 }
1590
1591 new_entry->used_for_jit = FALSE;
1592 new_entry->zero_wired_pages = FALSE;
1593 new_entry->iokit_acct = FALSE;
1594 new_entry->vme_resilient_codesign = FALSE;
1595 new_entry->vme_resilient_media = FALSE;
1596
1597 int alias;
1598 VM_GET_FLAGS_ALIAS(flags, alias);
1599 VME_ALIAS_SET(new_entry, alias);
1600
1601 /*
1602 * Insert the new entry into the list
1603 */
1604
1605 vm_map_store_entry_link(map, entry, new_entry);
1606
1607 map->size += size;
1608
1609 /*
1610 * Update the lookup hint
1611 */
1612 SAVE_HINT_MAP_WRITE(map, new_entry);
1613
1614 *o_entry = new_entry;
1615 return(KERN_SUCCESS);
1616 }
1617
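/*
 * Editor's example (not part of the original source): a kmem_alloc()-style
 * caller.  On KERN_SUCCESS the map comes back locked and the new entry has
 * a null object, so the caller installs its own object and then unlocks.
 * Variable names are hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_offset_t addr;
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = vm_map_find_space(kernel_map, &addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		VME_OBJECT_SET(entry, object);
		VME_OFFSET_SET(entry, offset);
		vm_map_unlock(kernel_map);
	}
#endif
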
1618 int vm_map_pmap_enter_print = FALSE;
1619 int vm_map_pmap_enter_enable = FALSE;
1620
1621 /*
1622 * Routine: vm_map_pmap_enter [internal only]
1623 *
1624 * Description:
1625 * Force pages from the specified object to be entered into
1626 * the pmap at the specified address if they are present.
1627 * As soon as a page is not found in the object, the scan ends.
1628 *
1629 * Returns:
1630 * Nothing.
1631 *
1632 * In/out conditions:
1633 * The source map should not be locked on entry.
1634 */
1635 __unused static void
1636 vm_map_pmap_enter(
1637 vm_map_t map,
1638 register vm_map_offset_t addr,
1639 register vm_map_offset_t end_addr,
1640 register vm_object_t object,
1641 vm_object_offset_t offset,
1642 vm_prot_t protection)
1643 {
1644 int type_of_fault;
1645 kern_return_t kr;
1646
1647 if(map->pmap == 0)
1648 return;
1649
1650 while (addr < end_addr) {
1651 register vm_page_t m;
1652
1653
1654 /*
1655 * TODO:
1656 * From vm_map_enter(), we come into this function without the map
1657 * lock held or the object lock held.
1658 * We haven't taken a reference on the object either.
1659 * We should do a proper lookup on the map to make sure
1660 * that things are sane before we go locking objects that
1661 * could have been deallocated from under us.
1662 */
1663
1664 vm_object_lock(object);
1665
1666 m = vm_page_lookup(object, offset);
1667 /*
1668 * ENCRYPTED SWAP:
1669 * The user should never see encrypted data, so do not
1670 * enter an encrypted page in the page table.
1671 */
1672 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1673 m->fictitious ||
1674 (m->unusual && ( m->error || m->restart || m->absent))) {
1675 vm_object_unlock(object);
1676 return;
1677 }
1678
1679 if (vm_map_pmap_enter_print) {
1680 printf("vm_map_pmap_enter:");
1681 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1682 map, (unsigned long long)addr, object, (unsigned long long)offset);
1683 }
1684 type_of_fault = DBG_CACHE_HIT_FAULT;
1685 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1686 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1687 0, /* XXX need user tag / alias? */
1688 0, /* alternate accounting? */
1689 NULL,
1690 &type_of_fault);
1691
1692 vm_object_unlock(object);
1693
1694 offset += PAGE_SIZE_64;
1695 addr += PAGE_SIZE;
1696 }
1697 }
1698
1699 boolean_t vm_map_pmap_is_empty(
1700 vm_map_t map,
1701 vm_map_offset_t start,
1702 vm_map_offset_t end);
1703 boolean_t vm_map_pmap_is_empty(
1704 vm_map_t map,
1705 vm_map_offset_t start,
1706 vm_map_offset_t end)
1707 {
1708 #ifdef MACHINE_PMAP_IS_EMPTY
1709 return pmap_is_empty(map->pmap, start, end);
1710 #else /* MACHINE_PMAP_IS_EMPTY */
1711 vm_map_offset_t offset;
1712 ppnum_t phys_page;
1713
1714 if (map->pmap == NULL) {
1715 return TRUE;
1716 }
1717
1718 for (offset = start;
1719 offset < end;
1720 offset += PAGE_SIZE) {
1721 phys_page = pmap_find_phys(map->pmap, offset);
1722 if (phys_page) {
1723 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1724 "page %d at 0x%llx\n",
1725 map, (long long)start, (long long)end,
1726 phys_page, (long long)offset);
1727 return FALSE;
1728 }
1729 }
1730 return TRUE;
1731 #endif /* MACHINE_PMAP_IS_EMPTY */
1732 }
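
/*
 * Editor's example (not part of the original source): vm_map_pmap_is_empty()
 * is mainly a debugging aid; a caller that has just reserved a fresh range
 * might assert that no pages are resident there yet.
 */
#if 0	/* illustrative only */
	assert(vm_map_pmap_is_empty(map, *address, *address + size));
#endif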
1733
1734 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1735 kern_return_t
1736 vm_map_random_address_for_size(
1737 vm_map_t map,
1738 vm_map_offset_t *address,
1739 vm_map_size_t size)
1740 {
1741 kern_return_t kr = KERN_SUCCESS;
1742 int tries = 0;
1743 vm_map_offset_t random_addr = 0;
1744 vm_map_offset_t hole_end;
1745
1746 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1747 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1748 vm_map_size_t vm_hole_size = 0;
1749 vm_map_size_t addr_space_size;
1750
1751 addr_space_size = vm_map_max(map) - vm_map_min(map);
1752
1753 assert(page_aligned(size));
1754
1755 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1756 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1757 random_addr = vm_map_trunc_page(
1758 vm_map_min(map) +(random_addr % addr_space_size),
1759 VM_MAP_PAGE_MASK(map));
1760
1761 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1762 if (prev_entry == vm_map_to_entry(map)) {
1763 next_entry = vm_map_first_entry(map);
1764 } else {
1765 next_entry = prev_entry->vme_next;
1766 }
1767 if (next_entry == vm_map_to_entry(map)) {
1768 hole_end = vm_map_max(map);
1769 } else {
1770 hole_end = next_entry->vme_start;
1771 }
1772 vm_hole_size = hole_end - random_addr;
1773 if (vm_hole_size >= size) {
1774 *address = random_addr;
1775 break;
1776 }
1777 }
1778 tries++;
1779 }
1780
1781 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1782 kr = KERN_NO_SPACE;
1783 }
1784 return kr;
1785 }
1786
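/*
 * Editor's example (not part of the original source): a sketch of how a
 * randomized placement request might use this helper.  The caller is
 * assumed to hold the map lock, since the helper walks map entries
 * directly; variable names are hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_offset_t start;
	kern_return_t kr;

	kr = vm_map_random_address_for_size(map, &start, size);
	if (kr == KERN_NO_SPACE) {
		/* no hole of "size" bytes found after 1000 random tries */
	}
#endif
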
1787 /*
1788 * Routine: vm_map_enter
1789 *
1790 * Description:
1791 * Allocate a range in the specified virtual address map.
1792 * The resulting range will refer to memory defined by
1793 * the given memory object and offset into that object.
1794 *
1795 * Arguments are as defined in the vm_map call.
1796 */
1797 int _map_enter_debug = 0;
1798 static unsigned int vm_map_enter_restore_successes = 0;
1799 static unsigned int vm_map_enter_restore_failures = 0;
1800 kern_return_t
1801 vm_map_enter(
1802 vm_map_t map,
1803 vm_map_offset_t *address, /* IN/OUT */
1804 vm_map_size_t size,
1805 vm_map_offset_t mask,
1806 int flags,
1807 vm_object_t object,
1808 vm_object_offset_t offset,
1809 boolean_t needs_copy,
1810 vm_prot_t cur_protection,
1811 vm_prot_t max_protection,
1812 vm_inherit_t inheritance)
1813 {
1814 vm_map_entry_t entry, new_entry;
1815 vm_map_offset_t start, tmp_start, tmp_offset;
1816 vm_map_offset_t end, tmp_end;
1817 vm_map_offset_t tmp2_start, tmp2_end;
1818 vm_map_offset_t step;
1819 kern_return_t result = KERN_SUCCESS;
1820 vm_map_t zap_old_map = VM_MAP_NULL;
1821 vm_map_t zap_new_map = VM_MAP_NULL;
1822 boolean_t map_locked = FALSE;
1823 boolean_t pmap_empty = TRUE;
1824 boolean_t new_mapping_established = FALSE;
1825 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1826 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1827 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1828 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1829 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1830 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1831 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1832 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1833 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1834 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1835 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1836 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1837 vm_tag_t alias, user_alias;
1838 vm_map_offset_t effective_min_offset, effective_max_offset;
1839 kern_return_t kr;
1840 boolean_t clear_map_aligned = FALSE;
1841 vm_map_entry_t hole_entry;
1842
1843 if (superpage_size) {
1844 switch (superpage_size) {
1845 /*
1846 * Note that the current implementation only supports
1847 * a single size for superpages, SUPERPAGE_SIZE, per
1848 * architecture. Once additional sizes need to be
1849 * supported, SUPERPAGE_SIZE will have to be replaced
1850 * with a lookup of the size based on superpage_size.
1851 */
1852 #ifdef __x86_64__
1853 case SUPERPAGE_SIZE_ANY:
1854 /* handle it like 2 MB and round up to page size */
1855 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
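/* fall through: treat SUPERPAGE_SIZE_ANY as a 2MB superpage */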
1856 case SUPERPAGE_SIZE_2MB:
1857 break;
1858 #endif
1859 default:
1860 return KERN_INVALID_ARGUMENT;
1861 }
1862 mask = SUPERPAGE_SIZE-1;
1863 if (size & (SUPERPAGE_SIZE-1))
1864 return KERN_INVALID_ARGUMENT;
1865 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1866 }
1867
1868
1869
1870 if (resilient_codesign || resilient_media) {
1871 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1872 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1873 return KERN_PROTECTION_FAILURE;
1874 }
1875 }
1876
1877 if (is_submap) {
1878 if (purgable) {
1879 /* submaps can not be purgeable */
1880 return KERN_INVALID_ARGUMENT;
1881 }
1882 if (object == VM_OBJECT_NULL) {
1883 /* submaps can not be created lazily */
1884 return KERN_INVALID_ARGUMENT;
1885 }
1886 }
1887 if (flags & VM_FLAGS_ALREADY) {
1888 /*
1889 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1890 * is already present. For it to be meaningful, the requested
1891 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1892 * we shouldn't try to remove what was mapped there first
1893 * (!VM_FLAGS_OVERWRITE).
1894 */
1895 if ((flags & VM_FLAGS_ANYWHERE) ||
1896 (flags & VM_FLAGS_OVERWRITE)) {
1897 return KERN_INVALID_ARGUMENT;
1898 }
1899 }
1900
1901 effective_min_offset = map->min_offset;
1902
1903 if (flags & VM_FLAGS_BEYOND_MAX) {
1904 /*
1905 * Allow an insertion beyond the map's max offset.
1906 */
1907 if (vm_map_is_64bit(map))
1908 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1909 else
1910 effective_max_offset = 0x00000000FFFFF000ULL;
1911 } else {
1912 effective_max_offset = map->max_offset;
1913 }
1914
1915 if (size == 0 ||
1916 (offset & PAGE_MASK_64) != 0) {
1917 *address = 0;
1918 return KERN_INVALID_ARGUMENT;
1919 }
1920
1921 VM_GET_FLAGS_ALIAS(flags, alias);
1922 if (map->pmap == kernel_pmap) {
1923 user_alias = VM_KERN_MEMORY_NONE;
1924 } else {
1925 user_alias = alias;
1926 }
1927
1928 #define RETURN(value) { result = value; goto BailOut; }
1929
1930 assert(page_aligned(*address));
1931 assert(page_aligned(size));
1932
1933 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1934 /*
1935 * In most cases, the caller rounds the size up to the
1936 * map's page size.
1937 * If we get a size that is explicitly not map-aligned here,
1938 * we'll have to respect the caller's wish and mark the
1939 * mapping as "not map-aligned" to avoid tripping the
1940 * map alignment checks later.
1941 */
1942 clear_map_aligned = TRUE;
1943 }
1944 if (!anywhere &&
1945 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1946 /*
1947 * We've been asked to map at a fixed address and that
1948 * address is not aligned to the map's specific alignment.
1949 * The caller should know what it's doing (i.e. most likely
1950 * mapping some fragmented copy map, transferring memory from
1951 * a VM map with a different alignment), so clear map_aligned
1952 * for this new VM map entry and proceed.
1953 */
1954 clear_map_aligned = TRUE;
1955 }
1956
1957 /*
1958 * Only zero-fill objects are allowed to be purgable.
1959 * LP64todo - limit purgable objects to 32-bits for now
1960 */
1961 if (purgable &&
1962 (offset != 0 ||
1963 (object != VM_OBJECT_NULL &&
1964 (object->vo_size != size ||
1965 object->purgable == VM_PURGABLE_DENY))
1966 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1967 return KERN_INVALID_ARGUMENT;
1968
1969 if (!anywhere && overwrite) {
1970 /*
1971 * Create a temporary VM map to hold the old mappings in the
1972 * affected area while we create the new one.
1973 * This avoids releasing the VM map lock in
1974 * vm_map_entry_delete() and allows atomicity
1975 * when we want to replace some mappings with a new one.
1976 * It also allows us to restore the old VM mappings if the
1977 * new mapping fails.
1978 */
1979 zap_old_map = vm_map_create(PMAP_NULL,
1980 *address,
1981 *address + size,
1982 map->hdr.entries_pageable);
1983 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1984 vm_map_disable_hole_optimization(zap_old_map);
1985 }
1986
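/*
 * Restart point: we come back here after dropping the map lock and
 * blocking, when the map was full and "wait_for_space" is set.
 */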
1987 StartAgain: ;
1988
1989 start = *address;
1990
1991 if (anywhere) {
1992 vm_map_lock(map);
1993 map_locked = TRUE;
1994
1995 if (entry_for_jit) {
1996 if (map->jit_entry_exists) {
1997 result = KERN_INVALID_ARGUMENT;
1998 goto BailOut;
1999 }
2000 /*
2001 * Get a random start address.
2002 */
2003 result = vm_map_random_address_for_size(map, address, size);
2004 if (result != KERN_SUCCESS) {
2005 goto BailOut;
2006 }
2007 start = *address;
2008 }
2009
2010
2011 /*
2012 * Calculate the first possible address.
2013 */
2014
2015 if (start < effective_min_offset)
2016 start = effective_min_offset;
2017 if (start > effective_max_offset)
2018 RETURN(KERN_NO_SPACE);
2019
2020 /*
2021 * Look for the first possible address;
2022 * if there's already something at this
2023 * address, we have to start after it.
2024 */
2025
2026 if (map->disable_vmentry_reuse == TRUE) {
2027 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2028 } else {
2029
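/*
 * Two ways to find a free range: walk the map's explicit hole
 * list if it is enabled, otherwise start from the "first_free"
 * hint and walk the entry list.
 */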
2030 if (map->holelistenabled) {
2031 hole_entry = (vm_map_entry_t)map->holes_list;
2032
2033 if (hole_entry == NULL) {
2034 /*
2035 * No more space in the map?
2036 */
2037 result = KERN_NO_SPACE;
2038 goto BailOut;
2039 } else {
2040
2041 boolean_t found_hole = FALSE;
2042
2043 do {
2044 if (hole_entry->vme_start >= start) {
2045 start = hole_entry->vme_start;
2046 found_hole = TRUE;
2047 break;
2048 }
2049
2050 if (hole_entry->vme_end > start) {
2051 found_hole = TRUE;
2052 break;
2053 }
2054 hole_entry = hole_entry->vme_next;
2055
2056 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2057
2058 if (found_hole == FALSE) {
2059 result = KERN_NO_SPACE;
2060 goto BailOut;
2061 }
2062
2063 entry = hole_entry;
2064
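/* don't hand out address 0: skip past the first page */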
2065 if (start == 0)
2066 start += PAGE_SIZE_64;
2067 }
2068 } else {
2069 assert(first_free_is_valid(map));
2070
2071 entry = map->first_free;
2072
2073 if (entry == vm_map_to_entry(map)) {
2074 entry = NULL;
2075 } else {
2076 if (entry->vme_next == vm_map_to_entry(map)){
2077 /*
2078 * Hole at the end of the map.
2079 */
2080 entry = NULL;
2081 } else {
2082 if (start < (entry->vme_next)->vme_start ) {
2083 start = entry->vme_end;
2084 start = vm_map_round_page(start,
2085 VM_MAP_PAGE_MASK(map));
2086 } else {
2087 /*
2088 * Need to do a lookup.
2089 */
2090 entry = NULL;
2091 }
2092 }
2093 }
2094
2095 if (entry == NULL) {
2096 vm_map_entry_t tmp_entry;
2097 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2098 assert(!entry_for_jit);
2099 start = tmp_entry->vme_end;
2100 start = vm_map_round_page(start,
2101 VM_MAP_PAGE_MASK(map));
2102 }
2103 entry = tmp_entry;
2104 }
2105 }
2106 }
2107
2108 /*
2109 * In any case, the "entry" always precedes
2110 * the proposed new region throughout the
2111 * loop:
2112 */
2113
2114 while (TRUE) {
2115 register vm_map_entry_t next;
2116
2117 /*
2118 * Find the end of the proposed new region.
2119 * Be sure we didn't go beyond the end, or
2120 * wrap around the address.
2121 */
2122
2123 end = ((start + mask) & ~mask);
2124 end = vm_map_round_page(end,
2125 VM_MAP_PAGE_MASK(map));
2126 if (end < start)
2127 RETURN(KERN_NO_SPACE);
2128 start = end;
2129 assert(VM_MAP_PAGE_ALIGNED(start,
2130 VM_MAP_PAGE_MASK(map)));
2131 end += size;
2132
2133 if ((end > effective_max_offset) || (end < start)) {
2134 if (map->wait_for_space) {
2135 assert(!keep_map_locked);
2136 if (size <= (effective_max_offset -
2137 effective_min_offset)) {
2138 assert_wait((event_t)map,
2139 THREAD_ABORTSAFE);
2140 vm_map_unlock(map);
2141 map_locked = FALSE;
2142 thread_block(THREAD_CONTINUE_NULL);
2143 goto StartAgain;
2144 }
2145 }
2146 RETURN(KERN_NO_SPACE);
2147 }
2148
2149 next = entry->vme_next;
2150
2151 if (map->holelistenabled) {
2152 if (entry->vme_end >= end)
2153 break;
2154 } else {
2155 /*
2156 * If there are no more entries, we must win.
2157 *
2158 * OR
2159 *
2160 * If there is another entry, it must be
2161 * after the end of the potential new region.
2162 */
2163
2164 if (next == vm_map_to_entry(map))
2165 break;
2166
2167 if (next->vme_start >= end)
2168 break;
2169 }
2170
2171 /*
2172 * Didn't fit -- move to the next entry.
2173 */
2174
2175 entry = next;
2176
2177 if (map->holelistenabled) {
2178 if (entry == (vm_map_entry_t) map->holes_list) {
2179 /*
2180 * Wrapped around
2181 */
2182 result = KERN_NO_SPACE;
2183 goto BailOut;
2184 }
2185 start = entry->vme_start;
2186 } else {
2187 start = entry->vme_end;
2188 }
2189
2190 start = vm_map_round_page(start,
2191 VM_MAP_PAGE_MASK(map));
2192 }
2193
2194 if (map->holelistenabled) {
2195 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2196 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2197 }
2198 }
2199
2200 *address = start;
2201 assert(VM_MAP_PAGE_ALIGNED(*address,
2202 VM_MAP_PAGE_MASK(map)));
2203 } else {
2204 /*
2205 * Verify that:
2206 * the address doesn't itself violate
2207 * the mask requirement.
2208 */
2209
2210 vm_map_lock(map);
2211 map_locked = TRUE;
2212 if ((start & mask) != 0)
2213 RETURN(KERN_NO_SPACE);
2214
2215 /*
2216 * ... the address is within bounds
2217 */
2218
2219 end = start + size;
2220
2221 if ((start < effective_min_offset) ||
2222 (end > effective_max_offset) ||
2223 (start >= end)) {
2224 RETURN(KERN_INVALID_ADDRESS);
2225 }
2226
2227 if (overwrite && zap_old_map != VM_MAP_NULL) {
2228 /*
2229 * Fixed mapping and "overwrite" flag: attempt to
2230 * remove all existing mappings in the specified
2231 * address range, saving them in our "zap_old_map".
2232 */
2233 (void) vm_map_delete(map, start, end,
2234 (VM_MAP_REMOVE_SAVE_ENTRIES |
2235 VM_MAP_REMOVE_NO_MAP_ALIGN),
2236 zap_old_map);
2237 }
2238
2239 /*
2240 * ... the starting address isn't allocated
2241 */
2242
2243 if (vm_map_lookup_entry(map, start, &entry)) {
2244 if (! (flags & VM_FLAGS_ALREADY)) {
2245 RETURN(KERN_NO_SPACE);
2246 }
2247 /*
2248 * Check if what's already there is what we want.
2249 */
2250 tmp_start = start;
2251 tmp_offset = offset;
2252 if (entry->vme_start < start) {
2253 tmp_start -= start - entry->vme_start;
2254 tmp_offset -= start - entry->vme_start;
2255
2256 }
2257 for (; entry->vme_start < end;
2258 entry = entry->vme_next) {
2259 /*
2260 * Check if the mapping's attributes
2261 * match the existing map entry.
2262 */
2263 if (entry == vm_map_to_entry(map) ||
2264 entry->vme_start != tmp_start ||
2265 entry->is_sub_map != is_submap ||
2266 VME_OFFSET(entry) != tmp_offset ||
2267 entry->needs_copy != needs_copy ||
2268 entry->protection != cur_protection ||
2269 entry->max_protection != max_protection ||
2270 entry->inheritance != inheritance ||
2271 entry->iokit_acct != iokit_acct ||
2272 VME_ALIAS(entry) != alias) {
2273 /* not the same mapping ! */
2274 RETURN(KERN_NO_SPACE);
2275 }
2276 /*
2277 * Check if the same object is being mapped.
2278 */
2279 if (is_submap) {
2280 if (VME_SUBMAP(entry) !=
2281 (vm_map_t) object) {
2282 /* not the same submap */
2283 RETURN(KERN_NO_SPACE);
2284 }
2285 } else {
2286 if (VME_OBJECT(entry) != object) {
2287 /* not the same VM object... */
2288 vm_object_t obj2;
2289
2290 obj2 = VME_OBJECT(entry);
2291 if ((obj2 == VM_OBJECT_NULL ||
2292 obj2->internal) &&
2293 (object == VM_OBJECT_NULL ||
2294 object->internal)) {
2295 /*
2296 * ... but both are
2297 * anonymous memory,
2298 * so equivalent.
2299 */
2300 } else {
2301 RETURN(KERN_NO_SPACE);
2302 }
2303 }
2304 }
2305
2306 tmp_offset += entry->vme_end - entry->vme_start;
2307 tmp_start += entry->vme_end - entry->vme_start;
2308 if (entry->vme_end >= end) {
2309 /* reached the end of our mapping */
2310 break;
2311 }
2312 }
2313 /* it all matches: let's use what's already there ! */
2314 RETURN(KERN_MEMORY_PRESENT);
2315 }
2316
2317 /*
2318 * ... the next region doesn't overlap the
2319 * end point.
2320 */
2321
2322 if ((entry->vme_next != vm_map_to_entry(map)) &&
2323 (entry->vme_next->vme_start < end))
2324 RETURN(KERN_NO_SPACE);
2325 }
2326
2327 /*
2328 * At this point,
2329 * "start" and "end" should define the endpoints of the
2330 * available new range, and
2331 * "entry" should refer to the region before the new
2332 * range, and
2333 *
2334 * the map should be locked.
2335 */
2336
2337 /*
2338 * See whether we can avoid creating a new entry (and object) by
2339 * extending one of our neighbors. [So far, we only attempt to
2340 * extend from below.] Note that we can never extend/join
2341 * purgable objects because they need to remain distinct
2342 * entities in order to implement their "volatile object"
2343 * semantics.
2344 */
2345
2346 if (purgable || entry_for_jit) {
2347 if (object == VM_OBJECT_NULL) {
2348
2349 object = vm_object_allocate(size);
2350 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2351 object->true_share = TRUE;
2352 if (purgable) {
2353 task_t owner;
2354 object->purgable = VM_PURGABLE_NONVOLATILE;
2355 if (map->pmap == kernel_pmap) {
2356 /*
2357 * Purgeable mappings made in a kernel
2358 * map are "owned" by the kernel itself
2359 * rather than the current user task
2360 * because they're likely to be used by
2361 * more than this user task (see
2362 * execargs_purgeable_allocate(), for
2363 * example).
2364 */
2365 owner = kernel_task;
2366 } else {
2367 owner = current_task();
2368 }
2369 assert(object->vo_purgeable_owner == NULL);
2370 assert(object->resident_page_count == 0);
2371 assert(object->wired_page_count == 0);
2372 vm_object_lock(object);
2373 vm_purgeable_nonvolatile_enqueue(object, owner);
2374 vm_object_unlock(object);
2375 }
2376 offset = (vm_object_offset_t)0;
2377 }
2378 } else if ((is_submap == FALSE) &&
2379 (object == VM_OBJECT_NULL) &&
2380 (entry != vm_map_to_entry(map)) &&
2381 (entry->vme_end == start) &&
2382 (!entry->is_shared) &&
2383 (!entry->is_sub_map) &&
2384 (!entry->in_transition) &&
2385 (!entry->needs_wakeup) &&
2386 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2387 (entry->protection == cur_protection) &&
2388 (entry->max_protection == max_protection) &&
2389 (entry->inheritance == inheritance) &&
2390 ((user_alias == VM_MEMORY_REALLOC) ||
2391 (VME_ALIAS(entry) == alias)) &&
2392 (entry->no_cache == no_cache) &&
2393 (entry->permanent == permanent) &&
2394 (!entry->superpage_size && !superpage_size) &&
2395 /*
2396 * No coalescing if not map-aligned, to avoid propagating
2397 * that condition any further than needed:
2398 */
2399 (!entry->map_aligned || !clear_map_aligned) &&
2400 (!entry->zero_wired_pages) &&
2401 (!entry->used_for_jit && !entry_for_jit) &&
2402 (entry->iokit_acct == iokit_acct) &&
2403 (!entry->vme_resilient_codesign) &&
2404 (!entry->vme_resilient_media) &&
2405
2406 ((entry->vme_end - entry->vme_start) + size <=
2407 (user_alias == VM_MEMORY_REALLOC ?
2408 ANON_CHUNK_SIZE :
2409 NO_COALESCE_LIMIT)) &&
2410
2411 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2412 if (vm_object_coalesce(VME_OBJECT(entry),
2413 VM_OBJECT_NULL,
2414 VME_OFFSET(entry),
2415 (vm_object_offset_t) 0,
2416 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2417 (vm_map_size_t)(end - entry->vme_end))) {
2418
2419 /*
2420 * Coalesced the two objects - can extend
2421 * the previous map entry to include the
2422 * new range.
2423 */
2424 map->size += (end - entry->vme_end);
2425 assert(entry->vme_start < end);
2426 assert(VM_MAP_PAGE_ALIGNED(end,
2427 VM_MAP_PAGE_MASK(map)));
2428 if (__improbable(vm_debug_events))
2429 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2430 entry->vme_end = end;
2431 if (map->holelistenabled) {
2432 vm_map_store_update_first_free(map, entry, TRUE);
2433 } else {
2434 vm_map_store_update_first_free(map, map->first_free, TRUE);
2435 }
2436 new_mapping_established = TRUE;
2437 RETURN(KERN_SUCCESS);
2438 }
2439 }
2440
2441 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2442 new_entry = NULL;
2443
2444 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2445 tmp2_end = tmp2_start + step;
2446 /*
2447 * Create a new entry
2448 * LP64todo - for now, we can only allocate 4GB internal objects
2449 * because the default pager can't page bigger ones. Remove this
2450 * when it can.
2451 *
2452 * XXX FBDP
2453 * The reserved "page zero" in each process's address space can
2454 * be arbitrarily large. Splitting it into separate 4GB objects and
2455 * therefore different VM map entries serves no purpose and just
2456 * slows down operations on the VM map, so let's not split the
2457 * allocation into 4GB chunks if the max protection is NONE. That
2458 * memory should never be accessible, so it will never get to the
2459 * default pager.
2460 */
2461 tmp_start = tmp2_start;
2462 if (object == VM_OBJECT_NULL &&
2463 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2464 max_protection != VM_PROT_NONE &&
2465 superpage_size == 0)
2466 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2467 else
2468 tmp_end = tmp2_end;
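/*
 * Create the new entries. Large anonymous mappings (NULL object,
 * max protection not NONE) are carved into ANON_CHUNK_SIZE pieces,
 * one map entry per chunk; otherwise a single entry covers
 * [tmp2_start, tmp2_end).
 */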
2469 do {
2470 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2471 object, offset, needs_copy,
2472 FALSE, FALSE,
2473 cur_protection, max_protection,
2474 VM_BEHAVIOR_DEFAULT,
2475 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2476 0, no_cache,
2477 permanent,
2478 superpage_size,
2479 clear_map_aligned,
2480 is_submap);
2481
2482 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2483 VME_ALIAS_SET(new_entry, alias);
2484
2485 if (entry_for_jit){
2486 if (!(map->jit_entry_exists)){
2487 new_entry->used_for_jit = TRUE;
2488 map->jit_entry_exists = TRUE;
2489 }
2490 }
2491
2492 if (resilient_codesign &&
2493 ! ((cur_protection | max_protection) &
2494 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2495 new_entry->vme_resilient_codesign = TRUE;
2496 }
2497
2498 if (resilient_media &&
2499 ! ((cur_protection | max_protection) &
2500 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2501 new_entry->vme_resilient_media = TRUE;
2502 }
2503
2504 assert(!new_entry->iokit_acct);
2505 if (!is_submap &&
2506 object != VM_OBJECT_NULL &&
2507 object->purgable != VM_PURGABLE_DENY) {
2508 assert(new_entry->use_pmap);
2509 assert(!new_entry->iokit_acct);
2510 /*
2511 * Turn off pmap accounting since
2512 * purgeable objects have their
2513 * own ledgers.
2514 */
2515 new_entry->use_pmap = FALSE;
2516 } else if (!is_submap &&
2517 iokit_acct &&
2518 object != VM_OBJECT_NULL &&
2519 object->internal) {
2520 /* alternate accounting */
2521 assert(!new_entry->iokit_acct);
2522 assert(new_entry->use_pmap);
2523 new_entry->iokit_acct = TRUE;
2524 new_entry->use_pmap = FALSE;
2525 DTRACE_VM4(
2526 vm_map_iokit_mapped_region,
2527 vm_map_t, map,
2528 vm_map_offset_t, new_entry->vme_start,
2529 vm_map_offset_t, new_entry->vme_end,
2530 int, VME_ALIAS(new_entry));
2531 vm_map_iokit_mapped_region(
2532 map,
2533 (new_entry->vme_end -
2534 new_entry->vme_start));
2535 } else if (!is_submap) {
2536 assert(!new_entry->iokit_acct);
2537 assert(new_entry->use_pmap);
2538 }
2539
2540 if (is_submap) {
2541 vm_map_t submap;
2542 boolean_t submap_is_64bit;
2543 boolean_t use_pmap;
2544
2545 assert(new_entry->is_sub_map);
2546 assert(!new_entry->use_pmap);
2547 assert(!new_entry->iokit_acct);
2548 submap = (vm_map_t) object;
2549 submap_is_64bit = vm_map_is_64bit(submap);
2550 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2551 #ifndef NO_NESTED_PMAP
2552 if (use_pmap && submap->pmap == NULL) {
2553 ledger_t ledger = map->pmap->ledger;
2554 /* we need a sub pmap to nest... */
2555 submap->pmap = pmap_create(ledger, 0,
2556 submap_is_64bit);
2557 if (submap->pmap == NULL) {
2558 /* let's proceed without nesting... */
2559 }
2560 }
2561 if (use_pmap && submap->pmap != NULL) {
2562 kr = pmap_nest(map->pmap,
2563 submap->pmap,
2564 tmp_start,
2565 tmp_start,
2566 tmp_end - tmp_start);
2567 if (kr != KERN_SUCCESS) {
2568 printf("vm_map_enter: "
2569 "pmap_nest(0x%llx,0x%llx) "
2570 "error 0x%x\n",
2571 (long long)tmp_start,
2572 (long long)tmp_end,
2573 kr);
2574 } else {
2575 /* we're now nested ! */
2576 new_entry->use_pmap = TRUE;
2577 pmap_empty = FALSE;
2578 }
2579 }
2580 #endif /* NO_NESTED_PMAP */
2581 }
2582 entry = new_entry;
2583
2584 if (superpage_size) {
2585 vm_page_t pages, m;
2586 vm_object_t sp_object;
2587
2588 VME_OFFSET_SET(entry, 0);
2589
2590 /* allocate one superpage */
2591 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2592 if (kr != KERN_SUCCESS) {
2593 /* deallocate whole range... */
2594 new_mapping_established = TRUE;
2595 /* ... but only up to "tmp_end" */
2596 size -= end - tmp_end;
2597 RETURN(kr);
2598 }
2599
2600 /* create one vm_object per superpage */
2601 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2602 sp_object->phys_contiguous = TRUE;
2603 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2604 VME_OBJECT_SET(entry, sp_object);
2605 assert(entry->use_pmap);
2606
2607 /* enter the base pages into the object */
2608 vm_object_lock(sp_object);
2609 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2610 m = pages;
2611 pmap_zero_page(m->phys_page);
2612 pages = NEXT_PAGE(m);
2613 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2614 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2615 }
2616 vm_object_unlock(sp_object);
2617 }
2618 } while (tmp_end != tmp2_end &&
2619 (tmp_start = tmp_end) &&
2620 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2621 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2622 }
2623
2624 new_mapping_established = TRUE;
2625
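/*
 * Common exit path. On failure, the code below tears down any
 * mappings we created and, if "overwrite" zapped old mappings,
 * attempts to restore them from "zap_old_map".
 */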
2626 BailOut:
2627 assert(map_locked == TRUE);
2628
2629 if (result == KERN_SUCCESS) {
2630 vm_prot_t pager_prot;
2631 memory_object_t pager;
2632
2633 #if DEBUG
2634 if (pmap_empty &&
2635 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2636 assert(vm_map_pmap_is_empty(map,
2637 *address,
2638 *address+size));
2639 }
2640 #endif /* DEBUG */
2641
2642 /*
2643 * For "named" VM objects, let the pager know that the
2644 * memory object is being mapped. Some pagers need to keep
2645 * track of this, to know when they can reclaim the memory
2646 * object, for example.
2647 * VM calls memory_object_map() for each mapping (specifying
2648 * the protection of each mapping) and calls
2649 * memory_object_last_unmap() when all the mappings are gone.
2650 */
2651 pager_prot = max_protection;
2652 if (needs_copy) {
2653 /*
2654 * Copy-On-Write mapping: won't modify
2655 * the memory object.
2656 */
2657 pager_prot &= ~VM_PROT_WRITE;
2658 }
2659 if (!is_submap &&
2660 object != VM_OBJECT_NULL &&
2661 object->named &&
2662 object->pager != MEMORY_OBJECT_NULL) {
2663 vm_object_lock(object);
2664 pager = object->pager;
2665 if (object->named &&
2666 pager != MEMORY_OBJECT_NULL) {
2667 assert(object->pager_ready);
2668 vm_object_mapping_wait(object, THREAD_UNINT);
2669 vm_object_mapping_begin(object);
2670 vm_object_unlock(object);
2671
2672 kr = memory_object_map(pager, pager_prot);
2673 assert(kr == KERN_SUCCESS);
2674
2675 vm_object_lock(object);
2676 vm_object_mapping_end(object);
2677 }
2678 vm_object_unlock(object);
2679 }
2680 }
2681
2682 assert(map_locked == TRUE);
2683
2684 if (!keep_map_locked) {
2685 vm_map_unlock(map);
2686 map_locked = FALSE;
2687 }
2688
2689 /*
2690 * We can't hold the map lock if we enter this block.
2691 */
2692
2693 if (result == KERN_SUCCESS) {
2694
2695 /* Wire down the new entry if the user
2696 * requested all new map entries be wired.
2697 */
2698 if ((map->wiring_required)||(superpage_size)) {
2699 assert(!keep_map_locked);
2700 pmap_empty = FALSE; /* pmap won't be empty */
2701 kr = vm_map_wire(map, start, end,
2702 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2703 TRUE);
2704 result = kr;
2705 }
2706
2707 }
2708
2709 if (result != KERN_SUCCESS) {
2710 if (new_mapping_established) {
2711 /*
2712 * We have to get rid of the new mappings since we
2713 * won't make them available to the user.
2714 * Try to do that atomically, to minimize the risk
2715 * that someone else creates new mappings in that range.
2716 */
2717 zap_new_map = vm_map_create(PMAP_NULL,
2718 *address,
2719 *address + size,
2720 map->hdr.entries_pageable);
2721 vm_map_set_page_shift(zap_new_map,
2722 VM_MAP_PAGE_SHIFT(map));
2723 vm_map_disable_hole_optimization(zap_new_map);
2724
2725 if (!map_locked) {
2726 vm_map_lock(map);
2727 map_locked = TRUE;
2728 }
2729 (void) vm_map_delete(map, *address, *address+size,
2730 (VM_MAP_REMOVE_SAVE_ENTRIES |
2731 VM_MAP_REMOVE_NO_MAP_ALIGN),
2732 zap_new_map);
2733 }
2734 if (zap_old_map != VM_MAP_NULL &&
2735 zap_old_map->hdr.nentries != 0) {
2736 vm_map_entry_t entry1, entry2;
2737
2738 /*
2739 * The new mapping failed. Attempt to restore
2740 * the old mappings, saved in the "zap_old_map".
2741 */
2742 if (!map_locked) {
2743 vm_map_lock(map);
2744 map_locked = TRUE;
2745 }
2746
2747 /* first check if the coast is still clear */
2748 start = vm_map_first_entry(zap_old_map)->vme_start;
2749 end = vm_map_last_entry(zap_old_map)->vme_end;
2750 if (vm_map_lookup_entry(map, start, &entry1) ||
2751 vm_map_lookup_entry(map, end, &entry2) ||
2752 entry1 != entry2) {
2753 /*
2754 * Part of that range has already been
2755 * re-mapped: we can't restore the old
2756 * mappings...
2757 */
2758 vm_map_enter_restore_failures++;
2759 } else {
2760 /*
2761 * Transfer the saved map entries from
2762 * "zap_old_map" to the original "map",
2763 * inserting them all after "entry1".
2764 */
2765 for (entry2 = vm_map_first_entry(zap_old_map);
2766 entry2 != vm_map_to_entry(zap_old_map);
2767 entry2 = vm_map_first_entry(zap_old_map)) {
2768 vm_map_size_t entry_size;
2769
2770 entry_size = (entry2->vme_end -
2771 entry2->vme_start);
2772 vm_map_store_entry_unlink(zap_old_map,
2773 entry2);
2774 zap_old_map->size -= entry_size;
2775 vm_map_store_entry_link(map, entry1, entry2);
2776 map->size += entry_size;
2777 entry1 = entry2;
2778 }
2779 if (map->wiring_required) {
2780 /*
2781 * XXX TODO: we should rewire the
2782 * old pages here...
2783 */
2784 }
2785 vm_map_enter_restore_successes++;
2786 }
2787 }
2788 }
2789
2790 /*
2791 * The caller is responsible for releasing the lock if it requested to
2792 * keep the map locked.
2793 */
2794 if (map_locked && !keep_map_locked) {
2795 vm_map_unlock(map);
2796 }
2797
2798 /*
2799 * Get rid of the "zap_maps" and all the map entries that
2800 * they may still contain.
2801 */
2802 if (zap_old_map != VM_MAP_NULL) {
2803 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2804 zap_old_map = VM_MAP_NULL;
2805 }
2806 if (zap_new_map != VM_MAP_NULL) {
2807 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2808 zap_new_map = VM_MAP_NULL;
2809 }
2810
2811 return result;
2812
2813 #undef RETURN
2814 }
2815
2816
2817 /*
2818 * Counters for the prefault optimization.
2819 */
2820 int64_t vm_prefault_nb_pages = 0;
2821 int64_t vm_prefault_nb_bailout = 0;
2822
2823 static kern_return_t
2824 vm_map_enter_mem_object_helper(
2825 vm_map_t target_map,
2826 vm_map_offset_t *address,
2827 vm_map_size_t initial_size,
2828 vm_map_offset_t mask,
2829 int flags,
2830 ipc_port_t port,
2831 vm_object_offset_t offset,
2832 boolean_t copy,
2833 vm_prot_t cur_protection,
2834 vm_prot_t max_protection,
2835 vm_inherit_t inheritance,
2836 upl_page_list_ptr_t page_list,
2837 unsigned int page_list_count)
2838 {
2839 vm_map_address_t map_addr;
2840 vm_map_size_t map_size;
2841 vm_object_t object;
2842 vm_object_size_t size;
2843 kern_return_t result;
2844 boolean_t mask_cur_protection, mask_max_protection;
2845 boolean_t try_prefault = (page_list_count != 0);
2846 vm_map_offset_t offset_in_mapping = 0;
2847
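/*
 * When VM_PROT_IS_MASK is set in a protection argument, that
 * protection is treated as a mask to be intersected with the named
 * entry's own protection rather than a strict requirement; note
 * that here and strip the flag bit.
 */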
2848 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2849 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2850 cur_protection &= ~VM_PROT_IS_MASK;
2851 max_protection &= ~VM_PROT_IS_MASK;
2852
2853 /*
2854 * Check arguments for validity
2855 */
2856 if ((target_map == VM_MAP_NULL) ||
2857 (cur_protection & ~VM_PROT_ALL) ||
2858 (max_protection & ~VM_PROT_ALL) ||
2859 (inheritance > VM_INHERIT_LAST_VALID) ||
2860 (try_prefault && (copy || !page_list)) ||
2861 initial_size == 0) {
2862 return KERN_INVALID_ARGUMENT;
2863 }
2864
2865 {
2866 map_addr = vm_map_trunc_page(*address,
2867 VM_MAP_PAGE_MASK(target_map));
2868 map_size = vm_map_round_page(initial_size,
2869 VM_MAP_PAGE_MASK(target_map));
2870 }
2871 size = vm_object_round_page(initial_size);
2872
2873 /*
2874 * Find the vm object (if any) corresponding to this port.
2875 */
2876 if (!IP_VALID(port)) {
2877 object = VM_OBJECT_NULL;
2878 offset = 0;
2879 copy = FALSE;
2880 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2881 vm_named_entry_t named_entry;
2882
2883 named_entry = (vm_named_entry_t) port->ip_kobject;
2884
2885 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2886 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2887 offset += named_entry->data_offset;
2888 }
2889
2890 /* a few checks to make sure user is obeying rules */
2891 if (size == 0) {
2892 if (offset >= named_entry->size)
2893 return KERN_INVALID_RIGHT;
2894 size = named_entry->size - offset;
2895 }
2896 if (mask_max_protection) {
2897 max_protection &= named_entry->protection;
2898 }
2899 if (mask_cur_protection) {
2900 cur_protection &= named_entry->protection;
2901 }
2902 if ((named_entry->protection & max_protection) !=
2903 max_protection)
2904 return KERN_INVALID_RIGHT;
2905 if ((named_entry->protection & cur_protection) !=
2906 cur_protection)
2907 return KERN_INVALID_RIGHT;
2908 if (offset + size < offset) {
2909 /* overflow */
2910 return KERN_INVALID_ARGUMENT;
2911 }
2912 if (named_entry->size < (offset + initial_size)) {
2913 return KERN_INVALID_ARGUMENT;
2914 }
2915
2916 if (named_entry->is_copy) {
2917 /* for a vm_map_copy, we can only map it whole */
2918 if ((size != named_entry->size) &&
2919 (vm_map_round_page(size,
2920 VM_MAP_PAGE_MASK(target_map)) ==
2921 named_entry->size)) {
2922 /* XXX FBDP use the rounded size... */
2923 size = vm_map_round_page(
2924 size,
2925 VM_MAP_PAGE_MASK(target_map));
2926 }
2927
2928 if (!(flags & VM_FLAGS_ANYWHERE) &&
2929 (offset != 0 ||
2930 size != named_entry->size)) {
2931 /*
2932 * XXX for a mapping at a "fixed" address,
2933 * we can't trim after mapping the whole
2934 * memory entry, so reject a request for a
2935 * partial mapping.
2936 */
2937 return KERN_INVALID_ARGUMENT;
2938 }
2939 }
2940
2941 /* the caller's "offset" is relative to the start of the named entry; */
2942 /* add the named entry's own offset to get the offset in the object */
2943 offset = offset + named_entry->offset;
2944
2945 if (! VM_MAP_PAGE_ALIGNED(size,
2946 VM_MAP_PAGE_MASK(target_map))) {
2947 /*
2948 * Let's not map more than requested;
2949 * vm_map_enter() will handle this "not map-aligned"
2950 * case.
2951 */
2952 map_size = size;
2953 }
2954
2955 named_entry_lock(named_entry);
2956 if (named_entry->is_sub_map) {
2957 vm_map_t submap;
2958
2959 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2960 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2961 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2962 }
2963
2964 submap = named_entry->backing.map;
2965 vm_map_lock(submap);
2966 vm_map_reference(submap);
2967 vm_map_unlock(submap);
2968 named_entry_unlock(named_entry);
2969
2970 result = vm_map_enter(target_map,
2971 &map_addr,
2972 map_size,
2973 mask,
2974 flags | VM_FLAGS_SUBMAP,
2975 (vm_object_t) submap,
2976 offset,
2977 copy,
2978 cur_protection,
2979 max_protection,
2980 inheritance);
2981 if (result != KERN_SUCCESS) {
2982 vm_map_deallocate(submap);
2983 } else {
2984 /*
2985 * No need to lock "submap" just to check its
2986 * "mapped" flag: that flag is never reset
2987 * once it's been set and if we race, we'll
2988 * just end up setting it twice, which is OK.
2989 */
2990 if (submap->mapped_in_other_pmaps == FALSE &&
2991 vm_map_pmap(submap) != PMAP_NULL &&
2992 vm_map_pmap(submap) !=
2993 vm_map_pmap(target_map)) {
2994 /*
2995 * This submap is being mapped in a map
2996 * that uses a different pmap.
2997 * Set its "mapped_in_other_pmaps" flag
2998 * to indicate that we now need to
2999 * remove mappings from all pmaps rather
3000 * than just the submap's pmap.
3001 */
3002 vm_map_lock(submap);
3003 submap->mapped_in_other_pmaps = TRUE;
3004 vm_map_unlock(submap);
3005 }
3006 *address = map_addr;
3007 }
3008 return result;
3009
3010 } else if (named_entry->is_pager) {
3011 unsigned int access;
3012 vm_prot_t protections;
3013 unsigned int wimg_mode;
3014
3015 protections = named_entry->protection & VM_PROT_ALL;
3016 access = GET_MAP_MEM(named_entry->protection);
3017
3018 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3019 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3020 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3021 }
3022
3023 object = vm_object_enter(named_entry->backing.pager,
3024 named_entry->size,
3025 named_entry->internal,
3026 FALSE,
3027 FALSE);
3028 if (object == VM_OBJECT_NULL) {
3029 named_entry_unlock(named_entry);
3030 return KERN_INVALID_OBJECT;
3031 }
3032
3033 /* JMM - drop reference on pager here */
3034
3035 /* create an extra ref for the named entry */
3036 vm_object_lock(object);
3037 vm_object_reference_locked(object);
3038 named_entry->backing.object = object;
3039 named_entry->is_pager = FALSE;
3040 named_entry_unlock(named_entry);
3041
3042 wimg_mode = object->wimg_bits;
3043
3044 if (access == MAP_MEM_IO) {
3045 wimg_mode = VM_WIMG_IO;
3046 } else if (access == MAP_MEM_COPYBACK) {
3047 wimg_mode = VM_WIMG_USE_DEFAULT;
3048 } else if (access == MAP_MEM_INNERWBACK) {
3049 wimg_mode = VM_WIMG_INNERWBACK;
3050 } else if (access == MAP_MEM_WTHRU) {
3051 wimg_mode = VM_WIMG_WTHRU;
3052 } else if (access == MAP_MEM_WCOMB) {
3053 wimg_mode = VM_WIMG_WCOMB;
3054 }
3055
3056 /* wait for object (if any) to be ready */
3057 if (!named_entry->internal) {
3058 while (!object->pager_ready) {
3059 vm_object_wait(
3060 object,
3061 VM_OBJECT_EVENT_PAGER_READY,
3062 THREAD_UNINT);
3063 vm_object_lock(object);
3064 }
3065 }
3066
3067 if (object->wimg_bits != wimg_mode)
3068 vm_object_change_wimg_mode(object, wimg_mode);
3069
3070 #if VM_OBJECT_TRACKING_OP_TRUESHARE
3071 if (!object->true_share &&
3072 vm_object_tracking_inited) {
3073 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3074 int num = 0;
3075
3076 num = OSBacktrace(bt,
3077 VM_OBJECT_TRACKING_BTDEPTH);
3078 btlog_add_entry(vm_object_tracking_btlog,
3079 object,
3080 VM_OBJECT_TRACKING_OP_TRUESHARE,
3081 bt,
3082 num);
3083 }
3084 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3085
3086 object->true_share = TRUE;
3087
3088 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3089 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3090 vm_object_unlock(object);
3091
3092 } else if (named_entry->is_copy) {
3093 kern_return_t kr;
3094 vm_map_copy_t copy_map;
3095 vm_map_entry_t copy_entry;
3096 vm_map_offset_t copy_addr;
3097
3098 if (flags & ~(VM_FLAGS_FIXED |
3099 VM_FLAGS_ANYWHERE |
3100 VM_FLAGS_OVERWRITE |
3101 VM_FLAGS_RETURN_4K_DATA_ADDR |
3102 VM_FLAGS_RETURN_DATA_ADDR)) {
3103 named_entry_unlock(named_entry);
3104 return KERN_INVALID_ARGUMENT;
3105 }
3106
3107 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3108 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3109 offset_in_mapping = offset - vm_object_trunc_page(offset);
3110 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3111 offset_in_mapping &= ~((signed)(0xFFF));
3112 offset = vm_object_trunc_page(offset);
3113 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3114 }
3115
3116 copy_map = named_entry->backing.copy;
3117 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3118 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3119 /* unsupported type; should not happen */
3120 printf("vm_map_enter_mem_object: "
3121 "memory_entry->backing.copy "
3122 "unsupported type 0x%x\n",
3123 copy_map->type);
3124 named_entry_unlock(named_entry);
3125 return KERN_INVALID_ARGUMENT;
3126 }
3127
3128 /* reserve a contiguous range */
3129 kr = vm_map_enter(target_map,
3130 &map_addr,
3131 /* map whole mem entry, trim later: */
3132 named_entry->size,
3133 mask,
3134 flags & (VM_FLAGS_ANYWHERE |
3135 VM_FLAGS_OVERWRITE |
3136 VM_FLAGS_RETURN_4K_DATA_ADDR |
3137 VM_FLAGS_RETURN_DATA_ADDR),
3138 VM_OBJECT_NULL,
3139 0,
3140 FALSE, /* copy */
3141 cur_protection,
3142 max_protection,
3143 inheritance);
3144 if (kr != KERN_SUCCESS) {
3145 named_entry_unlock(named_entry);
3146 return kr;
3147 }
3148
3149 copy_addr = map_addr;
3150
3151 for (copy_entry = vm_map_copy_first_entry(copy_map);
3152 copy_entry != vm_map_copy_to_entry(copy_map);
3153 copy_entry = copy_entry->vme_next) {
3154 int remap_flags = 0;
3155 vm_map_t copy_submap;
3156 vm_object_t copy_object;
3157 vm_map_size_t copy_size;
3158 vm_object_offset_t copy_offset;
3159
3160 copy_offset = VME_OFFSET(copy_entry);
3161 copy_size = (copy_entry->vme_end -
3162 copy_entry->vme_start);
3163
3164 /* sanity check */
3165 if ((copy_addr + copy_size) >
3166 (map_addr +
3167 named_entry->size /* XXX full size */ )) {
3168 /* over-mapping too much !? */
3169 kr = KERN_INVALID_ARGUMENT;
3170 /* abort */
3171 break;
3172 }
3173
3174 /* take a reference on the object */
3175 if (copy_entry->is_sub_map) {
3176 remap_flags |= VM_FLAGS_SUBMAP;
3177 copy_submap = VME_SUBMAP(copy_entry);
3178 vm_map_lock(copy_submap);
3179 vm_map_reference(copy_submap);
3180 vm_map_unlock(copy_submap);
3181 copy_object = (vm_object_t) copy_submap;
3182 } else {
3183 copy_object = VME_OBJECT(copy_entry);
3184 vm_object_reference(copy_object);
3185 }
3186
3187 /* over-map the object into destination */
3188 remap_flags |= flags;
3189 remap_flags |= VM_FLAGS_FIXED;
3190 remap_flags |= VM_FLAGS_OVERWRITE;
3191 remap_flags &= ~VM_FLAGS_ANYWHERE;
3192 kr = vm_map_enter(target_map,
3193 &copy_addr,
3194 copy_size,
3195 (vm_map_offset_t) 0,
3196 remap_flags,
3197 copy_object,
3198 copy_offset,
3199 copy,
3200 cur_protection,
3201 max_protection,
3202 inheritance);
3203 if (kr != KERN_SUCCESS) {
3204 if (copy_entry->is_sub_map) {
3205 vm_map_deallocate(copy_submap);
3206 } else {
3207 vm_object_deallocate(copy_object);
3208 }
3209 /* abort */
3210 break;
3211 }
3212
3213 /* next mapping */
3214 copy_addr += copy_size;
3215 }
3216
3217 if (kr == KERN_SUCCESS) {
3218 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3219 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3220 *address = map_addr + offset_in_mapping;
3221 } else {
3222 *address = map_addr;
3223 }
3224
3225 if (offset) {
3226 /*
3227 * Trim in front, from 0 to "offset".
3228 */
3229 vm_map_remove(target_map,
3230 map_addr,
3231 map_addr + offset,
3232 0);
3233 *address += offset;
3234 }
3235 if (offset + map_size < named_entry->size) {
3236 /*
3237 * Trim in back, from
3238 * "offset + map_size" to
3239 * "named_entry->size".
3240 */
3241 vm_map_remove(target_map,
3242 (map_addr +
3243 offset + map_size),
3244 (map_addr +
3245 named_entry->size),
3246 0);
3247 }
3248 }
3249 named_entry_unlock(named_entry);
3250
3251 if (kr != KERN_SUCCESS) {
3252 if (! (flags & VM_FLAGS_OVERWRITE)) {
3253 /* deallocate the contiguous range */
3254 (void) vm_deallocate(target_map,
3255 map_addr,
3256 map_size);
3257 }
3258 }
3259
3260 return kr;
3261
3262 } else {
3263 /* This is the case where we are going to map */
3264 /* an already mapped object. If the object is */
3265 /* not ready, it is internal. An external */
3266 /* object cannot be mapped until it is ready, */
3267 /* so we can skip the ready check */
3268 /* in this case. */
3269 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3270 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3271 offset_in_mapping = offset - vm_object_trunc_page(offset);
3272 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3273 offset_in_mapping &= ~((signed)(0xFFF));
3274 offset = vm_object_trunc_page(offset);
3275 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3276 }
3277
3278 object = named_entry->backing.object;
3279 assert(object != VM_OBJECT_NULL);
3280 named_entry_unlock(named_entry);
3281 vm_object_reference(object);
3282 }
3283 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3284 /*
3285 * JMM - This is temporary until we unify named entries
3286 * and raw memory objects.
3287 *
3288 * Detected fake ip_kotype for a memory object. In
3289 * this case, the port isn't really a port at all, but
3290 * instead is just a raw memory object.
3291 */
3292 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3293 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3294 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3295 }
3296
3297 object = vm_object_enter((memory_object_t)port,
3298 size, FALSE, FALSE, FALSE);
3299 if (object == VM_OBJECT_NULL)
3300 return KERN_INVALID_OBJECT;
3301
3302 /* wait for object (if any) to be ready */
3303 if (object != VM_OBJECT_NULL) {
3304 if (object == kernel_object) {
3305 printf("Warning: Attempt to map kernel object"
3306 " by a non-private kernel entity\n");
3307 return KERN_INVALID_OBJECT;
3308 }
3309 if (!object->pager_ready) {
3310 vm_object_lock(object);
3311
3312 while (!object->pager_ready) {
3313 vm_object_wait(object,
3314 VM_OBJECT_EVENT_PAGER_READY,
3315 THREAD_UNINT);
3316 vm_object_lock(object);
3317 }
3318 vm_object_unlock(object);
3319 }
3320 }
3321 } else {
3322 return KERN_INVALID_OBJECT;
3323 }
3324
3325 if (object != VM_OBJECT_NULL &&
3326 object->named &&
3327 object->pager != MEMORY_OBJECT_NULL &&
3328 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3329 memory_object_t pager;
3330 vm_prot_t pager_prot;
3331 kern_return_t kr;
3332
3333 /*
3334 * For "named" VM objects, let the pager know that the
3335 * memory object is being mapped. Some pagers need to keep
3336 * track of this, to know when they can reclaim the memory
3337 * object, for example.
3338 * VM calls memory_object_map() for each mapping (specifying
3339 * the protection of each mapping) and calls
3340 * memory_object_last_unmap() when all the mappings are gone.
3341 */
3342 pager_prot = max_protection;
3343 if (copy) {
3344 /*
3345 * Copy-On-Write mapping: won't modify the
3346 * memory object.
3347 */
3348 pager_prot &= ~VM_PROT_WRITE;
3349 }
3350 vm_object_lock(object);
3351 pager = object->pager;
3352 if (object->named &&
3353 pager != MEMORY_OBJECT_NULL &&
3354 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3355 assert(object->pager_ready);
3356 vm_object_mapping_wait(object, THREAD_UNINT);
3357 vm_object_mapping_begin(object);
3358 vm_object_unlock(object);
3359
3360 kr = memory_object_map(pager, pager_prot);
3361 assert(kr == KERN_SUCCESS);
3362
3363 vm_object_lock(object);
3364 vm_object_mapping_end(object);
3365 }
3366 vm_object_unlock(object);
3367 }
3368
3369 /*
3370 * Perform the copy if requested
3371 */
3372
3373 if (copy) {
3374 vm_object_t new_object;
3375 vm_object_offset_t new_offset;
3376
3377 result = vm_object_copy_strategically(object, offset,
3378 map_size,
3379 &new_object, &new_offset,
3380 &copy);
3381
3382
3383 if (result == KERN_MEMORY_RESTART_COPY) {
3384 boolean_t success;
3385 boolean_t src_needs_copy;
3386
3387 /*
3388 * XXX
3389 * We currently ignore src_needs_copy.
3390 * This really is the issue of how to make
3391 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3392 * non-kernel users to use. Solution forthcoming.
3393 * In the meantime, since we don't allow non-kernel
3394 * memory managers to specify symmetric copy,
3395 * we won't run into problems here.
3396 */
3397 new_object = object;
3398 new_offset = offset;
3399 success = vm_object_copy_quickly(&new_object,
3400 new_offset,
3401 map_size,
3402 &src_needs_copy,
3403 &copy);
3404 assert(success);
3405 result = KERN_SUCCESS;
3406 }
3407 /*
3408 * Throw away the reference to the
3409 * original object, as it won't be mapped.
3410 */
3411
3412 vm_object_deallocate(object);
3413
3414 if (result != KERN_SUCCESS) {
3415 return result;
3416 }
3417
3418 object = new_object;
3419 offset = new_offset;
3420 }
3421
3422 /*
3423 * If users want to try to prefault pages, the mapping and prefault
3424 * needs to be atomic.
3425 */
3426 if (try_prefault)
3427 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3428
3429 {
3430 result = vm_map_enter(target_map,
3431 &map_addr, map_size,
3432 (vm_map_offset_t)mask,
3433 flags,
3434 object, offset,
3435 copy,
3436 cur_protection, max_protection,
3437 inheritance);
3438 }
3439 if (result != KERN_SUCCESS)
3440 vm_object_deallocate(object);
3441
3442 /*
3443 * Try to prefault, and do not forget to release the vm map lock.
3444 */
3445 if (result == KERN_SUCCESS && try_prefault) {
3446 mach_vm_address_t va = map_addr;
3447 kern_return_t kr = KERN_SUCCESS;
3448 unsigned int i = 0;
3449
3450 for (i = 0; i < page_list_count; ++i) {
3451 if (UPL_VALID_PAGE(page_list, i)) {
3452 /*
3453 * If this call fails, stop trying to
3454 * optimize: the remaining calls are likely
3455 * to fail too.
3456 *
3457 * We don't report an error for such a
3458 * failure, though: prefaulting is an
3459 * optimization, not something critical.
3460 */
3461 kr = pmap_enter_options(target_map->pmap,
3462 va, UPL_PHYS_PAGE(page_list, i),
3463 cur_protection, VM_PROT_NONE,
3464 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3465 if (kr != KERN_SUCCESS) {
3466 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3467 break;
3468 }
3469 OSIncrementAtomic64(&vm_prefault_nb_pages);
3470 }
3471
3472 /* Next virtual address */
3473 va += PAGE_SIZE;
3474 }
3475 vm_map_unlock(target_map);
3476 }
3477
3478 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3479 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3480 *address = map_addr + offset_in_mapping;
3481 } else {
3482 *address = map_addr;
3483 }
3484 return result;
3485 }
3486
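/*
 * Map the memory object named by "port" into "target_map"; thin
 * wrapper around vm_map_enter_mem_object_helper() with no prefault
 * page list.
 */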
3487 kern_return_t
3488 vm_map_enter_mem_object(
3489 vm_map_t target_map,
3490 vm_map_offset_t *address,
3491 vm_map_size_t initial_size,
3492 vm_map_offset_t mask,
3493 int flags,
3494 ipc_port_t port,
3495 vm_object_offset_t offset,
3496 boolean_t copy,
3497 vm_prot_t cur_protection,
3498 vm_prot_t max_protection,
3499 vm_inherit_t inheritance)
3500 {
3501 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3502 port, offset, copy, cur_protection, max_protection,
3503 inheritance, NULL, 0);
3504 }
3505
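/*
 * Variant of vm_map_enter_mem_object() that also prefaults the pages
 * described by "page_list" while the map is still locked.
 */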
3506 kern_return_t
3507 vm_map_enter_mem_object_prefault(
3508 vm_map_t target_map,
3509 vm_map_offset_t *address,
3510 vm_map_size_t initial_size,
3511 vm_map_offset_t mask,
3512 int flags,
3513 ipc_port_t port,
3514 vm_object_offset_t offset,
3515 vm_prot_t cur_protection,
3516 vm_prot_t max_protection,
3517 upl_page_list_ptr_t page_list,
3518 unsigned int page_list_count)
3519 {
3520 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3521 port, offset, FALSE, cur_protection, max_protection,
3522 VM_INHERIT_DEFAULT, page_list, page_list_count);
3523 }
3524
3525
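/*
 * Like vm_map_enter_mem_object(), but the memory object is designated
 * by a memory_object_control_t instead of an IPC port.
 */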
3526 kern_return_t
3527 vm_map_enter_mem_object_control(
3528 vm_map_t target_map,
3529 vm_map_offset_t *address,
3530 vm_map_size_t initial_size,
3531 vm_map_offset_t mask,
3532 int flags,
3533 memory_object_control_t control,
3534 vm_object_offset_t offset,
3535 boolean_t copy,
3536 vm_prot_t cur_protection,
3537 vm_prot_t max_protection,
3538 vm_inherit_t inheritance)
3539 {
3540 vm_map_address_t map_addr;
3541 vm_map_size_t map_size;
3542 vm_object_t object;
3543 vm_object_size_t size;
3544 kern_return_t result;
3545 memory_object_t pager;
3546 vm_prot_t pager_prot;
3547 kern_return_t kr;
3548
3549 /*
3550 * Check arguments for validity
3551 */
3552 if ((target_map == VM_MAP_NULL) ||
3553 (cur_protection & ~VM_PROT_ALL) ||
3554 (max_protection & ~VM_PROT_ALL) ||
3555 (inheritance > VM_INHERIT_LAST_VALID) ||
3556 initial_size == 0) {
3557 return KERN_INVALID_ARGUMENT;
3558 }
3559
3560 {
3561 map_addr = vm_map_trunc_page(*address,
3562 VM_MAP_PAGE_MASK(target_map));
3563 map_size = vm_map_round_page(initial_size,
3564 VM_MAP_PAGE_MASK(target_map));
3565 }
3566 size = vm_object_round_page(initial_size);
3567
3568 object = memory_object_control_to_vm_object(control);
3569
3570 if (object == VM_OBJECT_NULL)
3571 return KERN_INVALID_OBJECT;
3572
3573 if (object == kernel_object) {
3574 printf("Warning: Attempt to map kernel object"
3575 " by a non-private kernel entity\n");
3576 return KERN_INVALID_OBJECT;
3577 }
3578
3579 vm_object_lock(object);
3580 object->ref_count++;
3581 vm_object_res_reference(object);
3582
3583 /*
3584 * For "named" VM objects, let the pager know that the
3585 * memory object is being mapped. Some pagers need to keep
3586 * track of this, to know when they can reclaim the memory
3587 * object, for example.
3588 * VM calls memory_object_map() for each mapping (specifying
3589 * the protection of each mapping) and calls
3590 * memory_object_last_unmap() when all the mappings are gone.
3591 */
3592 pager_prot = max_protection;
3593 if (copy) {
3594 pager_prot &= ~VM_PROT_WRITE;
3595 }
3596 pager = object->pager;
3597 if (object->named &&
3598 pager != MEMORY_OBJECT_NULL &&
3599 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3600 assert(object->pager_ready);
3601 vm_object_mapping_wait(object, THREAD_UNINT);
3602 vm_object_mapping_begin(object);
3603 vm_object_unlock(object);
3604
3605 kr = memory_object_map(pager, pager_prot);
3606 assert(kr == KERN_SUCCESS);
3607
3608 vm_object_lock(object);
3609 vm_object_mapping_end(object);
3610 }
3611 vm_object_unlock(object);
3612
3613 /*
3614 * Perform the copy if requested
3615 */
3616
3617 if (copy) {
3618 vm_object_t new_object;
3619 vm_object_offset_t new_offset;
3620
3621 result = vm_object_copy_strategically(object, offset, size,
3622 &new_object, &new_offset,
3623 &copy);
3624
3625
3626 if (result == KERN_MEMORY_RESTART_COPY) {
3627 boolean_t success;
3628 boolean_t src_needs_copy;
3629
3630 /*
3631 * XXX
3632 * We currently ignore src_needs_copy.
3633 * This really is the issue of how to make
3634 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3635 * non-kernel users to use. Solution forthcoming.
3636 * In the meantime, since we don't allow non-kernel
3637 * memory managers to specify symmetric copy,
3638 * we won't run into problems here.
3639 */
3640 new_object = object;
3641 new_offset = offset;
3642 success = vm_object_copy_quickly(&new_object,
3643 new_offset, size,
3644 &src_needs_copy,
3645 &copy);
3646 assert(success);
3647 result = KERN_SUCCESS;
3648 }
3649 /*
3650 * Throw away the reference to the
3651 * original object, as it won't be mapped.
3652 */
3653
3654 vm_object_deallocate(object);
3655
3656 if (result != KERN_SUCCESS) {
3657 return result;
3658 }
3659
3660 object = new_object;
3661 offset = new_offset;
3662 }
3663
3664 {
3665 result = vm_map_enter(target_map,
3666 &map_addr, map_size,
3667 (vm_map_offset_t)mask,
3668 flags,
3669 object, offset,
3670 copy,
3671 cur_protection, max_protection,
3672 inheritance);
3673 }
3674 if (result != KERN_SUCCESS)
3675 vm_object_deallocate(object);
3676 *address = map_addr;
3677
3678 return result;
3679 }
3680
3681
3682 #if VM_CPM
3683
3684 #ifdef MACH_ASSERT
3685 extern pmap_paddr_t avail_start, avail_end;
3686 #endif
3687
3688 /*
3689 * Allocate memory in the specified map, with the caveat that
3690 * the memory is physically contiguous. This call may fail
3691 * if the system can't find sufficient contiguous memory.
3692 * This call may cause or lead to heart-stopping amounts of
3693 * paging activity.
3694 *
3695 * Memory obtained from this call should be freed in the
3696 * normal way, viz., via vm_deallocate.
3697 */
3698 kern_return_t
3699 vm_map_enter_cpm(
3700 vm_map_t map,
3701 vm_map_offset_t *addr,
3702 vm_map_size_t size,
3703 int flags)
3704 {
3705 vm_object_t cpm_obj;
3706 pmap_t pmap;
3707 vm_page_t m, pages;
3708 kern_return_t kr;
3709 vm_map_offset_t va, start, end, offset;
3710 #if MACH_ASSERT
3711 vm_map_offset_t prev_addr = 0;
3712 #endif /* MACH_ASSERT */
3713
3714 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3715 vm_tag_t tag;
3716
3717 VM_GET_FLAGS_ALIAS(flags, tag);
3718
3719 if (size == 0) {
3720 *addr = 0;
3721 return KERN_SUCCESS;
3722 }
3723 if (anywhere)
3724 *addr = vm_map_min(map);
3725 else
3726 *addr = vm_map_trunc_page(*addr,
3727 VM_MAP_PAGE_MASK(map));
3728 size = vm_map_round_page(size,
3729 VM_MAP_PAGE_MASK(map));
3730
3731 /*
3732 * LP64todo - cpm_allocate should probably allow
3733 * allocations of >4GB, but not with the current
3734 * algorithm, so just cast down the size for now.
3735 */
3736 if (size > VM_MAX_ADDRESS)
3737 return KERN_RESOURCE_SHORTAGE;
3738 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3739 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3740 return kr;
3741
3742 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3743 assert(cpm_obj != VM_OBJECT_NULL);
3744 assert(cpm_obj->internal);
3745 assert(cpm_obj->vo_size == (vm_object_size_t)size);
3746 assert(cpm_obj->can_persist == FALSE);
3747 assert(cpm_obj->pager_created == FALSE);
3748 assert(cpm_obj->pageout == FALSE);
3749 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3750
3751 /*
3752 * Insert pages into object.
3753 */
3754
3755 vm_object_lock(cpm_obj);
3756 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3757 m = pages;
3758 pages = NEXT_PAGE(m);
3759 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3760
3761 assert(!m->gobbled);
3762 assert(!m->wanted);
3763 assert(!m->pageout);
3764 assert(!m->tabled);
3765 assert(VM_PAGE_WIRED(m));
3766 /*
3767 * ENCRYPTED SWAP:
3768 * "m" is not supposed to be pageable, so it
3769 * should not be encrypted. It wouldn't be safe
3770 * to enter it in a new VM object while encrypted.
3771 */
3772 ASSERT_PAGE_DECRYPTED(m);
3773 assert(m->busy);
3774 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3775
3776 m->busy = FALSE;
3777 vm_page_insert(m, cpm_obj, offset);
3778 }
3779 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3780 vm_object_unlock(cpm_obj);
3781
3782 /*
3783 * Hang onto a reference on the object in case a
3784 * multi-threaded application for some reason decides
3785 * to deallocate the portion of the address space into
3786 * which we will insert this object.
3787 *
3788 * Unfortunately, we must insert the object now before
3789 * we can talk to the pmap module about which addresses
3790 * must be wired down. Hence, the race with a multi-
3791 * threaded app.
3792 */
3793 vm_object_reference(cpm_obj);
3794
3795 /*
3796 * Insert object into map.
3797 */
3798
3799 kr = vm_map_enter(
3800 map,
3801 addr,
3802 size,
3803 (vm_map_offset_t)0,
3804 flags,
3805 cpm_obj,
3806 (vm_object_offset_t)0,
3807 FALSE,
3808 VM_PROT_ALL,
3809 VM_PROT_ALL,
3810 VM_INHERIT_DEFAULT);
3811
3812 if (kr != KERN_SUCCESS) {
3813 /*
3814 * A CPM object doesn't have can_persist set,
3815 * so all we have to do is deallocate it to
3816 * free up these pages.
3817 */
3818 assert(cpm_obj->pager_created == FALSE);
3819 assert(cpm_obj->can_persist == FALSE);
3820 assert(cpm_obj->pageout == FALSE);
3821 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3822 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3823 vm_object_deallocate(cpm_obj); /* kill creation ref */
3824 }
3825
3826 /*
3827 * Inform the physical mapping system that the
3828 * range of addresses may not fault, so that
3829 * page tables and such can be locked down as well.
3830 */
3831 start = *addr;
3832 end = start + size;
3833 pmap = vm_map_pmap(map);
3834 pmap_pageable(pmap, start, end, FALSE);
3835
3836 /*
3837 * Enter each page into the pmap, to avoid faults.
3838 * Note that this loop could be coded more efficiently,
3839 * if the need arose, rather than looking up each page
3840 * again.
3841 */
3842 for (offset = 0, va = start; offset < size;
3843 va += PAGE_SIZE, offset += PAGE_SIZE) {
3844 int type_of_fault;
3845
3846 vm_object_lock(cpm_obj);
3847 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3848 assert(m != VM_PAGE_NULL);
3849
3850 vm_page_zero_fill(m);
3851
3852 type_of_fault = DBG_ZERO_FILL_FAULT;
3853
3854 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3855 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3856 &type_of_fault);
3857
3858 vm_object_unlock(cpm_obj);
3859 }
3860
3861 #if MACH_ASSERT
3862 /*
3863 * Verify ordering in address space.
3864 */
3865 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3866 vm_object_lock(cpm_obj);
3867 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3868 vm_object_unlock(cpm_obj);
3869 if (m == VM_PAGE_NULL)
3870 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3871 cpm_obj, (uint64_t)offset);
3872 assert(m->tabled);
3873 assert(!m->busy);
3874 assert(!m->wanted);
3875 assert(!m->fictitious);
3876 assert(!m->private);
3877 assert(!m->absent);
3878 assert(!m->error);
3879 assert(!m->cleaning);
3880 assert(!m->laundry);
3881 assert(!m->precious);
3882 assert(!m->clustered);
3883 if (offset != 0) {
3884 if (m->phys_page != prev_addr + 1) {
3885 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3886 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3887 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3888 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3889 panic("vm_allocate_cpm: pages not contig!");
3890 }
3891 }
3892 prev_addr = m->phys_page;
3893 }
3894 #endif /* MACH_ASSERT */
3895
3896 vm_object_deallocate(cpm_obj); /* kill extra ref */
3897
3898 return kr;
3899 }
3900
3901
3902 #else /* VM_CPM */
3903
3904 /*
3905 * Interface is defined in all cases, but unless the kernel
3906 * is built explicitly for this option, the interface does
3907 * nothing.
3908 */
3909
3910 kern_return_t
3911 vm_map_enter_cpm(
3912 __unused vm_map_t map,
3913 __unused vm_map_offset_t *addr,
3914 __unused vm_map_size_t size,
3915 __unused int flags)
3916 {
3917 return KERN_FAILURE;
3918 }
3919 #endif /* VM_CPM */
3920
3921 /* Not used without nested pmaps */
3922 #ifndef NO_NESTED_PMAP
3923 /*
3924 * Clip and unnest a portion of a nested submap mapping.
3925 */
3926
3927
3928 static void
3929 vm_map_clip_unnest(
3930 vm_map_t map,
3931 vm_map_entry_t entry,
3932 vm_map_offset_t start_unnest,
3933 vm_map_offset_t end_unnest)
3934 {
3935 vm_map_offset_t old_start_unnest = start_unnest;
3936 vm_map_offset_t old_end_unnest = end_unnest;
3937
3938 assert(entry->is_sub_map);
3939 assert(VME_SUBMAP(entry) != NULL);
3940 assert(entry->use_pmap);
3941
3942 /*
3943 * Query the platform for the optimal unnest range.
3944 * DRK: There's some duplication of effort here, since
3945 * callers may have adjusted the range to some extent. This
3946 * routine was introduced to support 1GiB subtree nesting
3947 * for x86 platforms, which can also nest on 2MiB boundaries
3948 * depending on size/alignment.
3949 */
3950 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3951 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3952 }
3953
3954 if (entry->vme_start > start_unnest ||
3955 entry->vme_end < end_unnest) {
3956 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3957 "bad nested entry: start=0x%llx end=0x%llx\n",
3958 (long long)start_unnest, (long long)end_unnest,
3959 (long long)entry->vme_start, (long long)entry->vme_end);
3960 }
3961
3962 if (start_unnest > entry->vme_start) {
3963 _vm_map_clip_start(&map->hdr,
3964 entry,
3965 start_unnest);
3966 if (map->holelistenabled) {
3967 vm_map_store_update_first_free(map, NULL, FALSE);
3968 } else {
3969 vm_map_store_update_first_free(map, map->first_free, FALSE);
3970 }
3971 }
3972 if (entry->vme_end > end_unnest) {
3973 _vm_map_clip_end(&map->hdr,
3974 entry,
3975 end_unnest);
3976 if (map->holelistenabled) {
3977 vm_map_store_update_first_free(map, NULL, FALSE);
3978 } else {
3979 vm_map_store_update_first_free(map, map->first_free, FALSE);
3980 }
3981 }
3982
3983 pmap_unnest(map->pmap,
3984 entry->vme_start,
3985 entry->vme_end - entry->vme_start);
3986 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3987 /* clean up parent map/maps */
3988 vm_map_submap_pmap_clean(
3989 map, entry->vme_start,
3990 entry->vme_end,
3991 VME_SUBMAP(entry),
3992 VME_OFFSET(entry));
3993 }
3994 entry->use_pmap = FALSE;
3995 if ((map->pmap != kernel_pmap) &&
3996 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
3997 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
3998 }
3999 }
4000 #endif /* NO_NESTED_PMAP */
4001
4002 /*
4003 * vm_map_clip_start: [ internal use only ]
4004 *
4005 * Asserts that the given entry begins at or after
4006 * the specified address; if necessary,
4007 * it splits the entry into two.
4008 */
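/*
 * Editor's illustration (not part of the original source), using
 * hypothetical addresses: given an entry covering [0x1000, 0x5000)
 * and startaddr == 0x3000, the clip splits the entry so that
 *
 *	new_entry:  [0x1000, 0x3000)  -- keeps the original VME_OFFSET
 *	entry:      [0x3000, 0x5000)  -- VME_OFFSET advanced by 0x2000
 *
 * leaving "entry" starting exactly at the specified address; the
 * backing object (or submap) gains one reference for the extra entry.
 */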
4009 void
4010 vm_map_clip_start(
4011 vm_map_t map,
4012 vm_map_entry_t entry,
4013 vm_map_offset_t startaddr)
4014 {
4015 #ifndef NO_NESTED_PMAP
4016 if (entry->is_sub_map &&
4017 entry->use_pmap &&
4018 startaddr >= entry->vme_start) {
4019 vm_map_offset_t start_unnest, end_unnest;
4020
4021 /*
4022 * Make sure "startaddr" is no longer in a nested range
4023 * before we clip. Unnest only the minimum range the platform
4024 * can handle.
4025 * vm_map_clip_unnest may perform additional adjustments to
4026 * the unnest range.
4027 */
4028 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4029 end_unnest = start_unnest + pmap_nesting_size_min;
4030 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4031 }
4032 #endif /* NO_NESTED_PMAP */
4033 if (startaddr > entry->vme_start) {
4034 if (VME_OBJECT(entry) &&
4035 !entry->is_sub_map &&
4036 VME_OBJECT(entry)->phys_contiguous) {
4037 pmap_remove(map->pmap,
4038 (addr64_t)(entry->vme_start),
4039 (addr64_t)(entry->vme_end));
4040 }
4041 _vm_map_clip_start(&map->hdr, entry, startaddr);
4042 if (map->holelistenabled) {
4043 vm_map_store_update_first_free(map, NULL, FALSE);
4044 } else {
4045 vm_map_store_update_first_free(map, map->first_free, FALSE);
4046 }
4047 }
4048 }
4049
4050
4051 #define vm_map_copy_clip_start(copy, entry, startaddr) \
4052 MACRO_BEGIN \
4053 if ((startaddr) > (entry)->vme_start) \
4054 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
4055 MACRO_END
4056
4057 /*
4058 * This routine is called only when it is known that
4059 * the entry must be split.
4060 */
4061 static void
4062 _vm_map_clip_start(
4063 register struct vm_map_header *map_header,
4064 register vm_map_entry_t entry,
4065 register vm_map_offset_t start)
4066 {
4067 register vm_map_entry_t new_entry;
4068
4069 /*
4070 * Split off the front portion --
4071 * note that we must insert the new
4072 * entry BEFORE this one, so that
4073 * this entry has the specified starting
4074 * address.
4075 */
4076
4077 if (entry->map_aligned) {
4078 assert(VM_MAP_PAGE_ALIGNED(start,
4079 VM_MAP_HDR_PAGE_MASK(map_header)));
4080 }
4081
4082 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4083 vm_map_entry_copy_full(new_entry, entry);
4084
4085 new_entry->vme_end = start;
4086 assert(new_entry->vme_start < new_entry->vme_end);
4087 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
4088 assert(start < entry->vme_end);
4089 entry->vme_start = start;
4090
4091 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
4092
4093 if (entry->is_sub_map)
4094 vm_map_reference(VME_SUBMAP(new_entry));
4095 else
4096 vm_object_reference(VME_OBJECT(new_entry));
4097 }
4098
4099
4100 /*
4101 * vm_map_clip_end: [ internal use only ]
4102 *
4103 * Asserts that the given entry ends at or before
4104 * the specified address; if necessary,
4105 * it splits the entry into two.
4106 */
4107 void
4108 vm_map_clip_end(
4109 vm_map_t map,
4110 vm_map_entry_t entry,
4111 vm_map_offset_t endaddr)
4112 {
4113 if (endaddr > entry->vme_end) {
4114 /*
4115 * Within the scope of this clipping, limit "endaddr" to
4116 * the end of this map entry...
4117 */
4118 endaddr = entry->vme_end;
4119 }
4120 #ifndef NO_NESTED_PMAP
4121 if (entry->is_sub_map && entry->use_pmap) {
4122 vm_map_offset_t start_unnest, end_unnest;
4123
4124 /*
4125 * Make sure the range between the start of this entry and
4126 * the new "endaddr" is no longer nested before we clip.
4127 * Unnest only the minimum range the platform can handle.
4128 * vm_map_clip_unnest may perform additional adjustments to
4129 * the unnest range.
4130 */
4131 start_unnest = entry->vme_start;
4132 end_unnest =
4133 (endaddr + pmap_nesting_size_min - 1) &
4134 ~(pmap_nesting_size_min - 1);
4135 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4136 }
4137 #endif /* NO_NESTED_PMAP */
4138 if (endaddr < entry->vme_end) {
4139 if (VME_OBJECT(entry) &&
4140 !entry->is_sub_map &&
4141 VME_OBJECT(entry)->phys_contiguous) {
4142 pmap_remove(map->pmap,
4143 (addr64_t)(entry->vme_start),
4144 (addr64_t)(entry->vme_end));
4145 }
4146 _vm_map_clip_end(&map->hdr, entry, endaddr);
4147 if (map->holelistenabled) {
4148 vm_map_store_update_first_free(map, NULL, FALSE);
4149 } else {
4150 vm_map_store_update_first_free(map, map->first_free, FALSE);
4151 }
4152 }
4153 }
4154
4155
4156 #define vm_map_copy_clip_end(copy, entry, endaddr) \
4157 MACRO_BEGIN \
4158 if ((endaddr) < (entry)->vme_end) \
4159 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
4160 MACRO_END
4161
4162 /*
4163 * This routine is called only when it is known that
4164 * the entry must be split.
4165 */
4166 static void
4167 _vm_map_clip_end(
4168 register struct vm_map_header *map_header,
4169 register vm_map_entry_t entry,
4170 register vm_map_offset_t end)
4171 {
4172 register vm_map_entry_t new_entry;
4173
4174 /*
4175 * Create a new entry and insert it
4176 * AFTER the specified entry
4177 */
4178
4179 if (entry->map_aligned) {
4180 assert(VM_MAP_PAGE_ALIGNED(end,
4181 VM_MAP_HDR_PAGE_MASK(map_header)));
4182 }
4183
4184 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4185 vm_map_entry_copy_full(new_entry, entry);
4186
4187 assert(entry->vme_start < end);
4188 new_entry->vme_start = entry->vme_end = end;
4189 VME_OFFSET_SET(new_entry,
4190 VME_OFFSET(new_entry) + (end - entry->vme_start));
4191 assert(new_entry->vme_start < new_entry->vme_end);
4192
4193 _vm_map_store_entry_link(map_header, entry, new_entry);
4194
4195 if (entry->is_sub_map)
4196 vm_map_reference(VME_SUBMAP(new_entry));
4197 else
4198 vm_object_reference(VME_OBJECT(new_entry));
4199 }
4200
4201
4202 /*
4203 * VM_MAP_RANGE_CHECK: [ internal use only ]
4204 *
4205 * Asserts that the starting and ending region
4206 * addresses fall within the valid range of the map.
4207 */
4208 #define VM_MAP_RANGE_CHECK(map, start, end) \
4209 MACRO_BEGIN \
4210 if (start < vm_map_min(map)) \
4211 start = vm_map_min(map); \
4212 if (end > vm_map_max(map)) \
4213 end = vm_map_max(map); \
4214 if (start > end) \
4215 start = end; \
4216 MACRO_END
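
/*
 * Editor's note (illustrative, not in the original source): the macro
 * clamps silently rather than reporting an error. For a hypothetical
 * map whose valid range is [0x1000, 0x9000), a caller passing
 * start == 0x0 and end == 0xA000 continues with start == 0x1000 and
 * end == 0x9000; if the clamped start still exceeds the clamped end,
 * start is pulled down to end and the range becomes empty.
 */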
4217
4218 /*
4219 * vm_map_range_check: [ internal use only ]
4220 *
4221 * Check that the region defined by the specified start and
4222 * end addresses is wholly contained within a single map
4223 * entry or set of adjacent map entries of the specified map,
4224 * i.e. the specified region contains no unmapped space.
4225 * If any or all of the region is unmapped, FALSE is returned.
4226 * Otherwise, TRUE is returned and if the output argument 'entry'
4227 * is not NULL it points to the map entry containing the start
4228 * of the region.
4229 *
4230 * The map is locked for reading on entry and is left locked.
4231 */
4232 static boolean_t
4233 vm_map_range_check(
4234 register vm_map_t map,
4235 register vm_map_offset_t start,
4236 register vm_map_offset_t end,
4237 vm_map_entry_t *entry)
4238 {
4239 vm_map_entry_t cur;
4240 register vm_map_offset_t prev;
4241
4242 /*
4243 * Basic sanity checks first
4244 */
4245 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4246 return (FALSE);
4247
4248 /*
4249 * Check first if the region starts within a valid
4250 * mapping for the map.
4251 */
4252 if (!vm_map_lookup_entry(map, start, &cur))
4253 return (FALSE);
4254
4255 /*
4256 * Optimize for the case that the region is contained
4257 * in a single map entry.
4258 */
4259 if (entry != (vm_map_entry_t *) NULL)
4260 *entry = cur;
4261 if (end <= cur->vme_end)
4262 return (TRUE);
4263
4264 /*
4265 * If the region is not wholly contained within a
4266 * single entry, walk the entries looking for holes.
4267 */
4268 prev = cur->vme_end;
4269 cur = cur->vme_next;
4270 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4271 if (end <= cur->vme_end)
4272 return (TRUE);
4273 prev = cur->vme_end;
4274 cur = cur->vme_next;
4275 }
4276 return (FALSE);
4277 }
4278
4279 /*
4280 * vm_map_submap: [ kernel use only ]
4281 *
4282 * Mark the given range as handled by a subordinate map.
4283 *
4284 * This range must have been created with vm_map_find using
4285 * the vm_submap_object, and no other operations may have been
4286 * performed on this range prior to calling vm_map_submap.
4287 *
4288 * Only a limited number of operations can be performed
4289 * within this range after calling vm_map_submap:
4290 * vm_fault
4291 * [Don't try vm_map_copyin!]
4292 *
4293 * To remove a submapping, one must first remove the
4294 * range from the superior map, and then destroy the
4295 * submap (if desired). [Better yet, don't try it.]
4296 */
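/*
 * Editor's sketch (hypothetical, not part of the original source):
 * the expected calling pattern, assuming the caller has already
 * reserved [start, end) in "parent_map" backed by vm_submap_object as
 * described above. "parent_map" and "my_submap" are placeholder names.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_submap(parent_map,
 *			   start, end,
 *			   my_submap,
 *			   (vm_map_offset_t)0,	-- offset into the submap
 *			   TRUE);		-- use_pmap: nest if the platform allows
 *	if (kr != KERN_SUCCESS) {
 *		-- range was not a pristine vm_submap_object mapping
 *	}
 */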
4297 kern_return_t
4298 vm_map_submap(
4299 vm_map_t map,
4300 vm_map_offset_t start,
4301 vm_map_offset_t end,
4302 vm_map_t submap,
4303 vm_map_offset_t offset,
4304 #ifdef NO_NESTED_PMAP
4305 __unused
4306 #endif /* NO_NESTED_PMAP */
4307 boolean_t use_pmap)
4308 {
4309 vm_map_entry_t entry;
4310 register kern_return_t result = KERN_INVALID_ARGUMENT;
4311 register vm_object_t object;
4312
4313 vm_map_lock(map);
4314
4315 if (! vm_map_lookup_entry(map, start, &entry)) {
4316 entry = entry->vme_next;
4317 }
4318
4319 if (entry == vm_map_to_entry(map) ||
4320 entry->is_sub_map) {
4321 vm_map_unlock(map);
4322 return KERN_INVALID_ARGUMENT;
4323 }
4324
4325 vm_map_clip_start(map, entry, start);
4326 vm_map_clip_end(map, entry, end);
4327
4328 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4329 (!entry->is_sub_map) &&
4330 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
4331 (object->resident_page_count == 0) &&
4332 (object->copy == VM_OBJECT_NULL) &&
4333 (object->shadow == VM_OBJECT_NULL) &&
4334 (!object->pager_created)) {
4335 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4336 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
4337 vm_object_deallocate(object);
4338 entry->is_sub_map = TRUE;
4339 entry->use_pmap = FALSE;
4340 VME_SUBMAP_SET(entry, submap);
4341 vm_map_reference(submap);
4342 if (submap->mapped_in_other_pmaps == FALSE &&
4343 vm_map_pmap(submap) != PMAP_NULL &&
4344 vm_map_pmap(submap) != vm_map_pmap(map)) {
4345 /*
4346 * This submap is being mapped in a map
4347 * that uses a different pmap.
4348 * Set its "mapped_in_other_pmaps" flag
4349 * to indicate that we now need to
4350 * remove mappings from all pmaps rather
4351 * than just the submap's pmap.
4352 */
4353 submap->mapped_in_other_pmaps = TRUE;
4354 }
4355
4356 #ifndef NO_NESTED_PMAP
4357 if (use_pmap) {
4358 /* nest if platform code will allow */
4359 if(submap->pmap == NULL) {
4360 ledger_t ledger = map->pmap->ledger;
4361 submap->pmap = pmap_create(ledger,
4362 (vm_map_size_t) 0, FALSE);
4363 if(submap->pmap == PMAP_NULL) {
4364 vm_map_unlock(map);
4365 return(KERN_NO_SPACE);
4366 }
4367 }
4368 result = pmap_nest(map->pmap,
4369 (VME_SUBMAP(entry))->pmap,
4370 (addr64_t)start,
4371 (addr64_t)start,
4372 (uint64_t)(end - start));
4373 if(result)
4374 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4375 entry->use_pmap = TRUE;
4376 }
4377 #else /* NO_NESTED_PMAP */
4378 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
4379 #endif /* NO_NESTED_PMAP */
4380 result = KERN_SUCCESS;
4381 }
4382 vm_map_unlock(map);
4383
4384 return(result);
4385 }
4386
4387
4388 /*
4389 * vm_map_protect:
4390 *
4391 * Sets the protection of the specified address
4392 * region in the target map. If "set_max" is
4393 * specified, the maximum protection is to be set;
4394 * otherwise, only the current protection is affected.
4395 */
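/*
 * Editor's sketch (hypothetical, not part of the original source):
 *
 *	-- drop write permission on [start, end) without touching the
 *	-- maximum protection:
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 *	-- also lower the maximum protection, so a later attempt to add
 *	-- write access fails the max-protection check below (unless the
 *	-- caller asks for VM_PROT_COPY):
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
 */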
4396 kern_return_t
4397 vm_map_protect(
4398 register vm_map_t map,
4399 register vm_map_offset_t start,
4400 register vm_map_offset_t end,
4401 register vm_prot_t new_prot,
4402 register boolean_t set_max)
4403 {
4404 register vm_map_entry_t current;
4405 register vm_map_offset_t prev;
4406 vm_map_entry_t entry;
4407 vm_prot_t new_max;
4408
4409 XPR(XPR_VM_MAP,
4410 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4411 map, start, end, new_prot, set_max);
4412
4413 vm_map_lock(map);
4414
4415 /* LP64todo - remove this check when vm_map_commpage64()
4416 * no longer has to stuff in a map_entry for the commpage
4417 * above the map's max_offset.
4418 */
4419 if (start >= map->max_offset) {
4420 vm_map_unlock(map);
4421 return(KERN_INVALID_ADDRESS);
4422 }
4423
4424 while(1) {
4425 /*
4426 * Lookup the entry. If it doesn't start in a valid
4427 * entry, return an error.
4428 */
4429 if (! vm_map_lookup_entry(map, start, &entry)) {
4430 vm_map_unlock(map);
4431 return(KERN_INVALID_ADDRESS);
4432 }
4433
4434 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4435 start = SUPERPAGE_ROUND_DOWN(start);
4436 continue;
4437 }
4438 break;
4439 }
4440 if (entry->superpage_size)
4441 end = SUPERPAGE_ROUND_UP(end);
4442
4443 /*
4444 * Make a first pass to check for protection and address
4445 * violations.
4446 */
4447
4448 current = entry;
4449 prev = current->vme_start;
4450 while ((current != vm_map_to_entry(map)) &&
4451 (current->vme_start < end)) {
4452
4453 /*
4454 * If there is a hole, return an error.
4455 */
4456 if (current->vme_start != prev) {
4457 vm_map_unlock(map);
4458 return(KERN_INVALID_ADDRESS);
4459 }
4460
4461 new_max = current->max_protection;
4462 if(new_prot & VM_PROT_COPY) {
4463 new_max |= VM_PROT_WRITE;
4464 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4465 vm_map_unlock(map);
4466 return(KERN_PROTECTION_FAILURE);
4467 }
4468 } else {
4469 if ((new_prot & new_max) != new_prot) {
4470 vm_map_unlock(map);
4471 return(KERN_PROTECTION_FAILURE);
4472 }
4473 }
4474
4475
4476 prev = current->vme_end;
4477 current = current->vme_next;
4478 }
4479 if (end > prev) {
4480 vm_map_unlock(map);
4481 return(KERN_INVALID_ADDRESS);
4482 }
4483
4484 /*
4485 * Go back and fix up protections.
4486 * Clip to start here if the range starts within
4487 * the entry.
4488 */
4489
4490 current = entry;
4491 if (current != vm_map_to_entry(map)) {
4492 /* clip and unnest if necessary */
4493 vm_map_clip_start(map, current, start);
4494 }
4495
4496 while ((current != vm_map_to_entry(map)) &&
4497 (current->vme_start < end)) {
4498
4499 vm_prot_t old_prot;
4500
4501 vm_map_clip_end(map, current, end);
4502
4503 if (current->is_sub_map) {
4504 /* clipping did unnest if needed */
4505 assert(!current->use_pmap);
4506 }
4507
4508 old_prot = current->protection;
4509
4510 if(new_prot & VM_PROT_COPY) {
4511 /* caller is asking specifically to copy the */
4512 /* mapped data; this implies that max protection */
4513 /* will include write. Caller must be prepared */
4514 /* for loss of shared memory communication in the */
4515 /* target area after taking this step */
4516
4517 if (current->is_sub_map == FALSE &&
4518 VME_OBJECT(current) == VM_OBJECT_NULL) {
4519 VME_OBJECT_SET(current,
4520 vm_object_allocate(
4521 (vm_map_size_t)
4522 (current->vme_end -
4523 current->vme_start)));
4524 VME_OFFSET_SET(current, 0);
4525 assert(current->use_pmap);
4526 }
4527 assert(current->wired_count == 0);
4528 current->needs_copy = TRUE;
4529 current->max_protection |= VM_PROT_WRITE;
4530 }
4531
4532 if (set_max)
4533 current->protection =
4534 (current->max_protection =
4535 new_prot & ~VM_PROT_COPY) &
4536 old_prot;
4537 else
4538 current->protection = new_prot & ~VM_PROT_COPY;
4539
4540 /*
4541 * Update physical map if necessary.
4542 * If the request is to turn off write protection,
4543 * we won't do it for real (in pmap). This is because
4544 * it would cause copy-on-write to fail. We've already
4545 * set the new protection in the map, so if a
4546 * write-protect fault occurred, it will be fixed up
4547 * properly, COW or not.
4548 */
4549 if (current->protection != old_prot) {
4550 /* Look one level in: we support nested pmaps */
4551 /* from mapped submaps which are direct entries */
4552 /* in our map */
4553
4554 vm_prot_t prot;
4555
4556 prot = current->protection & ~VM_PROT_WRITE;
4557
4558 if (override_nx(map, VME_ALIAS(current)) && prot)
4559 prot |= VM_PROT_EXECUTE;
4560
4561
4562 if (current->is_sub_map && current->use_pmap) {
4563 pmap_protect(VME_SUBMAP(current)->pmap,
4564 current->vme_start,
4565 current->vme_end,
4566 prot);
4567 } else {
4568 pmap_protect(map->pmap,
4569 current->vme_start,
4570 current->vme_end,
4571 prot);
4572 }
4573 }
4574 current = current->vme_next;
4575 }
4576
4577 current = entry;
4578 while ((current != vm_map_to_entry(map)) &&
4579 (current->vme_start <= end)) {
4580 vm_map_simplify_entry(map, current);
4581 current = current->vme_next;
4582 }
4583
4584 vm_map_unlock(map);
4585 return(KERN_SUCCESS);
4586 }
4587
4588 /*
4589 * vm_map_inherit:
4590 *
4591 * Sets the inheritance of the specified address
4592 * range in the target map. Inheritance
4593 * affects how the map will be shared with
4594 * child maps at the time of vm_map_fork.
4595 */
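/*
 * Editor's sketch (hypothetical, not part of the original source):
 * marking a range so that a child created at vm_map_fork time gets
 * its own copy of it instead of sharing it with the parent:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
 *
 * As the code below enforces, VM_INHERIT_COPY is rejected with
 * KERN_INVALID_ARGUMENT if the range contains submap entries.
 */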
4596 kern_return_t
4597 vm_map_inherit(
4598 register vm_map_t map,
4599 register vm_map_offset_t start,
4600 register vm_map_offset_t end,
4601 register vm_inherit_t new_inheritance)
4602 {
4603 register vm_map_entry_t entry;
4604 vm_map_entry_t temp_entry;
4605
4606 vm_map_lock(map);
4607
4608 VM_MAP_RANGE_CHECK(map, start, end);
4609
4610 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4611 entry = temp_entry;
4612 }
4613 else {
4614 temp_entry = temp_entry->vme_next;
4615 entry = temp_entry;
4616 }
4617
4618 /* first check entire range for submaps which can't support the */
4619 /* given inheritance. */
4620 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4621 if(entry->is_sub_map) {
4622 if(new_inheritance == VM_INHERIT_COPY) {
4623 vm_map_unlock(map);
4624 return(KERN_INVALID_ARGUMENT);
4625 }
4626 }
4627
4628 entry = entry->vme_next;
4629 }
4630
4631 entry = temp_entry;
4632 if (entry != vm_map_to_entry(map)) {
4633 /* clip and unnest if necessary */
4634 vm_map_clip_start(map, entry, start);
4635 }
4636
4637 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4638 vm_map_clip_end(map, entry, end);
4639 if (entry->is_sub_map) {
4640 /* clip did unnest if needed */
4641 assert(!entry->use_pmap);
4642 }
4643
4644 entry->inheritance = new_inheritance;
4645
4646 entry = entry->vme_next;
4647 }
4648
4649 vm_map_unlock(map);
4650 return(KERN_SUCCESS);
4651 }
4652
4653 /*
4654 * Update the accounting for the amount of wired memory in this map. If the user has
4655 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4656 */
4657
4658 static kern_return_t
4659 add_wire_counts(
4660 vm_map_t map,
4661 vm_map_entry_t entry,
4662 boolean_t user_wire)
4663 {
4664 vm_map_size_t size;
4665
4666 if (user_wire) {
4667 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
4668
4669 /*
4670 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4671 * this map entry.
4672 */
4673
4674 if (entry->user_wired_count == 0) {
4675 size = entry->vme_end - entry->vme_start;
4676
4677 /*
4678 * Since this is the first time the user is wiring this map entry, check to see if we're
4679 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4680 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4681 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4682 * limit, then we fail.
4683 */
4684
4685 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4686 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4687 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4688 return KERN_RESOURCE_SHORTAGE;
4689
4690 /*
4691 * The first time the user wires an entry, we also increment the wired_count and add this to
4692 * the total that has been wired in the map.
4693 */
4694
4695 if (entry->wired_count >= MAX_WIRE_COUNT)
4696 return KERN_FAILURE;
4697
4698 entry->wired_count++;
4699 map->user_wire_size += size;
4700 }
4701
4702 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4703 return KERN_FAILURE;
4704
4705 entry->user_wired_count++;
4706
4707 } else {
4708
4709 /*
4710 * The kernel's wiring the memory. Just bump the count and continue.
4711 */
4712
4713 if (entry->wired_count >= MAX_WIRE_COUNT)
4714 panic("vm_map_wire: too many wirings");
4715
4716 entry->wired_count++;
4717 }
4718
4719 return KERN_SUCCESS;
4720 }
4721
4722 /*
4723 * Update the memory wiring accounting now that the given map entry is being unwired.
4724 */
4725
4726 static void
4727 subtract_wire_counts(
4728 vm_map_t map,
4729 vm_map_entry_t entry,
4730 boolean_t user_wire)
4731 {
4732
4733 if (user_wire) {
4734
4735 /*
4736 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4737 */
4738
4739 if (entry->user_wired_count == 1) {
4740
4741 /*
4742 * We're removing the last user wire reference. Decrement the wired_count and the total
4743 * user wired memory for this map.
4744 */
4745
4746 assert(entry->wired_count >= 1);
4747 entry->wired_count--;
4748 map->user_wire_size -= entry->vme_end - entry->vme_start;
4749 }
4750
4751 assert(entry->user_wired_count >= 1);
4752 entry->user_wired_count--;
4753
4754 } else {
4755
4756 /*
4757 * The kernel is unwiring the memory. Just update the count.
4758 */
4759
4760 assert(entry->wired_count >= 1);
4761 entry->wired_count--;
4762 }
4763 }
4764
4765 /*
4766 * vm_map_wire:
4767 *
4768 * Sets the pageability of the specified address range in the
4769 * target map as wired. Regions specified as not pageable require
4770 * locked-down physical memory and physical page maps. The
4771 * access_type variable indicates types of accesses that must not
4772 * generate page faults. This is checked against protection of
4773 * memory being locked-down.
4774 *
4775 * The map must not be locked, but a reference must remain to the
4776 * map throughout the call.
4777 */
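/*
 * Editor's sketch (hypothetical, not part of the original source): a
 * typical kernel caller wiring a page-aligned range for read/write
 * access. vm_map_wire_external(), defined further below, stamps the
 * caller's VM tag into the protection bits before calling the common
 * nested routine:
 *
 *	kr = vm_map_wire_external(map, start, end,
 *				  VM_PROT_READ | VM_PROT_WRITE,
 *				  FALSE);	-- kernel, not user, wiring
 */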
4778 static kern_return_t
4779 vm_map_wire_nested(
4780 register vm_map_t map,
4781 register vm_map_offset_t start,
4782 register vm_map_offset_t end,
4783 register vm_prot_t caller_prot,
4784 boolean_t user_wire,
4785 pmap_t map_pmap,
4786 vm_map_offset_t pmap_addr,
4787 ppnum_t *physpage_p)
4788 {
4789 register vm_map_entry_t entry;
4790 register vm_prot_t access_type;
4791 struct vm_map_entry *first_entry, tmp_entry;
4792 vm_map_t real_map;
4793 register vm_map_offset_t s,e;
4794 kern_return_t rc;
4795 boolean_t need_wakeup;
4796 boolean_t main_map = FALSE;
4797 wait_interrupt_t interruptible_state;
4798 thread_t cur_thread;
4799 unsigned int last_timestamp;
4800 vm_map_size_t size;
4801 boolean_t wire_and_extract;
4802
4803 access_type = (caller_prot & VM_PROT_ALL);
4804
4805 wire_and_extract = FALSE;
4806 if (physpage_p != NULL) {
4807 /*
4808 * The caller wants the physical page number of the
4809 * wired page. We return only one physical page number
4810 * so this works for only one page at a time.
4811 */
4812 if ((end - start) != PAGE_SIZE) {
4813 return KERN_INVALID_ARGUMENT;
4814 }
4815 wire_and_extract = TRUE;
4816 *physpage_p = 0;
4817 }
4818
4819 vm_map_lock(map);
4820 if(map_pmap == NULL)
4821 main_map = TRUE;
4822 last_timestamp = map->timestamp;
4823
4824 VM_MAP_RANGE_CHECK(map, start, end);
4825 assert(page_aligned(start));
4826 assert(page_aligned(end));
4827 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4828 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4829 if (start == end) {
4830 /* We wired what the caller asked for, zero pages */
4831 vm_map_unlock(map);
4832 return KERN_SUCCESS;
4833 }
4834
4835 need_wakeup = FALSE;
4836 cur_thread = current_thread();
4837
4838 s = start;
4839 rc = KERN_SUCCESS;
4840
4841 if (vm_map_lookup_entry(map, s, &first_entry)) {
4842 entry = first_entry;
4843 /*
4844 * vm_map_clip_start will be done later.
4845 * We don't want to unnest any nested submaps here !
4846 */
4847 } else {
4848 /* Start address is not in map */
4849 rc = KERN_INVALID_ADDRESS;
4850 goto done;
4851 }
4852
4853 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4854 /*
4855 * At this point, we have wired from "start" to "s".
4856 * We still need to wire from "s" to "end".
4857 *
4858 * "entry" hasn't been clipped, so it could start before "s"
4859 * and/or end after "end".
4860 */
4861
4862 /* "e" is how far we want to wire in this entry */
4863 e = entry->vme_end;
4864 if (e > end)
4865 e = end;
4866
4867 /*
4868 * If another thread is wiring/unwiring this entry then
4869 * block after informing the other thread to wake us up.
4870 */
4871 if (entry->in_transition) {
4872 wait_result_t wait_result;
4873
4874 /*
4875 * We have not clipped the entry. Make sure that
4876 * the start address is in range so that the lookup
4877 * below will succeed.
4878 * "s" is the current starting point: we've already
4879 * wired from "start" to "s" and we still have
4880 * to wire from "s" to "end".
4881 */
4882
4883 entry->needs_wakeup = TRUE;
4884
4885 /*
4886 * wake up anybody waiting on entries that we have
4887 * already wired.
4888 */
4889 if (need_wakeup) {
4890 vm_map_entry_wakeup(map);
4891 need_wakeup = FALSE;
4892 }
4893 /*
4894 * User wiring is interruptible
4895 */
4896 wait_result = vm_map_entry_wait(map,
4897 (user_wire) ? THREAD_ABORTSAFE :
4898 THREAD_UNINT);
4899 if (user_wire && wait_result == THREAD_INTERRUPTED) {
4900 /*
4901 * undo the wirings we have done so far.
4902 * We do not clear the needs_wakeup flag,
4903 * because we cannot tell if we were the
4904 * only one waiting.
4905 */
4906 rc = KERN_FAILURE;
4907 goto done;
4908 }
4909
4910 /*
4911 * Cannot avoid a lookup here. Reset the timestamp.
4912 */
4913 last_timestamp = map->timestamp;
4914
4915 /*
4916 * The entry could have been clipped; look it up again.
4917 * The worst that can happen is that it no longer exists.
4918 */
4919 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4920 /*
4921 * User: undo everything up to the previous
4922 * entry. Let vm_map_unwire worry about
4923 * checking the validity of the range.
4924 */
4925 rc = KERN_FAILURE;
4926 goto done;
4927 }
4928 entry = first_entry;
4929 continue;
4930 }
4931
4932 if (entry->is_sub_map) {
4933 vm_map_offset_t sub_start;
4934 vm_map_offset_t sub_end;
4935 vm_map_offset_t local_start;
4936 vm_map_offset_t local_end;
4937 pmap_t pmap;
4938
4939 if (wire_and_extract) {
4940 /*
4941 * Wiring would result in copy-on-write
4942 * which would not be compatible with
4943 * the sharing we have with the original
4944 * provider of this memory.
4945 */
4946 rc = KERN_INVALID_ARGUMENT;
4947 goto done;
4948 }
4949
4950 vm_map_clip_start(map, entry, s);
4951 vm_map_clip_end(map, entry, end);
4952
4953 sub_start = VME_OFFSET(entry);
4954 sub_end = entry->vme_end;
4955 sub_end += VME_OFFSET(entry) - entry->vme_start;
4956
4957 local_end = entry->vme_end;
4958 if(map_pmap == NULL) {
4959 vm_object_t object;
4960 vm_object_offset_t offset;
4961 vm_prot_t prot;
4962 boolean_t wired;
4963 vm_map_entry_t local_entry;
4964 vm_map_version_t version;
4965 vm_map_t lookup_map;
4966
4967 if(entry->use_pmap) {
4968 pmap = VME_SUBMAP(entry)->pmap;
4969 /* ppc implementation requires that */
4970 /* submaps' pmap address ranges line */
4971 /* up with the parent map */
4972 #ifdef notdef
4973 pmap_addr = sub_start;
4974 #endif
4975 pmap_addr = s;
4976 } else {
4977 pmap = map->pmap;
4978 pmap_addr = s;
4979 }
4980
4981 if (entry->wired_count) {
4982 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4983 goto done;
4984
4985 /*
4986 * The map was not unlocked:
4987 * no need to goto re-lookup.
4988 * Just go directly to next entry.
4989 */
4990 entry = entry->vme_next;
4991 s = entry->vme_start;
4992 continue;
4993
4994 }
4995
4996 /* call vm_map_lookup_locked to */
4997 /* cause any needs copy to be */
4998 /* evaluated */
4999 local_start = entry->vme_start;
5000 lookup_map = map;
5001 vm_map_lock_write_to_read(map);
5002 if(vm_map_lookup_locked(
5003 &lookup_map, local_start,
5004 access_type,
5005 OBJECT_LOCK_EXCLUSIVE,
5006 &version, &object,
5007 &offset, &prot, &wired,
5008 NULL,
5009 &real_map)) {
5010
5011 vm_map_unlock_read(lookup_map);
5012 assert(map_pmap == NULL);
5013 vm_map_unwire(map, start,
5014 s, user_wire);
5015 return(KERN_FAILURE);
5016 }
5017 vm_object_unlock(object);
5018 if(real_map != lookup_map)
5019 vm_map_unlock(real_map);
5020 vm_map_unlock_read(lookup_map);
5021 vm_map_lock(map);
5022
5023 /* we unlocked, so must re-lookup */
5024 if (!vm_map_lookup_entry(map,
5025 local_start,
5026 &local_entry)) {
5027 rc = KERN_FAILURE;
5028 goto done;
5029 }
5030
5031 /*
5032 * entry could have been "simplified",
5033 * so re-clip
5034 */
5035 entry = local_entry;
5036 assert(s == local_start);
5037 vm_map_clip_start(map, entry, s);
5038 vm_map_clip_end(map, entry, end);
5039 /* re-compute "e" */
5040 e = entry->vme_end;
5041 if (e > end)
5042 e = end;
5043
5044 /* did we have a change of type? */
5045 if (!entry->is_sub_map) {
5046 last_timestamp = map->timestamp;
5047 continue;
5048 }
5049 } else {
5050 local_start = entry->vme_start;
5051 pmap = map_pmap;
5052 }
5053
5054 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5055 goto done;
5056
5057 entry->in_transition = TRUE;
5058
5059 vm_map_unlock(map);
5060 rc = vm_map_wire_nested(VME_SUBMAP(entry),
5061 sub_start, sub_end,
5062 caller_prot,
5063 user_wire, pmap, pmap_addr,
5064 NULL);
5065 vm_map_lock(map);
5066
5067 /*
5068 * Find the entry again. It could have been clipped
5069 * after we unlocked the map.
5070 */
5071 if (!vm_map_lookup_entry(map, local_start,
5072 &first_entry))
5073 panic("vm_map_wire: re-lookup failed");
5074 entry = first_entry;
5075
5076 assert(local_start == s);
5077 /* re-compute "e" */
5078 e = entry->vme_end;
5079 if (e > end)
5080 e = end;
5081
5082 last_timestamp = map->timestamp;
5083 while ((entry != vm_map_to_entry(map)) &&
5084 (entry->vme_start < e)) {
5085 assert(entry->in_transition);
5086 entry->in_transition = FALSE;
5087 if (entry->needs_wakeup) {
5088 entry->needs_wakeup = FALSE;
5089 need_wakeup = TRUE;
5090 }
5091 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
5092 subtract_wire_counts(map, entry, user_wire);
5093 }
5094 entry = entry->vme_next;
5095 }
5096 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5097 goto done;
5098 }
5099
5100 /* no need to relookup again */
5101 s = entry->vme_start;
5102 continue;
5103 }
5104
5105 /*
5106 * If this entry is already wired then increment
5107 * the appropriate wire reference count.
5108 */
5109 if (entry->wired_count) {
5110
5111 if ((entry->protection & access_type) != access_type) {
5112 /* found a protection problem */
5113
5114 /*
5115 * XXX FBDP
5116 * We should always return an error
5117 * in this case but since we didn't
5118 * enforce it before, let's do
5119 * it only for the new "wire_and_extract"
5120 * code path for now...
5121 */
5122 if (wire_and_extract) {
5123 rc = KERN_PROTECTION_FAILURE;
5124 goto done;
5125 }
5126 }
5127
5128 /*
5129 * entry is already wired down, get our reference
5130 * after clipping to our range.
5131 */
5132 vm_map_clip_start(map, entry, s);
5133 vm_map_clip_end(map, entry, end);
5134
5135 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5136 goto done;
5137
5138 if (wire_and_extract) {
5139 vm_object_t object;
5140 vm_object_offset_t offset;
5141 vm_page_t m;
5142
5143 /*
5144 * We don't have to "wire" the page again
5145 * but we still have to "extract" its
5146 * physical page number, after some sanity
5147 * checks.
5148 */
5149 assert((entry->vme_end - entry->vme_start)
5150 == PAGE_SIZE);
5151 assert(!entry->needs_copy);
5152 assert(!entry->is_sub_map);
5153 assert(VME_OBJECT(entry));
5154 if (((entry->vme_end - entry->vme_start)
5155 != PAGE_SIZE) ||
5156 entry->needs_copy ||
5157 entry->is_sub_map ||
5158 VME_OBJECT(entry) == VM_OBJECT_NULL) {
5159 rc = KERN_INVALID_ARGUMENT;
5160 goto done;
5161 }
5162
5163 object = VME_OBJECT(entry);
5164 offset = VME_OFFSET(entry);
5165 /* need exclusive lock to update m->dirty */
5166 if (entry->protection & VM_PROT_WRITE) {
5167 vm_object_lock(object);
5168 } else {
5169 vm_object_lock_shared(object);
5170 }
5171 m = vm_page_lookup(object, offset);
5172 assert(m != VM_PAGE_NULL);
5173 assert(m->wire_count);
5174 if (m != VM_PAGE_NULL && m->wire_count) {
5175 *physpage_p = m->phys_page;
5176 if (entry->protection & VM_PROT_WRITE) {
5177 vm_object_lock_assert_exclusive(
5178 m->object);
5179 m->dirty = TRUE;
5180 }
5181 } else {
5182 /* not already wired !? */
5183 *physpage_p = 0;
5184 }
5185 vm_object_unlock(object);
5186 }
5187
5188 /* map was not unlocked: no need to relookup */
5189 entry = entry->vme_next;
5190 s = entry->vme_start;
5191 continue;
5192 }
5193
5194 /*
5195 * Unwired entry or wire request transmitted via submap
5196 */
5197
5198
5199 /*
5200 * Perform actions of vm_map_lookup that need the write
5201 * lock on the map: create a shadow object for a
5202 * copy-on-write region, or an object for a zero-fill
5203 * region.
5204 */
5205 size = entry->vme_end - entry->vme_start;
5206 /*
5207 * If wiring a copy-on-write page, we need to copy it now
5208 * even if we're only (currently) requesting read access.
5209 * This is aggressive, but once it's wired we can't move it.
5210 */
5211 if (entry->needs_copy) {
5212 if (wire_and_extract) {
5213 /*
5214 * We're supposed to share with the original
5215 * provider so should not be "needs_copy"
5216 */
5217 rc = KERN_INVALID_ARGUMENT;
5218 goto done;
5219 }
5220
5221 VME_OBJECT_SHADOW(entry, size);
5222 entry->needs_copy = FALSE;
5223 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5224 if (wire_and_extract) {
5225 /*
5226 * We're supposed to share with the original
5227 * provider so should already have an object.
5228 */
5229 rc = KERN_INVALID_ARGUMENT;
5230 goto done;
5231 }
5232 VME_OBJECT_SET(entry, vm_object_allocate(size));
5233 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
5234 assert(entry->use_pmap);
5235 }
5236
5237 vm_map_clip_start(map, entry, s);
5238 vm_map_clip_end(map, entry, end);
5239
5240 /* re-compute "e" */
5241 e = entry->vme_end;
5242 if (e > end)
5243 e = end;
5244
5245 /*
5246 * Check for holes and protection mismatch.
5247 * Holes: Next entry should be contiguous unless this
5248 * is the end of the region.
5249 * Protection: Access requested must be allowed, unless
5250 * wiring is by protection class
5251 */
5252 if ((entry->vme_end < end) &&
5253 ((entry->vme_next == vm_map_to_entry(map)) ||
5254 (entry->vme_next->vme_start > entry->vme_end))) {
5255 /* found a hole */
5256 rc = KERN_INVALID_ADDRESS;
5257 goto done;
5258 }
5259 if ((entry->protection & access_type) != access_type) {
5260 /* found a protection problem */
5261 rc = KERN_PROTECTION_FAILURE;
5262 goto done;
5263 }
5264
5265 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5266
5267 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5268 goto done;
5269
5270 entry->in_transition = TRUE;
5271
5272 /*
5273 * This entry might get split once we unlock the map.
5274 * In vm_fault_wire(), we need the current range as
5275 * defined by this entry. In order for this to work
5276 * along with a simultaneous clip operation, we make a
5277 * temporary copy of this entry and use that for the
5278 * wiring. Note that the underlying objects do not
5279 * change during a clip.
5280 */
5281 tmp_entry = *entry;
5282
5283 /*
5284 * The in_transition state guarantees that the entry
5285 * (or entries for this range, if a split occurred) will be
5286 * there when the map lock is acquired for the second time.
5287 */
5288 vm_map_unlock(map);
5289
5290 if (!user_wire && cur_thread != THREAD_NULL)
5291 interruptible_state = thread_interrupt_level(THREAD_UNINT);
5292 else
5293 interruptible_state = THREAD_UNINT;
5294
5295 if(map_pmap)
5296 rc = vm_fault_wire(map,
5297 &tmp_entry, caller_prot, map_pmap, pmap_addr,
5298 physpage_p);
5299 else
5300 rc = vm_fault_wire(map,
5301 &tmp_entry, caller_prot, map->pmap,
5302 tmp_entry.vme_start,
5303 physpage_p);
5304
5305 if (!user_wire && cur_thread != THREAD_NULL)
5306 thread_interrupt_level(interruptible_state);
5307
5308 vm_map_lock(map);
5309
5310 if (last_timestamp+1 != map->timestamp) {
5311 /*
5312 * Find the entry again. It could have been clipped
5313 * after we unlocked the map.
5314 */
5315 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5316 &first_entry))
5317 panic("vm_map_wire: re-lookup failed");
5318
5319 entry = first_entry;
5320 }
5321
5322 last_timestamp = map->timestamp;
5323
5324 while ((entry != vm_map_to_entry(map)) &&
5325 (entry->vme_start < tmp_entry.vme_end)) {
5326 assert(entry->in_transition);
5327 entry->in_transition = FALSE;
5328 if (entry->needs_wakeup) {
5329 entry->needs_wakeup = FALSE;
5330 need_wakeup = TRUE;
5331 }
5332 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5333 subtract_wire_counts(map, entry, user_wire);
5334 }
5335 entry = entry->vme_next;
5336 }
5337
5338 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5339 goto done;
5340 }
5341
5342 s = entry->vme_start;
5343 } /* end while loop through map entries */
5344
5345 done:
5346 if (rc == KERN_SUCCESS) {
5347 /* repair any damage we may have made to the VM map */
5348 vm_map_simplify_range(map, start, end);
5349 }
5350
5351 vm_map_unlock(map);
5352
5353 /*
5354 * wake up anybody waiting on entries we wired.
5355 */
5356 if (need_wakeup)
5357 vm_map_entry_wakeup(map);
5358
5359 if (rc != KERN_SUCCESS) {
5360 /* undo what has been wired so far */
5361 vm_map_unwire_nested(map, start, s, user_wire,
5362 map_pmap, pmap_addr);
5363 if (physpage_p) {
5364 *physpage_p = 0;
5365 }
5366 }
5367
5368 return rc;
5369
5370 }
5371
5372 kern_return_t
5373 vm_map_wire_external(
5374 register vm_map_t map,
5375 register vm_map_offset_t start,
5376 register vm_map_offset_t end,
5377 register vm_prot_t caller_prot,
5378 boolean_t user_wire)
5379 {
5380 kern_return_t kret;
5381
5382 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5383 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5384 kret = vm_map_wire_nested(map, start, end, caller_prot,
5385 user_wire, (pmap_t)NULL, 0, NULL);
5386 return kret;
5387 }
5388
5389 kern_return_t
5390 vm_map_wire(
5391 register vm_map_t map,
5392 register vm_map_offset_t start,
5393 register vm_map_offset_t end,
5394 register vm_prot_t caller_prot,
5395 boolean_t user_wire)
5396 {
5397 kern_return_t kret;
5398
5399 kret = vm_map_wire_nested(map, start, end, caller_prot,
5400 user_wire, (pmap_t)NULL, 0, NULL);
5401 return kret;
5402 }
5403
5404 kern_return_t
5405 vm_map_wire_and_extract_external(
5406 vm_map_t map,
5407 vm_map_offset_t start,
5408 vm_prot_t caller_prot,
5409 boolean_t user_wire,
5410 ppnum_t *physpage_p)
5411 {
5412 kern_return_t kret;
5413
5414 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5415 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5416 kret = vm_map_wire_nested(map,
5417 start,
5418 start+VM_MAP_PAGE_SIZE(map),
5419 caller_prot,
5420 user_wire,
5421 (pmap_t)NULL,
5422 0,
5423 physpage_p);
5424 if (kret != KERN_SUCCESS &&
5425 physpage_p != NULL) {
5426 *physpage_p = 0;
5427 }
5428 return kret;
5429 }
5430
5431 kern_return_t
5432 vm_map_wire_and_extract(
5433 vm_map_t map,
5434 vm_map_offset_t start,
5435 vm_prot_t caller_prot,
5436 boolean_t user_wire,
5437 ppnum_t *physpage_p)
5438 {
5439 kern_return_t kret;
5440
5441 kret = vm_map_wire_nested(map,
5442 start,
5443 start+VM_MAP_PAGE_SIZE(map),
5444 caller_prot,
5445 user_wire,
5446 (pmap_t)NULL,
5447 0,
5448 physpage_p);
5449 if (kret != KERN_SUCCESS &&
5450 physpage_p != NULL) {
5451 *physpage_p = 0;
5452 }
5453 return kret;
5454 }
5455
5456 /*
5457 * vm_map_unwire:
5458 *
5459 * Sets the pageability of the specified address range in the target map
5460 * as pageable. Regions specified must have been wired previously.
5461 *
5462 * The map must not be locked, but a reference must remain to the map
5463 * throughout the call.
5464 *
5465 * The kernel will panic on failures. User unwire ignores holes and
5466 * unwired and in-transition entries to avoid losing memory by leaving
5467 * it unwired.
5468 */
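/*
 * Editor's note (illustrative, not part of the original source): the
 * user_wire argument selects the failure policy described above.
 *
 *	-- user request: holes and already-unwired or in-transition
 *	-- entries are tolerated:
 *	kr = vm_map_unwire(map, start, end, TRUE);
 *
 *	-- kernel request: the same conditions panic instead:
 *	kr = vm_map_unwire(map, start, end, FALSE);
 */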
5469 static kern_return_t
5470 vm_map_unwire_nested(
5471 register vm_map_t map,
5472 register vm_map_offset_t start,
5473 register vm_map_offset_t end,
5474 boolean_t user_wire,
5475 pmap_t map_pmap,
5476 vm_map_offset_t pmap_addr)
5477 {
5478 register vm_map_entry_t entry;
5479 struct vm_map_entry *first_entry, tmp_entry;
5480 boolean_t need_wakeup;
5481 boolean_t main_map = FALSE;
5482 unsigned int last_timestamp;
5483
5484 vm_map_lock(map);
5485 if(map_pmap == NULL)
5486 main_map = TRUE;
5487 last_timestamp = map->timestamp;
5488
5489 VM_MAP_RANGE_CHECK(map, start, end);
5490 assert(page_aligned(start));
5491 assert(page_aligned(end));
5492 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5493 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5494
5495 if (start == end) {
5496 /* We unwired what the caller asked for: zero pages */
5497 vm_map_unlock(map);
5498 return KERN_SUCCESS;
5499 }
5500
5501 if (vm_map_lookup_entry(map, start, &first_entry)) {
5502 entry = first_entry;
5503 /*
5504 * vm_map_clip_start will be done later.
5505 * We don't want to unnest any nested sub maps here !
5506 */
5507 }
5508 else {
5509 if (!user_wire) {
5510 panic("vm_map_unwire: start not found");
5511 }
5512 /* Start address is not in map. */
5513 vm_map_unlock(map);
5514 return(KERN_INVALID_ADDRESS);
5515 }
5516
5517 if (entry->superpage_size) {
5518 /* superpages are always wired */
5519 vm_map_unlock(map);
5520 return KERN_INVALID_ADDRESS;
5521 }
5522
5523 need_wakeup = FALSE;
5524 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5525 if (entry->in_transition) {
5526 /*
5527 * 1)
5528 * Another thread is wiring down this entry. Note
5529 * that if it were not for the other thread, we would
5530 * be unwiring an unwired entry. This is not
5531 * permitted. If we wait, we will be unwiring memory
5532 * we did not wire.
5533 *
5534 * 2)
5535 * Another thread is unwiring this entry. We did not
5536 * have a reference to it, because if we did, this
5537 * entry will not be getting unwired now.
5538 */
5539 if (!user_wire) {
5540 /*
5541 * XXX FBDP
5542 * This could happen: there could be some
5543 * overlapping vslock/vsunlock operations
5544 * going on.
5545 * We should probably just wait and retry,
5546 * but then we have to be careful that this
5547 * entry could get "simplified" after
5548 * "in_transition" gets unset and before
5549 * we re-lookup the entry, so we would
5550 * have to re-clip the entry to avoid
5551 * re-unwiring what we have already unwired...
5552 * See vm_map_wire_nested().
5553 *
5554 * Or we could just ignore "in_transition"
5555 * here and proceed to decrement the wired
5556 * count(s) on this entry. That should be fine
5557 * as long as "wired_count" doesn't drop all
5558 * the way to 0 (and we should panic if THAT
5559 * happens).
5560 */
5561 panic("vm_map_unwire: in_transition entry");
5562 }
5563
5564 entry = entry->vme_next;
5565 continue;
5566 }
5567
5568 if (entry->is_sub_map) {
5569 vm_map_offset_t sub_start;
5570 vm_map_offset_t sub_end;
5571 vm_map_offset_t local_end;
5572 pmap_t pmap;
5573
5574 vm_map_clip_start(map, entry, start);
5575 vm_map_clip_end(map, entry, end);
5576
5577 sub_start = VME_OFFSET(entry);
5578 sub_end = entry->vme_end - entry->vme_start;
5579 sub_end += VME_OFFSET(entry);
5580 local_end = entry->vme_end;
5581 if(map_pmap == NULL) {
5582 if(entry->use_pmap) {
5583 pmap = VME_SUBMAP(entry)->pmap;
5584 pmap_addr = sub_start;
5585 } else {
5586 pmap = map->pmap;
5587 pmap_addr = start;
5588 }
5589 if (entry->wired_count == 0 ||
5590 (user_wire && entry->user_wired_count == 0)) {
5591 if (!user_wire)
5592 panic("vm_map_unwire: entry is unwired");
5593 entry = entry->vme_next;
5594 continue;
5595 }
5596
5597 /*
5598 * Check for holes
5599 * Holes: Next entry should be contiguous unless
5600 * this is the end of the region.
5601 */
5602 if (((entry->vme_end < end) &&
5603 ((entry->vme_next == vm_map_to_entry(map)) ||
5604 (entry->vme_next->vme_start
5605 > entry->vme_end)))) {
5606 if (!user_wire)
5607 panic("vm_map_unwire: non-contiguous region");
5608 /*
5609 entry = entry->vme_next;
5610 continue;
5611 */
5612 }
5613
5614 subtract_wire_counts(map, entry, user_wire);
5615
5616 if (entry->wired_count != 0) {
5617 entry = entry->vme_next;
5618 continue;
5619 }
5620
5621 entry->in_transition = TRUE;
5622 tmp_entry = *entry;/* see comment in vm_map_wire() */
5623
5624 /*
5625 * We can unlock the map now. The in_transition state
5626 * guarantees the existence of the entry.
5627 */
5628 vm_map_unlock(map);
5629 vm_map_unwire_nested(VME_SUBMAP(entry),
5630 sub_start, sub_end, user_wire, pmap, pmap_addr);
5631 vm_map_lock(map);
5632
5633 if (last_timestamp+1 != map->timestamp) {
5634 /*
5635 * Find the entry again. It could have been
5636 * clipped or deleted after we unlocked the map.
5637 */
5638 if (!vm_map_lookup_entry(map,
5639 tmp_entry.vme_start,
5640 &first_entry)) {
5641 if (!user_wire)
5642 panic("vm_map_unwire: re-lookup failed");
5643 entry = first_entry->vme_next;
5644 } else
5645 entry = first_entry;
5646 }
5647 last_timestamp = map->timestamp;
5648
5649 /*
5650 * clear transition bit for all constituent entries
5651 * that were in the original entry (saved in
5652 * tmp_entry). Also check for waiters.
5653 */
5654 while ((entry != vm_map_to_entry(map)) &&
5655 (entry->vme_start < tmp_entry.vme_end)) {
5656 assert(entry->in_transition);
5657 entry->in_transition = FALSE;
5658 if (entry->needs_wakeup) {
5659 entry->needs_wakeup = FALSE;
5660 need_wakeup = TRUE;
5661 }
5662 entry = entry->vme_next;
5663 }
5664 continue;
5665 } else {
5666 vm_map_unlock(map);
5667 vm_map_unwire_nested(VME_SUBMAP(entry),
5668 sub_start, sub_end, user_wire, map_pmap,
5669 pmap_addr);
5670 vm_map_lock(map);
5671
5672 if (last_timestamp+1 != map->timestamp) {
5673 /*
5674 * Find the entry again. It could have been
5675 * clipped or deleted after we unlocked the map.
5676 */
5677 if (!vm_map_lookup_entry(map,
5678 tmp_entry.vme_start,
5679 &first_entry)) {
5680 if (!user_wire)
5681 panic("vm_map_unwire: re-lookup failed");
5682 entry = first_entry->vme_next;
5683 } else
5684 entry = first_entry;
5685 }
5686 last_timestamp = map->timestamp;
5687 }
5688 }
5689
5690
5691 if ((entry->wired_count == 0) ||
5692 (user_wire && entry->user_wired_count == 0)) {
5693 if (!user_wire)
5694 panic("vm_map_unwire: entry is unwired");
5695
5696 entry = entry->vme_next;
5697 continue;
5698 }
5699
5700 assert(entry->wired_count > 0 &&
5701 (!user_wire || entry->user_wired_count > 0));
5702
5703 vm_map_clip_start(map, entry, start);
5704 vm_map_clip_end(map, entry, end);
5705
5706 /*
5707 * Check for holes
5708 * Holes: Next entry should be contiguous unless
5709 * this is the end of the region.
5710 */
5711 if (((entry->vme_end < end) &&
5712 ((entry->vme_next == vm_map_to_entry(map)) ||
5713 (entry->vme_next->vme_start > entry->vme_end)))) {
5714
5715 if (!user_wire)
5716 panic("vm_map_unwire: non-contiguous region");
5717 entry = entry->vme_next;
5718 continue;
5719 }
5720
5721 subtract_wire_counts(map, entry, user_wire);
5722
5723 if (entry->wired_count != 0) {
5724 entry = entry->vme_next;
5725 continue;
5726 }
5727
5728 if(entry->zero_wired_pages) {
5729 entry->zero_wired_pages = FALSE;
5730 }
5731
5732 entry->in_transition = TRUE;
5733 tmp_entry = *entry; /* see comment in vm_map_wire() */
5734
5735 /*
5736 * We can unlock the map now. The in_transition state
5737 * guarantees the existence of the entry.
5738 */
5739 vm_map_unlock(map);
5740 if(map_pmap) {
5741 vm_fault_unwire(map,
5742 &tmp_entry, FALSE, map_pmap, pmap_addr);
5743 } else {
5744 vm_fault_unwire(map,
5745 &tmp_entry, FALSE, map->pmap,
5746 tmp_entry.vme_start);
5747 }
5748 vm_map_lock(map);
5749
5750 if (last_timestamp+1 != map->timestamp) {
5751 /*
5752 * Find the entry again. It could have been clipped
5753 * or deleted after we unlocked the map.
5754 */
5755 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5756 &first_entry)) {
5757 if (!user_wire)
5758 panic("vm_map_unwire: re-lookup failed");
5759 entry = first_entry->vme_next;
5760 } else
5761 entry = first_entry;
5762 }
5763 last_timestamp = map->timestamp;
5764
5765 /*
5766 * clear transition bit for all constituent entries that
5767 * were in the original entry (saved in tmp_entry). Also
5768 * check for waiters.
5769 */
5770 while ((entry != vm_map_to_entry(map)) &&
5771 (entry->vme_start < tmp_entry.vme_end)) {
5772 assert(entry->in_transition);
5773 entry->in_transition = FALSE;
5774 if (entry->needs_wakeup) {
5775 entry->needs_wakeup = FALSE;
5776 need_wakeup = TRUE;
5777 }
5778 entry = entry->vme_next;
5779 }
5780 }
5781
5782 /*
5783 * We might have fragmented the address space when we wired this
5784 * range of addresses. Attempt to re-coalesce these VM map entries
5785 * with their neighbors now that they're no longer wired.
5786 * Under some circumstances, address space fragmentation can
5787 * prevent VM object shadow chain collapsing, which can cause
5788 * swap space leaks.
5789 */
5790 vm_map_simplify_range(map, start, end);
5791
5792 vm_map_unlock(map);
5793 /*
5794 * wake up anybody waiting on entries that we have unwired.
5795 */
5796 if (need_wakeup)
5797 vm_map_entry_wakeup(map);
5798 return(KERN_SUCCESS);
5799
5800 }
5801
5802 kern_return_t
5803 vm_map_unwire(
5804 register vm_map_t map,
5805 register vm_map_offset_t start,
5806 register vm_map_offset_t end,
5807 boolean_t user_wire)
5808 {
5809 return vm_map_unwire_nested(map, start, end,
5810 user_wire, (pmap_t)NULL, 0);
5811 }
5812
5813
5814 /*
5815 * vm_map_entry_delete: [ internal use only ]
5816 *
5817 * Deallocate the given entry from the target map.
5818 */
5819 static void
5820 vm_map_entry_delete(
5821 register vm_map_t map,
5822 register vm_map_entry_t entry)
5823 {
5824 register vm_map_offset_t s, e;
5825 register vm_object_t object;
5826 register vm_map_t submap;
5827
5828 s = entry->vme_start;
5829 e = entry->vme_end;
5830 assert(page_aligned(s));
5831 assert(page_aligned(e));
5832 if (entry->map_aligned == TRUE) {
5833 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5834 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5835 }
5836 assert(entry->wired_count == 0);
5837 assert(entry->user_wired_count == 0);
5838 assert(!entry->permanent);
5839
5840 if (entry->is_sub_map) {
5841 object = NULL;
5842 submap = VME_SUBMAP(entry);
5843 } else {
5844 submap = NULL;
5845 object = VME_OBJECT(entry);
5846 }
5847
5848 vm_map_store_entry_unlink(map, entry);
5849 map->size -= e - s;
5850
5851 vm_map_entry_dispose(map, entry);
5852
5853 vm_map_unlock(map);
5854 /*
5855 * Deallocate the object only after removing all
5856 * pmap entries pointing to its pages.
5857 */
5858 if (submap)
5859 vm_map_deallocate(submap);
5860 else
5861 vm_object_deallocate(object);
5862
5863 }
5864
5865 void
5866 vm_map_submap_pmap_clean(
5867 vm_map_t map,
5868 vm_map_offset_t start,
5869 vm_map_offset_t end,
5870 vm_map_t sub_map,
5871 vm_map_offset_t offset)
5872 {
5873 vm_map_offset_t submap_start;
5874 vm_map_offset_t submap_end;
5875 vm_map_size_t remove_size;
5876 vm_map_entry_t entry;
5877
5878 submap_end = offset + (end - start);
5879 submap_start = offset;
5880
5881 vm_map_lock_read(sub_map);
5882 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5883
5884 remove_size = (entry->vme_end - entry->vme_start);
5885 if(offset > entry->vme_start)
5886 remove_size -= offset - entry->vme_start;
5887
5888
5889 if(submap_end < entry->vme_end) {
5890 remove_size -=
5891 entry->vme_end - submap_end;
5892 }
5893 if(entry->is_sub_map) {
5894 vm_map_submap_pmap_clean(
5895 sub_map,
5896 start,
5897 start + remove_size,
5898 VME_SUBMAP(entry),
5899 VME_OFFSET(entry));
5900 } else {
5901
5902 if((map->mapped_in_other_pmaps) && (map->ref_count)
5903 && (VME_OBJECT(entry) != NULL)) {
5904 vm_object_pmap_protect_options(
5905 VME_OBJECT(entry),
5906 (VME_OFFSET(entry) +
5907 offset -
5908 entry->vme_start),
5909 remove_size,
5910 PMAP_NULL,
5911 entry->vme_start,
5912 VM_PROT_NONE,
5913 PMAP_OPTIONS_REMOVE);
5914 } else {
5915 pmap_remove(map->pmap,
5916 (addr64_t)start,
5917 (addr64_t)(start + remove_size));
5918 }
5919 }
5920 }
5921
5922 entry = entry->vme_next;
5923
5924 while((entry != vm_map_to_entry(sub_map))
5925 && (entry->vme_start < submap_end)) {
5926 remove_size = (entry->vme_end - entry->vme_start);
5927 if(submap_end < entry->vme_end) {
5928 remove_size -= entry->vme_end - submap_end;
5929 }
5930 if(entry->is_sub_map) {
5931 vm_map_submap_pmap_clean(
5932 sub_map,
5933 (start + entry->vme_start) - offset,
5934 ((start + entry->vme_start) - offset) + remove_size,
5935 VME_SUBMAP(entry),
5936 VME_OFFSET(entry));
5937 } else {
5938 if((map->mapped_in_other_pmaps) && (map->ref_count)
5939 && (VME_OBJECT(entry) != NULL)) {
5940 vm_object_pmap_protect_options(
5941 VME_OBJECT(entry),
5942 VME_OFFSET(entry),
5943 remove_size,
5944 PMAP_NULL,
5945 entry->vme_start,
5946 VM_PROT_NONE,
5947 PMAP_OPTIONS_REMOVE);
5948 } else {
5949 pmap_remove(map->pmap,
5950 (addr64_t)((start + entry->vme_start)
5951 - offset),
5952 (addr64_t)(((start + entry->vme_start)
5953 - offset) + remove_size));
5954 }
5955 }
5956 entry = entry->vme_next;
5957 }
5958 vm_map_unlock_read(sub_map);
5959 return;
5960 }
5961
5962 /*
5963 * vm_map_delete: [ internal use only ]
5964 *
5965 * Deallocates the given address range from the target map.
5966 * Removes all user wirings. Unwires one kernel wiring if
5967 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5968 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5969 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5970 *
5971 * This routine is called with map locked and leaves map locked.
5972 */
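/*
 * Illustrative sketch (not compiled): how vm_map_delete() is typically
 * driven with the VM_MAP_REMOVE_SAVE_ENTRIES flag.  "map", "start", "end"
 * and "zap_map" are assumed to be set up by the caller; "zap_map" is a
 * temporary map that receives the removed entries and is torn down
 * separately later (at which point VM_MAP_REMOVE_NO_PMAP_CLEANUP applies,
 * as noted in the body below).
 */
#if 0
kern_return_t kr;

vm_map_lock(map);		/* vm_map_delete() wants the map locked... */
kr = vm_map_delete(map, start, end,
    VM_MAP_REMOVE_SAVE_ENTRIES,	/* park the removed entries in zap_map */
    zap_map);
vm_map_unlock(map);		/* ...and it leaves the map locked for us to drop */
#endif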
5973 static kern_return_t
5974 vm_map_delete(
5975 vm_map_t map,
5976 vm_map_offset_t start,
5977 vm_map_offset_t end,
5978 int flags,
5979 vm_map_t zap_map)
5980 {
5981 vm_map_entry_t entry, next;
5982 struct vm_map_entry *first_entry, tmp_entry;
5983 register vm_map_offset_t s;
5984 register vm_object_t object;
5985 boolean_t need_wakeup;
5986 unsigned int last_timestamp = ~0; /* unlikely value */
5987 int interruptible;
5988
5989 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5990 THREAD_ABORTSAFE : THREAD_UNINT;
5991
5992 /*
5993 * All our DMA I/O operations in IOKit are currently done by
5994 * wiring through the map entries of the task requesting the I/O.
5995 * Because of this, we must always wait for kernel wirings
5996 * to go away on the entries before deleting them.
5997 *
5998 * Any caller who wants to actually remove a kernel wiring
5999 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
6000 * properly remove one wiring instead of blasting through
6001 * them all.
6002 */
6003 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
6004
6005 while(1) {
6006 /*
6007 * Find the start of the region, and clip it
6008 */
6009 if (vm_map_lookup_entry(map, start, &first_entry)) {
6010 entry = first_entry;
6011 if (map == kalloc_map &&
6012 (entry->vme_start != start ||
6013 entry->vme_end != end)) {
6014 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6015 "mismatched entry %p [0x%llx:0x%llx]\n",
6016 map,
6017 (uint64_t)start,
6018 (uint64_t)end,
6019 entry,
6020 (uint64_t)entry->vme_start,
6021 (uint64_t)entry->vme_end);
6022 }
6023 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
6024 start = SUPERPAGE_ROUND_DOWN(start);
6025 continue;
6026 }
6027 if (start == entry->vme_start) {
6028 /*
6029 * No need to clip. We don't want to cause
6030 * any unnecessary unnesting in this case...
6031 */
6032 } else {
6033 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6034 entry->map_aligned &&
6035 !VM_MAP_PAGE_ALIGNED(
6036 start,
6037 VM_MAP_PAGE_MASK(map))) {
6038 /*
6039 * The entry will no longer be
6040 * map-aligned after clipping
6041 * and the caller said it's OK.
6042 */
6043 entry->map_aligned = FALSE;
6044 }
6045 if (map == kalloc_map) {
6046 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6047 " clipping %p at 0x%llx\n",
6048 map,
6049 (uint64_t)start,
6050 (uint64_t)end,
6051 entry,
6052 (uint64_t)start);
6053 }
6054 vm_map_clip_start(map, entry, start);
6055 }
6056
6057 /*
6058 * Fix the lookup hint now, rather than each
6059 * time through the loop.
6060 */
6061 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6062 } else {
6063 if (map->pmap == kernel_pmap &&
6064 map->ref_count != 0) {
6065 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6066 "no map entry at 0x%llx\n",
6067 map,
6068 (uint64_t)start,
6069 (uint64_t)end,
6070 (uint64_t)start);
6071 }
6072 entry = first_entry->vme_next;
6073 }
6074 break;
6075 }
6076 if (entry->superpage_size)
6077 end = SUPERPAGE_ROUND_UP(end);
6078
6079 need_wakeup = FALSE;
6080 /*
6081 * Step through all entries in this region
6082 */
6083 s = entry->vme_start;
6084 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6085 /*
6086 * At this point, we have deleted all the memory entries
6087 * between "start" and "s". We still need to delete
6088 * all memory entries between "s" and "end".
6089 * While we were blocked and the map was unlocked, some
6090 * new memory entries could have been re-allocated between
6091 * "start" and "s" and we don't want to mess with those.
6092 * Some of those entries could even have been re-assembled
6093 * with an entry after "s" (in vm_map_simplify_entry()), so
6094 * we may have to vm_map_clip_start() again.
6095 */
6096
6097 if (entry->vme_start >= s) {
6098 /*
6099 * This entry starts on or after "s"
6100 * so no need to clip its start.
6101 */
6102 } else {
6103 /*
6104 * This entry has been re-assembled by a
6105 * vm_map_simplify_entry(). We need to
6106 * re-clip its start.
6107 */
6108 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6109 entry->map_aligned &&
6110 !VM_MAP_PAGE_ALIGNED(s,
6111 VM_MAP_PAGE_MASK(map))) {
6112 /*
6113 * The entry will no longer be map-aligned
6114 * after clipping and the caller said it's OK.
6115 */
6116 entry->map_aligned = FALSE;
6117 }
6118 if (map == kalloc_map) {
6119 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6120 "clipping %p at 0x%llx\n",
6121 map,
6122 (uint64_t)start,
6123 (uint64_t)end,
6124 entry,
6125 (uint64_t)s);
6126 }
6127 vm_map_clip_start(map, entry, s);
6128 }
6129 if (entry->vme_end <= end) {
6130 /*
6131 * This entry is going away completely, so no need
6132 * to clip and possibly cause an unnecessary unnesting.
6133 */
6134 } else {
6135 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6136 entry->map_aligned &&
6137 !VM_MAP_PAGE_ALIGNED(end,
6138 VM_MAP_PAGE_MASK(map))) {
6139 /*
6140 * The entry will no longer be map-aligned
6141 * after clipping and the caller said it's OK.
6142 */
6143 entry->map_aligned = FALSE;
6144 }
6145 if (map == kalloc_map) {
6146 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6147 "clipping %p at 0x%llx\n",
6148 map,
6149 (uint64_t)start,
6150 (uint64_t)end,
6151 entry,
6152 (uint64_t)end);
6153 }
6154 vm_map_clip_end(map, entry, end);
6155 }
6156
6157 if (entry->permanent) {
6158 panic("attempt to remove permanent VM map entry "
6159 "%p [0x%llx:0x%llx]\n",
6160 entry, (uint64_t) s, (uint64_t) end);
6161 }
6162
6163
6164 if (entry->in_transition) {
6165 wait_result_t wait_result;
6166
6167 /*
6168 * Another thread is wiring/unwiring this entry.
6169 * Let the other thread know we are waiting.
6170 */
6171 assert(s == entry->vme_start);
6172 entry->needs_wakeup = TRUE;
6173
6174 /*
6175 * wake up anybody waiting on entries that we have
6176 * already unwired/deleted.
6177 */
6178 if (need_wakeup) {
6179 vm_map_entry_wakeup(map);
6180 need_wakeup = FALSE;
6181 }
6182
6183 wait_result = vm_map_entry_wait(map, interruptible);
6184
6185 if (interruptible &&
6186 wait_result == THREAD_INTERRUPTED) {
6187 /*
6188 * We do not clear the needs_wakeup flag,
6189 * since we cannot tell if we were the only one.
6190 */
6191 return KERN_ABORTED;
6192 }
6193
6194 /*
6195 * The entry could have been clipped or it
6196 * may not exist anymore. Look it up again.
6197 */
6198 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6199 /*
6200 * User: use the next entry
6201 */
6202 entry = first_entry->vme_next;
6203 s = entry->vme_start;
6204 } else {
6205 entry = first_entry;
6206 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6207 }
6208 last_timestamp = map->timestamp;
6209 continue;
6210 } /* end in_transition */
6211
6212 if (entry->wired_count) {
6213 boolean_t user_wire;
6214
6215 user_wire = entry->user_wired_count > 0;
6216
6217 /*
6218 * Remove a kernel wiring if requested
6219 */
6220 if (flags & VM_MAP_REMOVE_KUNWIRE) {
6221 entry->wired_count--;
6222 }
6223
6224 /*
6225 * Remove all user wirings for proper accounting
6226 */
6227 if (entry->user_wired_count > 0) {
6228 while (entry->user_wired_count)
6229 subtract_wire_counts(map, entry, user_wire);
6230 }
6231
6232 if (entry->wired_count != 0) {
6233 assert(map != kernel_map);
6234 /*
6235 * Cannot continue. Typical case is when
6236 * a user thread has physical I/O pending
6237 * on this page. Either wait for the
6238 * kernel wiring to go away or return an
6239 * error.
6240 */
6241 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
6242 wait_result_t wait_result;
6243
6244 assert(s == entry->vme_start);
6245 entry->needs_wakeup = TRUE;
6246 wait_result = vm_map_entry_wait(map,
6247 interruptible);
6248
6249 if (interruptible &&
6250 wait_result == THREAD_INTERRUPTED) {
6251 /*
6252 * We do not clear the
6253 * needs_wakeup flag, since we
6254 * cannot tell if we were the
6255 * only one.
6256 */
6257 return KERN_ABORTED;
6258 }
6259
6260 /*
6261 * The entry could have been clipped or
6262 * it may not exist anymore. Look it
6263 * up again.
6264 */
6265 if (!vm_map_lookup_entry(map, s,
6266 &first_entry)) {
6267 assert(map != kernel_map);
6268 /*
6269 * User: use the next entry
6270 */
6271 entry = first_entry->vme_next;
6272 s = entry->vme_start;
6273 } else {
6274 entry = first_entry;
6275 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6276 }
6277 last_timestamp = map->timestamp;
6278 continue;
6279 }
6280 else {
6281 return KERN_FAILURE;
6282 }
6283 }
6284
6285 entry->in_transition = TRUE;
6286 /*
6287 * copy current entry. see comment in vm_map_wire()
6288 */
6289 tmp_entry = *entry;
6290 assert(s == entry->vme_start);
6291
6292 /*
6293 * We can unlock the map now. The in_transition
6294 * state guarantees existence of the entry.
6295 */
6296 vm_map_unlock(map);
6297
6298 if (tmp_entry.is_sub_map) {
6299 vm_map_t sub_map;
6300 vm_map_offset_t sub_start, sub_end;
6301 pmap_t pmap;
6302 vm_map_offset_t pmap_addr;
6303
6304
6305 sub_map = VME_SUBMAP(&tmp_entry);
6306 sub_start = VME_OFFSET(&tmp_entry);
6307 sub_end = sub_start + (tmp_entry.vme_end -
6308 tmp_entry.vme_start);
6309 if (tmp_entry.use_pmap) {
6310 pmap = sub_map->pmap;
6311 pmap_addr = tmp_entry.vme_start;
6312 } else {
6313 pmap = map->pmap;
6314 pmap_addr = tmp_entry.vme_start;
6315 }
6316 (void) vm_map_unwire_nested(sub_map,
6317 sub_start, sub_end,
6318 user_wire,
6319 pmap, pmap_addr);
6320 } else {
6321
6322 if (VME_OBJECT(&tmp_entry) == kernel_object) {
6323 pmap_protect_options(
6324 map->pmap,
6325 tmp_entry.vme_start,
6326 tmp_entry.vme_end,
6327 VM_PROT_NONE,
6328 PMAP_OPTIONS_REMOVE,
6329 NULL);
6330 }
6331 vm_fault_unwire(map, &tmp_entry,
6332 VME_OBJECT(&tmp_entry) == kernel_object,
6333 map->pmap, tmp_entry.vme_start);
6334 }
6335
6336 vm_map_lock(map);
6337
6338 if (last_timestamp+1 != map->timestamp) {
6339 /*
6340 * Find the entry again. It could have
6341 * been clipped after we unlocked the map.
6342 */
6343 if (!vm_map_lookup_entry(map, s, &first_entry)){
6344 assert((map != kernel_map) &&
6345 (!entry->is_sub_map));
6346 first_entry = first_entry->vme_next;
6347 s = first_entry->vme_start;
6348 } else {
6349 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6350 }
6351 } else {
6352 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6353 first_entry = entry;
6354 }
6355
6356 last_timestamp = map->timestamp;
6357
6358 entry = first_entry;
6359 while ((entry != vm_map_to_entry(map)) &&
6360 (entry->vme_start < tmp_entry.vme_end)) {
6361 assert(entry->in_transition);
6362 entry->in_transition = FALSE;
6363 if (entry->needs_wakeup) {
6364 entry->needs_wakeup = FALSE;
6365 need_wakeup = TRUE;
6366 }
6367 entry = entry->vme_next;
6368 }
6369 /*
6370 * We have unwired the entry(s). Go back and
6371 * delete them.
6372 */
6373 entry = first_entry;
6374 continue;
6375 }
6376
6377 /* entry is unwired */
6378 assert(entry->wired_count == 0);
6379 assert(entry->user_wired_count == 0);
6380
6381 assert(s == entry->vme_start);
6382
6383 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6384 /*
6385 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6386 * vm_map_delete(), some map entries might have been
6387 * transferred to a "zap_map", which doesn't have a
6388 * pmap. The original pmap has already been flushed
6389 * in the vm_map_delete() call targeting the original
6390 * map, but when we get to destroying the "zap_map",
6391 * we don't have any pmap to flush, so let's just skip
6392 * all this.
6393 */
6394 } else if (entry->is_sub_map) {
6395 if (entry->use_pmap) {
6396 #ifndef NO_NESTED_PMAP
6397 int pmap_flags;
6398
6399 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6400 /*
6401 * This is the final cleanup of the
6402 * address space being terminated.
6403 * No new mappings are expected and
6404 * we don't really need to unnest the
6405 * shared region (and lose the "global"
6406 * pmap mappings, if applicable).
6407 *
6408 * Tell the pmap layer that we're
6409 * "clean" wrt nesting.
6410 */
6411 pmap_flags = PMAP_UNNEST_CLEAN;
6412 } else {
6413 /*
6414 * We're unmapping part of the nested
6415 * shared region, so we can't keep the
6416 * nested pmap.
6417 */
6418 pmap_flags = 0;
6419 }
6420 pmap_unnest_options(
6421 map->pmap,
6422 (addr64_t)entry->vme_start,
6423 entry->vme_end - entry->vme_start,
6424 pmap_flags);
6425 #endif /* NO_NESTED_PMAP */
6426 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6427 /* clean up parent map/maps */
6428 vm_map_submap_pmap_clean(
6429 map, entry->vme_start,
6430 entry->vme_end,
6431 VME_SUBMAP(entry),
6432 VME_OFFSET(entry));
6433 }
6434 } else {
6435 vm_map_submap_pmap_clean(
6436 map, entry->vme_start, entry->vme_end,
6437 VME_SUBMAP(entry),
6438 VME_OFFSET(entry));
6439 }
6440 } else if (VME_OBJECT(entry) != kernel_object &&
6441 VME_OBJECT(entry) != compressor_object) {
6442 object = VME_OBJECT(entry);
6443 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6444 vm_object_pmap_protect_options(
6445 object, VME_OFFSET(entry),
6446 entry->vme_end - entry->vme_start,
6447 PMAP_NULL,
6448 entry->vme_start,
6449 VM_PROT_NONE,
6450 PMAP_OPTIONS_REMOVE);
6451 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
6452 (map->pmap == kernel_pmap)) {
6453 /* Remove translations associated
6454 * with this range unless the entry
6455 * does not have an object, or
6456 * it's the kernel map or a descendant
6457 * since the platform could potentially
6458 * create "backdoor" mappings invisible
6459 * to the VM. It is expected that
6460 * objectless, non-kernel ranges
6461 * do not have such VM invisible
6462 * translations.
6463 */
6464 pmap_remove_options(map->pmap,
6465 (addr64_t)entry->vme_start,
6466 (addr64_t)entry->vme_end,
6467 PMAP_OPTIONS_REMOVE);
6468 }
6469 }
6470
6471 if (entry->iokit_acct) {
6472 /* alternate accounting */
6473 DTRACE_VM4(vm_map_iokit_unmapped_region,
6474 vm_map_t, map,
6475 vm_map_offset_t, entry->vme_start,
6476 vm_map_offset_t, entry->vme_end,
6477 int, VME_ALIAS(entry));
6478 vm_map_iokit_unmapped_region(map,
6479 (entry->vme_end -
6480 entry->vme_start));
6481 entry->iokit_acct = FALSE;
6482 }
6483
6484 /*
6485 * All pmap mappings for this map entry must have been
6486 * cleared by now.
6487 */
6488 #if DEBUG
6489 assert(vm_map_pmap_is_empty(map,
6490 entry->vme_start,
6491 entry->vme_end));
6492 #endif /* DEBUG */
6493
6494 next = entry->vme_next;
6495
6496 if (map->pmap == kernel_pmap &&
6497 map->ref_count != 0 &&
6498 entry->vme_end < end &&
6499 (next == vm_map_to_entry(map) ||
6500 next->vme_start != entry->vme_end)) {
6501 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6502 "hole after %p at 0x%llx\n",
6503 map,
6504 (uint64_t)start,
6505 (uint64_t)end,
6506 entry,
6507 (uint64_t)entry->vme_end);
6508 }
6509
6510 s = next->vme_start;
6511 last_timestamp = map->timestamp;
6512
6513 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6514 zap_map != VM_MAP_NULL) {
6515 vm_map_size_t entry_size;
6516 /*
6517 * The caller wants to save the affected VM map entries
6518 * into the "zap_map". The caller will take care of
6519 * these entries.
6520 */
6521 /* unlink the entry from "map" ... */
6522 vm_map_store_entry_unlink(map, entry);
6523 /* ... and add it to the end of the "zap_map" */
6524 vm_map_store_entry_link(zap_map,
6525 vm_map_last_entry(zap_map),
6526 entry);
6527 entry_size = entry->vme_end - entry->vme_start;
6528 map->size -= entry_size;
6529 zap_map->size += entry_size;
6530 /* we didn't unlock the map, so no timestamp increase */
6531 last_timestamp--;
6532 } else {
6533 vm_map_entry_delete(map, entry);
6534 /* vm_map_entry_delete unlocks the map */
6535 vm_map_lock(map);
6536 }
6537
6538 entry = next;
6539
6540 if(entry == vm_map_to_entry(map)) {
6541 break;
6542 }
6543 if (last_timestamp+1 != map->timestamp) {
6544 /*
6545 * we are responsible for deleting everything
6546 * from the given space; if someone has interfered,
6547 * we pick up where we left off. Back-fills should
6548 * be all right for anyone except vm_map_delete, and
6549 * we have to assume that the task has been fully
6550 * disabled before we get here.
6551 */
6552 if (!vm_map_lookup_entry(map, s, &entry)){
6553 entry = entry->vme_next;
6554 s = entry->vme_start;
6555 } else {
6556 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6557 }
6558 /*
6559 * others can not only allocate behind us, we can
6560 * also see coalescing while we don't hold the map lock
6561 */
6562 if(entry == vm_map_to_entry(map)) {
6563 break;
6564 }
6565 }
6566 last_timestamp = map->timestamp;
6567 }
6568
6569 if (map->wait_for_space)
6570 thread_wakeup((event_t) map);
6571 /*
6572 * wake up anybody waiting on entries that we have already deleted.
6573 */
6574 if (need_wakeup)
6575 vm_map_entry_wakeup(map);
6576
6577 return KERN_SUCCESS;
6578 }
6579
6580 /*
6581 * vm_map_remove:
6582 *
6583 * Remove the given address range from the target map.
6584 * This is the exported form of vm_map_delete.
6585 */
6586 kern_return_t
6587 vm_map_remove(
6588 register vm_map_t map,
6589 register vm_map_offset_t start,
6590 register vm_map_offset_t end,
6591 register boolean_t flags)
6592 {
6593 register kern_return_t result;
6594
6595 vm_map_lock(map);
6596 VM_MAP_RANGE_CHECK(map, start, end);
6597 /*
6598 * For the zone_map, the kernel controls the allocation/freeing of memory.
6599 * Any free to the zone_map should be within the bounds of the map and
6600 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6601 * free to the zone_map into a no-op, there is a problem and we should
6602 * panic.
6603 */
6604 if ((map == zone_map) && (start == end))
6605 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6606 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6607 vm_map_unlock(map);
6608
6609 return(result);
6610 }
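/*
 * Illustrative sketch (not compiled): a typical page-rounded removal.
 * "map", "addr" and "size" are assumed to come from the caller.  A
 * KERN_ABORTED return is only possible when VM_MAP_REMOVE_INTERRUPTIBLE
 * is passed, per the vm_map_delete() description above.
 */
#if 0
kern_return_t kr;

kr = vm_map_remove(map,
    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
    VM_MAP_REMOVE_KUNWIRE);	/* also drop one kernel wiring, if any */
if (kr != KERN_SUCCESS) {
	/* e.g. KERN_ABORTED if VM_MAP_REMOVE_INTERRUPTIBLE had been set */
}
#endif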
6611
6612
6613 /*
6614 * Routine: vm_map_copy_discard
6615 *
6616 * Description:
6617 * Dispose of a map copy object (returned by
6618 * vm_map_copyin).
6619 */
6620 void
6621 vm_map_copy_discard(
6622 vm_map_copy_t copy)
6623 {
6624 if (copy == VM_MAP_COPY_NULL)
6625 return;
6626
6627 switch (copy->type) {
6628 case VM_MAP_COPY_ENTRY_LIST:
6629 while (vm_map_copy_first_entry(copy) !=
6630 vm_map_copy_to_entry(copy)) {
6631 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6632
6633 vm_map_copy_entry_unlink(copy, entry);
6634 if (entry->is_sub_map) {
6635 vm_map_deallocate(VME_SUBMAP(entry));
6636 } else {
6637 vm_object_deallocate(VME_OBJECT(entry));
6638 }
6639 vm_map_copy_entry_dispose(copy, entry);
6640 }
6641 break;
6642 case VM_MAP_COPY_OBJECT:
6643 vm_object_deallocate(copy->cpy_object);
6644 break;
6645 case VM_MAP_COPY_KERNEL_BUFFER:
6646
6647 /*
6648 * The vm_map_copy_t and possibly the data buffer were
6649 * allocated by a single call to kalloc(), i.e. the
6650 * vm_map_copy_t was not allocated out of the zone.
6651 */
6652 if (copy->size > msg_ool_size_small || copy->offset)
6653 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6654 (long long)copy->size, (long long)copy->offset);
6655 kfree(copy, copy->size + cpy_kdata_hdr_sz);
6656 return;
6657 }
6658 zfree(vm_map_copy_zone, copy);
6659 }
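/*
 * Illustrative sketch (not compiled): the ownership rule for copy objects.
 * A copy returned by vm_map_copyin() must either be consumed by a
 * successful copyout/overwrite or be discarded explicitly.  "src_map",
 * "src_addr", "len" and the failing check are assumptions of the sketch;
 * vm_map_copyin()'s signature is taken from vm_map.h.
 */
#if 0
vm_map_copy_t copy;
kern_return_t kr;

kr = vm_map_copyin(src_map, src_addr, len,
    FALSE,	/* src_destroy */
    &copy);
if (kr != KERN_SUCCESS)
	return kr;

if (downstream_check_fails) {	/* hypothetical failure condition */
	vm_map_copy_discard(copy);	/* we still own the copy: discard it */
	return KERN_FAILURE;
}
#endif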
6660
6661 /*
6662 * Routine: vm_map_copy_copy
6663 *
6664 * Description:
6665 * Move the information in a map copy object to
6666 * a new map copy object, leaving the old one
6667 * empty.
6668 *
6669 * This is used by kernel routines that need
6670 * to look at out-of-line data (in copyin form)
6671 * before deciding whether to return SUCCESS.
6672 * If the routine returns FAILURE, the original
6673 * copy object will be deallocated; therefore,
6674 * these routines must make a copy of the copy
6675 * object and leave the original empty so that
6676 * deallocation will not fail.
6677 */
6678 vm_map_copy_t
6679 vm_map_copy_copy(
6680 vm_map_copy_t copy)
6681 {
6682 vm_map_copy_t new_copy;
6683
6684 if (copy == VM_MAP_COPY_NULL)
6685 return VM_MAP_COPY_NULL;
6686
6687 /*
6688 * Allocate a new copy object, and copy the information
6689 * from the old one into it.
6690 */
6691
6692 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6693 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6694 *new_copy = *copy;
6695
6696 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6697 /*
6698 * The links in the entry chain must be
6699 * changed to point to the new copy object.
6700 */
6701 vm_map_copy_first_entry(copy)->vme_prev
6702 = vm_map_copy_to_entry(new_copy);
6703 vm_map_copy_last_entry(copy)->vme_next
6704 = vm_map_copy_to_entry(new_copy);
6705 }
6706
6707 /*
6708 * Change the old copy object into one that contains
6709 * nothing to be deallocated.
6710 */
6711 copy->type = VM_MAP_COPY_OBJECT;
6712 copy->cpy_object = VM_OBJECT_NULL;
6713
6714 /*
6715 * Return the new object.
6716 */
6717 return new_copy;
6718 }
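/*
 * Illustrative sketch (not compiled) of the pattern described above:
 * take a private copy of the copy object, inspect the out-of-line data,
 * and leave the original empty so that a later deallocation of it is
 * harmless.  "inspect_ool_data" is a hypothetical examination routine.
 */
#if 0
vm_map_copy_t working_copy;

working_copy = vm_map_copy_copy(copy);	/* "copy" is now empty */
if (inspect_ool_data(working_copy) != KERN_SUCCESS) {
	vm_map_copy_discard(working_copy);
	return KERN_FAILURE;	/* the original "copy" can still be discarded safely */
}
/* on success, hand "working_copy" along in place of "copy" */
#endif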
6719
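/*
 * Routine:	vm_map_overwrite_submap_recurse	[internal use only]
 *
 * Description:
 *	Verify that the destination range [dst_addr, dst_addr + dst_size)
 *	of "dst_map" is writeable and contiguous, recursing into any
 *	submaps encountered and waiting for entries in transition.
 *	Returns KERN_FAILURE if a permanent object is found after a
 *	submap has been traversed; used by the copy-overwrite path
 *	before it commits to overwriting a destination with submaps.
 */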
6720 static kern_return_t
6721 vm_map_overwrite_submap_recurse(
6722 vm_map_t dst_map,
6723 vm_map_offset_t dst_addr,
6724 vm_map_size_t dst_size)
6725 {
6726 vm_map_offset_t dst_end;
6727 vm_map_entry_t tmp_entry;
6728 vm_map_entry_t entry;
6729 kern_return_t result;
6730 boolean_t encountered_sub_map = FALSE;
6731
6732
6733
6734 /*
6735 * Verify that the destination is all writeable
6736 * initially. We have to trunc the destination
6737 * address and round the copy size or we'll end up
6738 * splitting entries in strange ways.
6739 */
6740
6741 dst_end = vm_map_round_page(dst_addr + dst_size,
6742 VM_MAP_PAGE_MASK(dst_map));
6743 vm_map_lock(dst_map);
6744
6745 start_pass_1:
6746 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6747 vm_map_unlock(dst_map);
6748 return(KERN_INVALID_ADDRESS);
6749 }
6750
6751 vm_map_clip_start(dst_map,
6752 tmp_entry,
6753 vm_map_trunc_page(dst_addr,
6754 VM_MAP_PAGE_MASK(dst_map)));
6755 if (tmp_entry->is_sub_map) {
6756 /* clipping did unnest if needed */
6757 assert(!tmp_entry->use_pmap);
6758 }
6759
6760 for (entry = tmp_entry;;) {
6761 vm_map_entry_t next;
6762
6763 next = entry->vme_next;
6764 while(entry->is_sub_map) {
6765 vm_map_offset_t sub_start;
6766 vm_map_offset_t sub_end;
6767 vm_map_offset_t local_end;
6768
6769 if (entry->in_transition) {
6770 /*
6771 * Say that we are waiting, and wait for entry.
6772 */
6773 entry->needs_wakeup = TRUE;
6774 vm_map_entry_wait(dst_map, THREAD_UNINT);
6775
6776 goto start_pass_1;
6777 }
6778
6779 encountered_sub_map = TRUE;
6780 sub_start = VME_OFFSET(entry);
6781
6782 if(entry->vme_end < dst_end)
6783 sub_end = entry->vme_end;
6784 else
6785 sub_end = dst_end;
6786 sub_end -= entry->vme_start;
6787 sub_end += VME_OFFSET(entry);
6788 local_end = entry->vme_end;
6789 vm_map_unlock(dst_map);
6790
6791 result = vm_map_overwrite_submap_recurse(
6792 VME_SUBMAP(entry),
6793 sub_start,
6794 sub_end - sub_start);
6795
6796 if(result != KERN_SUCCESS)
6797 return result;
6798 if (dst_end <= entry->vme_end)
6799 return KERN_SUCCESS;
6800 vm_map_lock(dst_map);
6801 if(!vm_map_lookup_entry(dst_map, local_end,
6802 &tmp_entry)) {
6803 vm_map_unlock(dst_map);
6804 return(KERN_INVALID_ADDRESS);
6805 }
6806 entry = tmp_entry;
6807 next = entry->vme_next;
6808 }
6809
6810 if ( ! (entry->protection & VM_PROT_WRITE)) {
6811 vm_map_unlock(dst_map);
6812 return(KERN_PROTECTION_FAILURE);
6813 }
6814
6815 /*
6816 * If the entry is in transition, we must wait
6817 * for it to exit that state. Anything could happen
6818 * when we unlock the map, so start over.
6819 */
6820 if (entry->in_transition) {
6821
6822 /*
6823 * Say that we are waiting, and wait for entry.
6824 */
6825 entry->needs_wakeup = TRUE;
6826 vm_map_entry_wait(dst_map, THREAD_UNINT);
6827
6828 goto start_pass_1;
6829 }
6830
6831 /*
6832 * our range is contained completely within this map entry
6833 */
6834 if (dst_end <= entry->vme_end) {
6835 vm_map_unlock(dst_map);
6836 return KERN_SUCCESS;
6837 }
6838 /*
6839 * check that range specified is contiguous region
6840 */
6841 if ((next == vm_map_to_entry(dst_map)) ||
6842 (next->vme_start != entry->vme_end)) {
6843 vm_map_unlock(dst_map);
6844 return(KERN_INVALID_ADDRESS);
6845 }
6846
6847 /*
6848 * Check for permanent objects in the destination.
6849 */
6850 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6851 ((!VME_OBJECT(entry)->internal) ||
6852 (VME_OBJECT(entry)->true_share))) {
6853 if(encountered_sub_map) {
6854 vm_map_unlock(dst_map);
6855 return(KERN_FAILURE);
6856 }
6857 }
6858
6859
6860 entry = next;
6861 }/* for */
6862 vm_map_unlock(dst_map);
6863 return(KERN_SUCCESS);
6864 }
6865
6866 /*
6867 * Routine: vm_map_copy_overwrite
6868 *
6869 * Description:
6870 * Copy the memory described by the map copy
6871 * object (copy; returned by vm_map_copyin) onto
6872 * the specified destination region (dst_map, dst_addr).
6873 * The destination must be writeable.
6874 *
6875 * Unlike vm_map_copyout, this routine actually
6876 * writes over previously-mapped memory. If the
6877 * previous mapping was to a permanent (user-supplied)
6878 * memory object, it is preserved.
6879 *
6880 * The attributes (protection and inheritance) of the
6881 * destination region are preserved.
6882 *
6883 * If successful, consumes the copy object.
6884 * Otherwise, the caller is responsible for it.
6885 *
6886 * Implementation notes:
6887 * To overwrite aligned temporary virtual memory, it is
6888 * sufficient to remove the previous mapping and insert
6889 * the new copy. This replacement is done either on
6890 * the whole region (if no permanent virtual memory
6891 * objects are embedded in the destination region) or
6892 * in individual map entries.
6893 *
6894 * To overwrite permanent virtual memory, it is necessary
6895 * to copy each page, as the external memory management
6896 * interface currently does not provide any optimizations.
6897 *
6898 * Unaligned memory also has to be copied. It is possible
6899 * to use 'vm_trickery' to copy the aligned data. This is
6900 * not done but not hard to implement.
6901 *
6902 * Once a page of permanent memory has been overwritten,
6903 * it is impossible to interrupt this function; otherwise,
6904 * the call would be neither atomic nor location-independent.
6905 * The kernel-state portion of a user thread must be
6906 * interruptible.
6907 *
6908 * It may be expensive to forward all requests that might
6909 * overwrite permanent memory (vm_write, vm_copy) to
6910 * uninterruptible kernel threads. This routine may be
6911 * called by interruptible threads; however, success is
6912 * not guaranteed -- if the request cannot be performed
6913 * atomically and interruptibly, an error indication is
6914 * returned.
6915 */
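/*
 * Illustrative sketch (not compiled): the contract above from a caller's
 * point of view.  "src_map", "dst_map", "src_addr", "dst_addr" and "size"
 * are assumed; vm_map_copyin()'s signature is taken from vm_map.h.  On
 * success the copy object is consumed; on failure the caller still owns
 * it and must discard it.
 */
#if 0
vm_map_copy_t copy;
kern_return_t kr;

kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
if (kr != KERN_SUCCESS)
	return kr;

kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
    FALSE);	/* interruptible */
if (kr != KERN_SUCCESS)
	vm_map_copy_discard(copy);	/* not consumed on failure */
return kr;
#endif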
6916
6917 static kern_return_t
6918 vm_map_copy_overwrite_nested(
6919 vm_map_t dst_map,
6920 vm_map_address_t dst_addr,
6921 vm_map_copy_t copy,
6922 boolean_t interruptible,
6923 pmap_t pmap,
6924 boolean_t discard_on_success)
6925 {
6926 vm_map_offset_t dst_end;
6927 vm_map_entry_t tmp_entry;
6928 vm_map_entry_t entry;
6929 kern_return_t kr;
6930 boolean_t aligned = TRUE;
6931 boolean_t contains_permanent_objects = FALSE;
6932 boolean_t encountered_sub_map = FALSE;
6933 vm_map_offset_t base_addr;
6934 vm_map_size_t copy_size;
6935 vm_map_size_t total_size;
6936
6937
6938 /*
6939 * Check for null copy object.
6940 */
6941
6942 if (copy == VM_MAP_COPY_NULL)
6943 return(KERN_SUCCESS);
6944
6945 /*
6946 * Check for special kernel buffer allocated
6947 * by new_ipc_kmsg_copyin.
6948 */
6949
6950 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6951 return(vm_map_copyout_kernel_buffer(
6952 dst_map, &dst_addr,
6953 copy, TRUE, discard_on_success));
6954 }
6955
6956 /*
6957 * Only works for entry lists at the moment. Will
6958 * support page lists later.
6959 */
6960
6961 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6962
6963 if (copy->size == 0) {
6964 if (discard_on_success)
6965 vm_map_copy_discard(copy);
6966 return(KERN_SUCCESS);
6967 }
6968
6969 /*
6970 * Verify that the destination is all writeable
6971 * initially. We have to trunc the destination
6972 * address and round the copy size or we'll end up
6973 * splitting entries in strange ways.
6974 */
6975
6976 if (!VM_MAP_PAGE_ALIGNED(copy->size,
6977 VM_MAP_PAGE_MASK(dst_map)) ||
6978 !VM_MAP_PAGE_ALIGNED(copy->offset,
6979 VM_MAP_PAGE_MASK(dst_map)) ||
6980 !VM_MAP_PAGE_ALIGNED(dst_addr,
6981 VM_MAP_PAGE_MASK(dst_map)))
6982 {
6983 aligned = FALSE;
6984 dst_end = vm_map_round_page(dst_addr + copy->size,
6985 VM_MAP_PAGE_MASK(dst_map));
6986 } else {
6987 dst_end = dst_addr + copy->size;
6988 }
6989
6990 vm_map_lock(dst_map);
6991
6992 /* LP64todo - remove this check when vm_map_commpage64()
6993 * no longer has to stuff in a map_entry for the commpage
6994 * above the map's max_offset.
6995 */
6996 if (dst_addr >= dst_map->max_offset) {
6997 vm_map_unlock(dst_map);
6998 return(KERN_INVALID_ADDRESS);
6999 }
7000
7001 start_pass_1:
7002 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7003 vm_map_unlock(dst_map);
7004 return(KERN_INVALID_ADDRESS);
7005 }
7006 vm_map_clip_start(dst_map,
7007 tmp_entry,
7008 vm_map_trunc_page(dst_addr,
7009 VM_MAP_PAGE_MASK(dst_map)));
7010 for (entry = tmp_entry;;) {
7011 vm_map_entry_t next = entry->vme_next;
7012
7013 while(entry->is_sub_map) {
7014 vm_map_offset_t sub_start;
7015 vm_map_offset_t sub_end;
7016 vm_map_offset_t local_end;
7017
7018 if (entry->in_transition) {
7019
7020 /*
7021 * Say that we are waiting, and wait for entry.
7022 */
7023 entry->needs_wakeup = TRUE;
7024 vm_map_entry_wait(dst_map, THREAD_UNINT);
7025
7026 goto start_pass_1;
7027 }
7028
7029 local_end = entry->vme_end;
7030 if (!(entry->needs_copy)) {
7031 /* if needs_copy we are a COW submap */
7032 /* in such a case we just replace so */
7033 /* there is no need for the */
7034 /* following check. */
7035 encountered_sub_map = TRUE;
7036 sub_start = VME_OFFSET(entry);
7037
7038 if(entry->vme_end < dst_end)
7039 sub_end = entry->vme_end;
7040 else
7041 sub_end = dst_end;
7042 sub_end -= entry->vme_start;
7043 sub_end += VME_OFFSET(entry);
7044 vm_map_unlock(dst_map);
7045
7046 kr = vm_map_overwrite_submap_recurse(
7047 VME_SUBMAP(entry),
7048 sub_start,
7049 sub_end - sub_start);
7050 if(kr != KERN_SUCCESS)
7051 return kr;
7052 vm_map_lock(dst_map);
7053 }
7054
7055 if (dst_end <= entry->vme_end)
7056 goto start_overwrite;
7057 if(!vm_map_lookup_entry(dst_map, local_end,
7058 &entry)) {
7059 vm_map_unlock(dst_map);
7060 return(KERN_INVALID_ADDRESS);
7061 }
7062 next = entry->vme_next;
7063 }
7064
7065 if ( ! (entry->protection & VM_PROT_WRITE)) {
7066 vm_map_unlock(dst_map);
7067 return(KERN_PROTECTION_FAILURE);
7068 }
7069
7070 /*
7071 * If the entry is in transition, we must wait
7072 * for it to exit that state. Anything could happen
7073 * when we unlock the map, so start over.
7074 */
7075 if (entry->in_transition) {
7076
7077 /*
7078 * Say that we are waiting, and wait for entry.
7079 */
7080 entry->needs_wakeup = TRUE;
7081 vm_map_entry_wait(dst_map, THREAD_UNINT);
7082
7083 goto start_pass_1;
7084 }
7085
7086 /*
7087 * our range is contained completely within this map entry
7088 */
7089 if (dst_end <= entry->vme_end)
7090 break;
7091 /*
7092 * check that range specified is contiguous region
7093 */
7094 if ((next == vm_map_to_entry(dst_map)) ||
7095 (next->vme_start != entry->vme_end)) {
7096 vm_map_unlock(dst_map);
7097 return(KERN_INVALID_ADDRESS);
7098 }
7099
7100
7101 /*
7102 * Check for permanent objects in the destination.
7103 */
7104 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7105 ((!VME_OBJECT(entry)->internal) ||
7106 (VME_OBJECT(entry)->true_share))) {
7107 contains_permanent_objects = TRUE;
7108 }
7109
7110 entry = next;
7111 }/* for */
7112
7113 start_overwrite:
7114 /*
7115 * If there are permanent objects in the destination, then
7116 * the copy cannot be interrupted.
7117 */
7118
7119 if (interruptible && contains_permanent_objects) {
7120 vm_map_unlock(dst_map);
7121 return(KERN_FAILURE); /* XXX */
7122 }
7123
7124 /*
7125 *
7126 * Make a second pass, overwriting the data.
7127 * At the beginning of each loop iteration,
7128 * the next entry to be overwritten is "tmp_entry"
7129 * (initially, the value returned from the lookup above),
7130 * and the starting address expected in that entry
7131 * is "start".
7132 */
7133
7134 total_size = copy->size;
7135 if(encountered_sub_map) {
7136 copy_size = 0;
7137 /* re-calculate tmp_entry since we've had the map */
7138 /* unlocked */
7139 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7140 vm_map_unlock(dst_map);
7141 return(KERN_INVALID_ADDRESS);
7142 }
7143 } else {
7144 copy_size = copy->size;
7145 }
7146
7147 base_addr = dst_addr;
7148 while(TRUE) {
7149 /* deconstruct the copy object and do in parts */
7150 /* only in sub_map, interruptible case */
7151 vm_map_entry_t copy_entry;
7152 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7153 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
7154 int nentries;
7155 int remaining_entries = 0;
7156 vm_map_offset_t new_offset = 0;
7157
7158 for (entry = tmp_entry; copy_size == 0;) {
7159 vm_map_entry_t next;
7160
7161 next = entry->vme_next;
7162
7163 /* tmp_entry and base address are moved along */
7164 /* each time we encounter a sub-map. Otherwise */
7165 /* entry can outpace tmp_entry, and the copy_size */
7166 /* may reflect the distance between them */
7167 /* if the current entry is found to be in transition */
7168 /* we will start over at the beginning or at the last */
7169 /* submap encountered, as dictated by base_addr, and */
7170 /* we will zero copy_size accordingly. */
7171 if (entry->in_transition) {
7172 /*
7173 * Say that we are waiting, and wait for entry.
7174 */
7175 entry->needs_wakeup = TRUE;
7176 vm_map_entry_wait(dst_map, THREAD_UNINT);
7177
7178 if(!vm_map_lookup_entry(dst_map, base_addr,
7179 &tmp_entry)) {
7180 vm_map_unlock(dst_map);
7181 return(KERN_INVALID_ADDRESS);
7182 }
7183 copy_size = 0;
7184 entry = tmp_entry;
7185 continue;
7186 }
7187 if(entry->is_sub_map) {
7188 vm_map_offset_t sub_start;
7189 vm_map_offset_t sub_end;
7190 vm_map_offset_t local_end;
7191
7192 if (entry->needs_copy) {
7193 /* if this is a COW submap */
7194 /* just back the range with an */
7195 /* anonymous entry */
7196 if(entry->vme_end < dst_end)
7197 sub_end = entry->vme_end;
7198 else
7199 sub_end = dst_end;
7200 if(entry->vme_start < base_addr)
7201 sub_start = base_addr;
7202 else
7203 sub_start = entry->vme_start;
7204 vm_map_clip_end(
7205 dst_map, entry, sub_end);
7206 vm_map_clip_start(
7207 dst_map, entry, sub_start);
7208 assert(!entry->use_pmap);
7209 entry->is_sub_map = FALSE;
7210 vm_map_deallocate(
7211 VME_SUBMAP(entry));
7212 VME_SUBMAP_SET(entry, NULL);
7213 entry->is_shared = FALSE;
7214 entry->needs_copy = FALSE;
7215 VME_OFFSET_SET(entry, 0);
7216 /*
7217 * XXX FBDP
7218 * We should propagate the protections
7219 * of the submap entry here instead
7220 * of forcing them to VM_PROT_ALL...
7221 * Or better yet, we should inherit
7222 * the protection of the copy_entry.
7223 */
7224 entry->protection = VM_PROT_ALL;
7225 entry->max_protection = VM_PROT_ALL;
7226 entry->wired_count = 0;
7227 entry->user_wired_count = 0;
7228 if(entry->inheritance
7229 == VM_INHERIT_SHARE)
7230 entry->inheritance = VM_INHERIT_COPY;
7231 continue;
7232 }
7233 /* first take care of any non-sub_map */
7234 /* entries to send */
7235 if(base_addr < entry->vme_start) {
7236 /* stuff to send */
7237 copy_size =
7238 entry->vme_start - base_addr;
7239 break;
7240 }
7241 sub_start = VME_OFFSET(entry);
7242
7243 if(entry->vme_end < dst_end)
7244 sub_end = entry->vme_end;
7245 else
7246 sub_end = dst_end;
7247 sub_end -= entry->vme_start;
7248 sub_end += VME_OFFSET(entry);
7249 local_end = entry->vme_end;
7250 vm_map_unlock(dst_map);
7251 copy_size = sub_end - sub_start;
7252
7253 /* adjust the copy object */
7254 if (total_size > copy_size) {
7255 vm_map_size_t local_size = 0;
7256 vm_map_size_t entry_size;
7257
7258 nentries = 1;
7259 new_offset = copy->offset;
7260 copy_entry = vm_map_copy_first_entry(copy);
7261 while(copy_entry !=
7262 vm_map_copy_to_entry(copy)){
7263 entry_size = copy_entry->vme_end -
7264 copy_entry->vme_start;
7265 if((local_size < copy_size) &&
7266 ((local_size + entry_size)
7267 >= copy_size)) {
7268 vm_map_copy_clip_end(copy,
7269 copy_entry,
7270 copy_entry->vme_start +
7271 (copy_size - local_size));
7272 entry_size = copy_entry->vme_end -
7273 copy_entry->vme_start;
7274 local_size += entry_size;
7275 new_offset += entry_size;
7276 }
7277 if(local_size >= copy_size) {
7278 next_copy = copy_entry->vme_next;
7279 copy_entry->vme_next =
7280 vm_map_copy_to_entry(copy);
7281 previous_prev =
7282 copy->cpy_hdr.links.prev;
7283 copy->cpy_hdr.links.prev = copy_entry;
7284 copy->size = copy_size;
7285 remaining_entries =
7286 copy->cpy_hdr.nentries;
7287 remaining_entries -= nentries;
7288 copy->cpy_hdr.nentries = nentries;
7289 break;
7290 } else {
7291 local_size += entry_size;
7292 new_offset += entry_size;
7293 nentries++;
7294 }
7295 copy_entry = copy_entry->vme_next;
7296 }
7297 }
7298
7299 if((entry->use_pmap) && (pmap == NULL)) {
7300 kr = vm_map_copy_overwrite_nested(
7301 VME_SUBMAP(entry),
7302 sub_start,
7303 copy,
7304 interruptible,
7305 VME_SUBMAP(entry)->pmap,
7306 TRUE);
7307 } else if (pmap != NULL) {
7308 kr = vm_map_copy_overwrite_nested(
7309 VME_SUBMAP(entry),
7310 sub_start,
7311 copy,
7312 interruptible, pmap,
7313 TRUE);
7314 } else {
7315 kr = vm_map_copy_overwrite_nested(
7316 VME_SUBMAP(entry),
7317 sub_start,
7318 copy,
7319 interruptible,
7320 dst_map->pmap,
7321 TRUE);
7322 }
7323 if(kr != KERN_SUCCESS) {
7324 if(next_copy != NULL) {
7325 copy->cpy_hdr.nentries +=
7326 remaining_entries;
7327 copy->cpy_hdr.links.prev->vme_next =
7328 next_copy;
7329 copy->cpy_hdr.links.prev
7330 = previous_prev;
7331 copy->size = total_size;
7332 }
7333 return kr;
7334 }
7335 if (dst_end <= local_end) {
7336 return(KERN_SUCCESS);
7337 }
7338 /* otherwise copy no longer exists, it was */
7339 /* destroyed after successful copy_overwrite */
7340 copy = (vm_map_copy_t)
7341 zalloc(vm_map_copy_zone);
7342 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7343 vm_map_copy_first_entry(copy) =
7344 vm_map_copy_last_entry(copy) =
7345 vm_map_copy_to_entry(copy);
7346 copy->type = VM_MAP_COPY_ENTRY_LIST;
7347 copy->offset = new_offset;
7348
7349 /*
7350 * XXX FBDP
7351 * this does not seem to deal with
7352 * the VM map store (R&B tree)
7353 */
7354
7355 total_size -= copy_size;
7356 copy_size = 0;
7357 /* put back remainder of copy in container */
7358 if(next_copy != NULL) {
7359 copy->cpy_hdr.nentries = remaining_entries;
7360 copy->cpy_hdr.links.next = next_copy;
7361 copy->cpy_hdr.links.prev = previous_prev;
7362 copy->size = total_size;
7363 next_copy->vme_prev =
7364 vm_map_copy_to_entry(copy);
7365 next_copy = NULL;
7366 }
7367 base_addr = local_end;
7368 vm_map_lock(dst_map);
7369 if(!vm_map_lookup_entry(dst_map,
7370 local_end, &tmp_entry)) {
7371 vm_map_unlock(dst_map);
7372 return(KERN_INVALID_ADDRESS);
7373 }
7374 entry = tmp_entry;
7375 continue;
7376 }
7377 if (dst_end <= entry->vme_end) {
7378 copy_size = dst_end - base_addr;
7379 break;
7380 }
7381
7382 if ((next == vm_map_to_entry(dst_map)) ||
7383 (next->vme_start != entry->vme_end)) {
7384 vm_map_unlock(dst_map);
7385 return(KERN_INVALID_ADDRESS);
7386 }
7387
7388 entry = next;
7389 }/* for */
7390
7391 next_copy = NULL;
7392 nentries = 1;
7393
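/*
 * As in the submap case above: temporarily truncate the copy entry list
 * to the entries covering "copy_size" bytes.  The detached tail is
 * remembered in "next_copy"/"previous_prev"/"remaining_entries" and is
 * re-attached to the copy object after this chunk has been overwritten,
 * or before returning an error.
 */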
7394 /* adjust the copy object */
7395 if (total_size > copy_size) {
7396 vm_map_size_t local_size = 0;
7397 vm_map_size_t entry_size;
7398
7399 new_offset = copy->offset;
7400 copy_entry = vm_map_copy_first_entry(copy);
7401 while(copy_entry != vm_map_copy_to_entry(copy)) {
7402 entry_size = copy_entry->vme_end -
7403 copy_entry->vme_start;
7404 if((local_size < copy_size) &&
7405 ((local_size + entry_size)
7406 >= copy_size)) {
7407 vm_map_copy_clip_end(copy, copy_entry,
7408 copy_entry->vme_start +
7409 (copy_size - local_size));
7410 entry_size = copy_entry->vme_end -
7411 copy_entry->vme_start;
7412 local_size += entry_size;
7413 new_offset += entry_size;
7414 }
7415 if(local_size >= copy_size) {
7416 next_copy = copy_entry->vme_next;
7417 copy_entry->vme_next =
7418 vm_map_copy_to_entry(copy);
7419 previous_prev =
7420 copy->cpy_hdr.links.prev;
7421 copy->cpy_hdr.links.prev = copy_entry;
7422 copy->size = copy_size;
7423 remaining_entries =
7424 copy->cpy_hdr.nentries;
7425 remaining_entries -= nentries;
7426 copy->cpy_hdr.nentries = nentries;
7427 break;
7428 } else {
7429 local_size += entry_size;
7430 new_offset += entry_size;
7431 nentries++;
7432 }
7433 copy_entry = copy_entry->vme_next;
7434 }
7435 }
7436
7437 if (aligned) {
7438 pmap_t local_pmap;
7439
7440 if(pmap)
7441 local_pmap = pmap;
7442 else
7443 local_pmap = dst_map->pmap;
7444
7445 if ((kr = vm_map_copy_overwrite_aligned(
7446 dst_map, tmp_entry, copy,
7447 base_addr, local_pmap)) != KERN_SUCCESS) {
7448 if(next_copy != NULL) {
7449 copy->cpy_hdr.nentries +=
7450 remaining_entries;
7451 copy->cpy_hdr.links.prev->vme_next =
7452 next_copy;
7453 copy->cpy_hdr.links.prev =
7454 previous_prev;
7455 copy->size += copy_size;
7456 }
7457 return kr;
7458 }
7459 vm_map_unlock(dst_map);
7460 } else {
7461 /*
7462 * Performance gain:
7463 *
7464 * if the copy and dst address are misaligned but have the same
7465 * offset within the page, we can copy the misaligned parts
7466 * with the unaligned path and the rest with the aligned path. If they are
7467 * aligned but len is unaligned, we simply need to copy
7468 * the end bit unaligned. We'll need to split the misaligned
7469 * bits of the region in this case!
7470 */
7471 /* ALWAYS UNLOCKS THE dst_map MAP */
7472 kr = vm_map_copy_overwrite_unaligned(
7473 dst_map,
7474 tmp_entry,
7475 copy,
7476 base_addr,
7477 discard_on_success);
7478 if (kr != KERN_SUCCESS) {
7479 if(next_copy != NULL) {
7480 copy->cpy_hdr.nentries +=
7481 remaining_entries;
7482 copy->cpy_hdr.links.prev->vme_next =
7483 next_copy;
7484 copy->cpy_hdr.links.prev =
7485 previous_prev;
7486 copy->size += copy_size;
7487 }
7488 return kr;
7489 }
7490 }
7491 total_size -= copy_size;
7492 if(total_size == 0)
7493 break;
7494 base_addr += copy_size;
7495 copy_size = 0;
7496 copy->offset = new_offset;
7497 if(next_copy != NULL) {
7498 copy->cpy_hdr.nentries = remaining_entries;
7499 copy->cpy_hdr.links.next = next_copy;
7500 copy->cpy_hdr.links.prev = previous_prev;
7501 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7502 copy->size = total_size;
7503 }
7504 vm_map_lock(dst_map);
7505 while(TRUE) {
7506 if (!vm_map_lookup_entry(dst_map,
7507 base_addr, &tmp_entry)) {
7508 vm_map_unlock(dst_map);
7509 return(KERN_INVALID_ADDRESS);
7510 }
7511 if (tmp_entry->in_transition) {
7512 entry->needs_wakeup = TRUE;
7513 vm_map_entry_wait(dst_map, THREAD_UNINT);
7514 } else {
7515 break;
7516 }
7517 }
7518 vm_map_clip_start(dst_map,
7519 tmp_entry,
7520 vm_map_trunc_page(base_addr,
7521 VM_MAP_PAGE_MASK(dst_map)));
7522
7523 entry = tmp_entry;
7524 } /* while */
7525
7526 /*
7527 * Throw away the vm_map_copy object
7528 */
7529 if (discard_on_success)
7530 vm_map_copy_discard(copy);
7531
7532 return(KERN_SUCCESS);
7533 }/* vm_map_copy_overwrite */
7534
7535 kern_return_t
7536 vm_map_copy_overwrite(
7537 vm_map_t dst_map,
7538 vm_map_offset_t dst_addr,
7539 vm_map_copy_t copy,
7540 boolean_t interruptible)
7541 {
7542 vm_map_size_t head_size, tail_size;
7543 vm_map_copy_t head_copy, tail_copy;
7544 vm_map_offset_t head_addr, tail_addr;
7545 vm_map_entry_t entry;
7546 kern_return_t kr;
7547
7548 head_size = 0;
7549 tail_size = 0;
7550 head_copy = NULL;
7551 tail_copy = NULL;
7552 head_addr = 0;
7553 tail_addr = 0;
7554
7555 if (interruptible ||
7556 copy == VM_MAP_COPY_NULL ||
7557 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7558 /*
7559 * We can't split the "copy" map if we're interruptible
7560 * or if we don't have a "copy" map...
7561 */
7562 blunt_copy:
7563 return vm_map_copy_overwrite_nested(dst_map,
7564 dst_addr,
7565 copy,
7566 interruptible,
7567 (pmap_t) NULL,
7568 TRUE);
7569 }
7570
7571 if (copy->size < 3 * PAGE_SIZE) {
7572 /*
7573 * Too small to bother with optimizing...
7574 */
7575 goto blunt_copy;
7576 }
7577
7578 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7579 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7580 /*
7581 * Incompatible mis-alignment of source and destination...
7582 */
7583 goto blunt_copy;
7584 }
7585
7586 /*
7587 * Proper alignment or identical mis-alignment at the beginning.
7588 * Let's try and do a small unaligned copy first (if needed)
7589 * and then an aligned copy for the rest.
7590 */
7591 if (!page_aligned(dst_addr)) {
7592 head_addr = dst_addr;
7593 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7594 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7595 }
7596 if (!page_aligned(copy->offset + copy->size)) {
7597 /*
7598 * Mis-alignment at the end.
7599 * Do an aligned copy up to the last page and
7600 * then an unaligned copy for the remaining bytes.
7601 */
7602 tail_size = ((copy->offset + copy->size) &
7603 VM_MAP_PAGE_MASK(dst_map));
7604 tail_addr = dst_addr + copy->size - tail_size;
7605 }
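/*
 * Worked example (illustrative, assuming 4KB map pages): with
 * dst_addr = 0x7000f80, copy->offset mis-aligned by the same 0xf80 and
 * copy->size = 0x4100, we get head_size = 0x1000 - 0xf80 = 0x80
 * (reaching 0x7001000), tail_size = (0xf80 + 0x4100) & 0xfff = 0x80
 * with tail_addr = 0x7005000, and the middle 0x4000 bytes go through
 * the aligned path below.
 */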
7606
7607 if (head_size + tail_size == copy->size) {
7608 /*
7609 * It's all unaligned, no optimization possible...
7610 */
7611 goto blunt_copy;
7612 }
7613
7614 /*
7615 * Can't optimize if there are any submaps in the
7616 * destination due to the way we free the "copy" map
7617 * progressively in vm_map_copy_overwrite_nested()
7618 * in that case.
7619 */
7620 vm_map_lock_read(dst_map);
7621 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7622 vm_map_unlock_read(dst_map);
7623 goto blunt_copy;
7624 }
7625 for (;
7626 (entry != vm_map_copy_to_entry(copy) &&
7627 entry->vme_start < dst_addr + copy->size);
7628 entry = entry->vme_next) {
7629 if (entry->is_sub_map) {
7630 vm_map_unlock_read(dst_map);
7631 goto blunt_copy;
7632 }
7633 }
7634 vm_map_unlock_read(dst_map);
7635
7636 if (head_size) {
7637 /*
7638 * Unaligned copy of the first "head_size" bytes, to reach
7639 * a page boundary.
7640 */
7641
7642 /*
7643 * Extract "head_copy" out of "copy".
7644 */
7645 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7646 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7647 vm_map_copy_first_entry(head_copy) =
7648 vm_map_copy_to_entry(head_copy);
7649 vm_map_copy_last_entry(head_copy) =
7650 vm_map_copy_to_entry(head_copy);
7651 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7652 head_copy->cpy_hdr.nentries = 0;
7653 head_copy->cpy_hdr.entries_pageable =
7654 copy->cpy_hdr.entries_pageable;
7655 vm_map_store_init(&head_copy->cpy_hdr);
7656
7657 head_copy->offset = copy->offset;
7658 head_copy->size = head_size;
7659
7660 copy->offset += head_size;
7661 copy->size -= head_size;
7662
7663 entry = vm_map_copy_first_entry(copy);
7664 vm_map_copy_clip_end(copy, entry, copy->offset);
7665 vm_map_copy_entry_unlink(copy, entry);
7666 vm_map_copy_entry_link(head_copy,
7667 vm_map_copy_to_entry(head_copy),
7668 entry);
7669
7670 /*
7671 * Do the unaligned copy.
7672 */
7673 kr = vm_map_copy_overwrite_nested(dst_map,
7674 head_addr,
7675 head_copy,
7676 interruptible,
7677 (pmap_t) NULL,
7678 FALSE);
7679 if (kr != KERN_SUCCESS)
7680 goto done;
7681 }
7682
7683 if (tail_size) {
7684 /*
7685 * Extract "tail_copy" out of "copy".
7686 */
7687 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7688 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7689 vm_map_copy_first_entry(tail_copy) =
7690 vm_map_copy_to_entry(tail_copy);
7691 vm_map_copy_last_entry(tail_copy) =
7692 vm_map_copy_to_entry(tail_copy);
7693 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7694 tail_copy->cpy_hdr.nentries = 0;
7695 tail_copy->cpy_hdr.entries_pageable =
7696 copy->cpy_hdr.entries_pageable;
7697 vm_map_store_init(&tail_copy->cpy_hdr);
7698
7699 tail_copy->offset = copy->offset + copy->size - tail_size;
7700 tail_copy->size = tail_size;
7701
7702 copy->size -= tail_size;
7703
7704 entry = vm_map_copy_last_entry(copy);
7705 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7706 entry = vm_map_copy_last_entry(copy);
7707 vm_map_copy_entry_unlink(copy, entry);
7708 vm_map_copy_entry_link(tail_copy,
7709 vm_map_copy_last_entry(tail_copy),
7710 entry);
7711 }
7712
7713 /*
7714 * Copy most (or possibly all) of the data.
7715 */
7716 kr = vm_map_copy_overwrite_nested(dst_map,
7717 dst_addr + head_size,
7718 copy,
7719 interruptible,
7720 (pmap_t) NULL,
7721 FALSE);
7722 if (kr != KERN_SUCCESS) {
7723 goto done;
7724 }
7725
7726 if (tail_size) {
7727 kr = vm_map_copy_overwrite_nested(dst_map,
7728 tail_addr,
7729 tail_copy,
7730 interruptible,
7731 (pmap_t) NULL,
7732 FALSE);
7733 }
7734
7735 done:
7736 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7737 if (kr == KERN_SUCCESS) {
7738 /*
7739 * Discard all the copy maps.
7740 */
7741 if (head_copy) {
7742 vm_map_copy_discard(head_copy);
7743 head_copy = NULL;
7744 }
7745 vm_map_copy_discard(copy);
7746 if (tail_copy) {
7747 vm_map_copy_discard(tail_copy);
7748 tail_copy = NULL;
7749 }
7750 } else {
7751 /*
7752 * Re-assemble the original copy map.
7753 */
7754 if (head_copy) {
7755 entry = vm_map_copy_first_entry(head_copy);
7756 vm_map_copy_entry_unlink(head_copy, entry);
7757 vm_map_copy_entry_link(copy,
7758 vm_map_copy_to_entry(copy),
7759 entry);
7760 copy->offset -= head_size;
7761 copy->size += head_size;
7762 vm_map_copy_discard(head_copy);
7763 head_copy = NULL;
7764 }
7765 if (tail_copy) {
7766 entry = vm_map_copy_last_entry(tail_copy);
7767 vm_map_copy_entry_unlink(tail_copy, entry);
7768 vm_map_copy_entry_link(copy,
7769 vm_map_copy_last_entry(copy),
7770 entry);
7771 copy->size += tail_size;
7772 vm_map_copy_discard(tail_copy);
7773 tail_copy = NULL;
7774 }
7775 }
7776 return kr;
7777 }
7778
7779
7780 /*
7781 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
7782 *
7783 * Description:
7784 * Physically copy unaligned data
7785 *
7786 * Implementation:
7787 * Unaligned parts of pages have to be physically copied. We use
7788 * a modified form of vm_fault_copy (which understands non-aligned
7789 * page offsets and sizes) to do the copy. We attempt to copy as
7790 * much memory in one go as possible; however, vm_fault_copy copies
7791 * within one memory object, so we have to find the smallest of "amount left",
7792 * "source object data size" and "target object data size". With
7793 * unaligned data we don't need to split regions; therefore the source
7794 * (copy) object should be one map entry, though the target range may be split
7795 * over multiple map entries. In any event we are pessimistic
7796 * about these assumptions.
7797 *
7798 * Assumptions:
7799 * dst_map is locked on entry and is returned locked on success,
7800 * unlocked on error.
7801 */
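/*
 * In effect, each pass through the loop below copies
 *	copy_size = MIN(amount_left,
 *			copy_entry->vme_end - (copy_entry->vme_start + src_offset),
 *			entry->vme_end - start)
 * bytes with vm_fault_copy() and then advances whichever of the source
 * copy entry and the destination entry has been exhausted (an
 * illustrative restatement of the selection logic in the body).
 */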
7802
7803 static kern_return_t
7804 vm_map_copy_overwrite_unaligned(
7805 vm_map_t dst_map,
7806 vm_map_entry_t entry,
7807 vm_map_copy_t copy,
7808 vm_map_offset_t start,
7809 boolean_t discard_on_success)
7810 {
7811 vm_map_entry_t copy_entry;
7812 vm_map_entry_t copy_entry_next;
7813 vm_map_version_t version;
7814 vm_object_t dst_object;
7815 vm_object_offset_t dst_offset;
7816 vm_object_offset_t src_offset;
7817 vm_object_offset_t entry_offset;
7818 vm_map_offset_t entry_end;
7819 vm_map_size_t src_size,
7820 dst_size,
7821 copy_size,
7822 amount_left;
7823 kern_return_t kr = KERN_SUCCESS;
7824
7825
7826 copy_entry = vm_map_copy_first_entry(copy);
7827
7828 vm_map_lock_write_to_read(dst_map);
7829
7830 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7831 amount_left = copy->size;
7832 /*
7833 * unaligned so we never clipped this entry, we need the offset into
7834 * the vm_object not just the data.
7835 */
7836 while (amount_left > 0) {
7837
7838 if (entry == vm_map_to_entry(dst_map)) {
7839 vm_map_unlock_read(dst_map);
7840 return KERN_INVALID_ADDRESS;
7841 }
7842
7843 /* "start" must be within the current map entry */
7844 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7845
7846 dst_offset = start - entry->vme_start;
7847
7848 dst_size = entry->vme_end - start;
7849
7850 src_size = copy_entry->vme_end -
7851 (copy_entry->vme_start + src_offset);
7852
7853 if (dst_size < src_size) {
7854 /*
7855 * we can only copy dst_size bytes before
7856 * we have to get the next destination entry
7857 */
7858 copy_size = dst_size;
7859 } else {
7860 /*
7861 * we can only copy src_size bytes before
7862 * we have to get the next source copy entry
7863 */
7864 copy_size = src_size;
7865 }
7866
7867 if (copy_size > amount_left) {
7868 copy_size = amount_left;
7869 }
7870 /*
7871 * Entry needs copy: create a shadow object for the
7872 * copy-on-write region.
7873 */
7874 if (entry->needs_copy &&
7875 ((entry->protection & VM_PROT_WRITE) != 0))
7876 {
7877 if (vm_map_lock_read_to_write(dst_map)) {
7878 vm_map_lock_read(dst_map);
7879 goto RetryLookup;
7880 }
7881 VME_OBJECT_SHADOW(entry,
7882 (vm_map_size_t)(entry->vme_end
7883 - entry->vme_start));
7884 entry->needs_copy = FALSE;
7885 vm_map_lock_write_to_read(dst_map);
7886 }
7887 dst_object = VME_OBJECT(entry);
7888 /*
7889 * unlike with the virtual (aligned) copy, we're going
7890 * to fault on it; therefore we need a target object.
7891 */
7892 if (dst_object == VM_OBJECT_NULL) {
7893 if (vm_map_lock_read_to_write(dst_map)) {
7894 vm_map_lock_read(dst_map);
7895 goto RetryLookup;
7896 }
7897 dst_object = vm_object_allocate((vm_map_size_t)
7898 entry->vme_end - entry->vme_start);
7899 VME_OBJECT(entry) = dst_object;
7900 VME_OFFSET_SET(entry, 0);
7901 assert(entry->use_pmap);
7902 vm_map_lock_write_to_read(dst_map);
7903 }
7904 /*
7905 * Take an object reference and unlock map. The "entry" may
7906 * disappear or change when the map is unlocked.
7907 */
7908 vm_object_reference(dst_object);
7909 version.main_timestamp = dst_map->timestamp;
7910 entry_offset = VME_OFFSET(entry);
7911 entry_end = entry->vme_end;
7912 vm_map_unlock_read(dst_map);
7913 /*
7914 * Copy as much as possible in one pass
7915 */
7916 kr = vm_fault_copy(
7917 VME_OBJECT(copy_entry),
7918 VME_OFFSET(copy_entry) + src_offset,
7919 &copy_size,
7920 dst_object,
7921 entry_offset + dst_offset,
7922 dst_map,
7923 &version,
7924 THREAD_UNINT );
7925
7926 start += copy_size;
7927 src_offset += copy_size;
7928 amount_left -= copy_size;
7929 /*
7930 * Release the object reference
7931 */
7932 vm_object_deallocate(dst_object);
7933 /*
7934 * If a hard error occurred, return it now
7935 */
7936 if (kr != KERN_SUCCESS)
7937 return kr;
7938
7939 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7940 || amount_left == 0)
7941 {
7942 /*
7943 * all done with this copy entry, dispose.
7944 */
7945 copy_entry_next = copy_entry->vme_next;
7946
7947 if (discard_on_success) {
7948 vm_map_copy_entry_unlink(copy, copy_entry);
7949 assert(!copy_entry->is_sub_map);
7950 vm_object_deallocate(VME_OBJECT(copy_entry));
7951 vm_map_copy_entry_dispose(copy, copy_entry);
7952 }
7953
7954 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7955 amount_left) {
7956 /*
7957 * not finished copying but ran out of source
7958 */
7959 return KERN_INVALID_ADDRESS;
7960 }
7961
7962 copy_entry = copy_entry_next;
7963
7964 src_offset = 0;
7965 }
7966
7967 if (amount_left == 0)
7968 return KERN_SUCCESS;
7969
7970 vm_map_lock_read(dst_map);
7971 if (version.main_timestamp == dst_map->timestamp) {
7972 if (start == entry_end) {
7973 /*
7974 * destination region is split. Use the version
7975 * information to avoid a lookup in the normal
7976 * case.
7977 */
7978 entry = entry->vme_next;
7979 /*
7980 * should be contiguous. Fail if we encounter
7981 * a hole in the destination.
7982 */
7983 if (start != entry->vme_start) {
7984 vm_map_unlock_read(dst_map);
7985 return KERN_INVALID_ADDRESS ;
7986 }
7987 }
7988 } else {
7989 /*
7990 * Map version check failed.
7991 * we must look up the entry because somebody
7992 * might have changed the map behind our backs.
7993 */
7994 RetryLookup:
7995 if (!vm_map_lookup_entry(dst_map, start, &entry))
7996 {
7997 vm_map_unlock_read(dst_map);
7998 return KERN_INVALID_ADDRESS ;
7999 }
8000 }
8001 }/* while */
8002
8003 return KERN_SUCCESS;
8004 }/* vm_map_copy_overwrite_unaligned */
8005
8006 /*
8007 * Routine: vm_map_copy_overwrite_aligned [internal use only]
8008 *
8009 * Description:
8010 * Does all the vm_trickery possible for whole pages.
8011 *
8012 * Implementation:
8013 *
8014 * If there are no permanent objects in the destination,
8015 * and the source and destination map entry zones match,
8016 * and the destination map entry is not shared,
8017 * then the map entries can be deleted and replaced
8018 * with those from the copy. The following code is the
8019 * basic idea of what to do, but there are lots of annoying
8020 * little details about getting protection and inheritance
8021 * right. Should add protection, inheritance, and sharing checks
8022 * to the above pass and make sure that no wiring is involved.
8023 */
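/*
 * A minimal sketch of that basic idea, kept under "#if 0" because it is
 * illustrative only: splice each copy entry's object into the destination
 * entry it overwrites, ignoring the protection, inheritance, sharing and
 * wiring details that the real routine below has to handle.
 */
#if 0
	while ((copy_entry = vm_map_copy_first_entry(copy)) !=
	       vm_map_copy_to_entry(copy)) {
		entry = tmp_entry;			/* destination entry */
		/* drop the destination's old backing object... */
		vm_object_deallocate(VME_OBJECT(entry));
		/* ...and install the source object in its place */
		VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
		VME_OFFSET_SET(entry, VME_OFFSET(copy_entry));
		/* the copy entry has been consumed */
		vm_map_copy_entry_unlink(copy, copy_entry);
		vm_map_copy_entry_dispose(copy, copy_entry);
		tmp_entry = tmp_entry->vme_next;
	}
#endif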
8024
8025 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8026 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8027 int vm_map_copy_overwrite_aligned_src_large = 0;
8028
8029 static kern_return_t
8030 vm_map_copy_overwrite_aligned(
8031 vm_map_t dst_map,
8032 vm_map_entry_t tmp_entry,
8033 vm_map_copy_t copy,
8034 vm_map_offset_t start,
8035 __unused pmap_t pmap)
8036 {
8037 vm_object_t object;
8038 vm_map_entry_t copy_entry;
8039 vm_map_size_t copy_size;
8040 vm_map_size_t size;
8041 vm_map_entry_t entry;
8042
8043 while ((copy_entry = vm_map_copy_first_entry(copy))
8044 != vm_map_copy_to_entry(copy))
8045 {
8046 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8047
8048 entry = tmp_entry;
8049 if (entry->is_sub_map) {
8050 /* unnested when clipped earlier */
8051 assert(!entry->use_pmap);
8052 }
8053 if (entry == vm_map_to_entry(dst_map)) {
8054 vm_map_unlock(dst_map);
8055 return KERN_INVALID_ADDRESS;
8056 }
8057 size = (entry->vme_end - entry->vme_start);
8058 /*
8059 * Make sure that no holes popped up in the
8060 * address map, and that the protection is
8061 * still valid, in case the map was unlocked
8062 * earlier.
8063 */
8064
8065 if ((entry->vme_start != start) || ((entry->is_sub_map)
8066 && !entry->needs_copy)) {
8067 vm_map_unlock(dst_map);
8068 return(KERN_INVALID_ADDRESS);
8069 }
8070 assert(entry != vm_map_to_entry(dst_map));
8071
8072 /*
8073 * Check protection again
8074 */
8075
8076 if ( ! (entry->protection & VM_PROT_WRITE)) {
8077 vm_map_unlock(dst_map);
8078 return(KERN_PROTECTION_FAILURE);
8079 }
8080
8081 /*
8082 * Adjust to source size first
8083 */
8084
8085 if (copy_size < size) {
8086 if (entry->map_aligned &&
8087 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8088 VM_MAP_PAGE_MASK(dst_map))) {
8089 /* no longer map-aligned */
8090 entry->map_aligned = FALSE;
8091 }
8092 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8093 size = copy_size;
8094 }
8095
8096 /*
8097 * Adjust to destination size
8098 */
8099
8100 if (size < copy_size) {
8101 vm_map_copy_clip_end(copy, copy_entry,
8102 copy_entry->vme_start + size);
8103 copy_size = size;
8104 }
8105
8106 assert((entry->vme_end - entry->vme_start) == size);
8107 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8108 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8109
8110 /*
8111 * If the destination contains temporary unshared memory,
8112 * we can perform the copy by throwing it away and
8113 * installing the source data.
8114 */
8115
8116 object = VME_OBJECT(entry);
8117 if ((!entry->is_shared &&
8118 ((object == VM_OBJECT_NULL) ||
8119 (object->internal && !object->true_share))) ||
8120 entry->needs_copy) {
8121 vm_object_t old_object = VME_OBJECT(entry);
8122 vm_object_offset_t old_offset = VME_OFFSET(entry);
8123 vm_object_offset_t offset;
8124
8125 /*
8126 * Ensure that the source and destination aren't
8127 * identical
8128 */
8129 if (old_object == VME_OBJECT(copy_entry) &&
8130 old_offset == VME_OFFSET(copy_entry)) {
8131 vm_map_copy_entry_unlink(copy, copy_entry);
8132 vm_map_copy_entry_dispose(copy, copy_entry);
8133
8134 if (old_object != VM_OBJECT_NULL)
8135 vm_object_deallocate(old_object);
8136
8137 start = tmp_entry->vme_end;
8138 tmp_entry = tmp_entry->vme_next;
8139 continue;
8140 }
8141
8142 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8143 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
8144 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8145 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
8146 copy_size <= __TRADEOFF1_COPY_SIZE) {
8147 /*
8148 * Virtual vs. Physical copy tradeoff #1.
8149 *
8150 * Copying only a few pages out of a large
8151 * object: do a physical copy instead of
8152 * a virtual copy, to avoid possibly keeping
8153 * the entire large object alive because of
8154 * those few copy-on-write pages.
8155 */
8156 vm_map_copy_overwrite_aligned_src_large++;
8157 goto slow_copy;
8158 }
8159
8160 if ((dst_map->pmap != kernel_pmap) &&
8161 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8162 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
8163 vm_object_t new_object, new_shadow;
8164
8165 /*
8166 * We're about to map something over a mapping
8167 * established by malloc()...
8168 */
8169 new_object = VME_OBJECT(copy_entry);
8170 if (new_object != VM_OBJECT_NULL) {
8171 vm_object_lock_shared(new_object);
8172 }
8173 while (new_object != VM_OBJECT_NULL &&
8174 !new_object->true_share &&
8175 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8176 new_object->internal) {
8177 new_shadow = new_object->shadow;
8178 if (new_shadow == VM_OBJECT_NULL) {
8179 break;
8180 }
8181 vm_object_lock_shared(new_shadow);
8182 vm_object_unlock(new_object);
8183 new_object = new_shadow;
8184 }
8185 if (new_object != VM_OBJECT_NULL) {
8186 if (!new_object->internal) {
8187 /*
8188 * The new mapping is backed
8189 * by an external object. We
8190 * don't want malloc'ed memory
8191 * to be replaced with such a
8192 * non-anonymous mapping, so
8193 * let's go off the optimized
8194 * path...
8195 */
8196 vm_map_copy_overwrite_aligned_src_not_internal++;
8197 vm_object_unlock(new_object);
8198 goto slow_copy;
8199 }
8200 if (new_object->true_share ||
8201 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8202 /*
8203 * Same if there's a "true_share"
8204 * object in the shadow chain, or
8205 * an object with a non-default
8206 * (SYMMETRIC) copy strategy.
8207 */
8208 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8209 vm_object_unlock(new_object);
8210 goto slow_copy;
8211 }
8212 vm_object_unlock(new_object);
8213 }
8214 /*
8215 * The new mapping is still backed by
8216 * anonymous (internal) memory, so it's
8217 * OK to substitute it for the original
8218 * malloc() mapping.
8219 */
8220 }
8221
8222 if (old_object != VM_OBJECT_NULL) {
8223 if(entry->is_sub_map) {
8224 if(entry->use_pmap) {
8225 #ifndef NO_NESTED_PMAP
8226 pmap_unnest(dst_map->pmap,
8227 (addr64_t)entry->vme_start,
8228 entry->vme_end - entry->vme_start);
8229 #endif /* NO_NESTED_PMAP */
8230 if(dst_map->mapped_in_other_pmaps) {
8231 /* clean up parent */
8232 /* map/maps */
8233 vm_map_submap_pmap_clean(
8234 dst_map, entry->vme_start,
8235 entry->vme_end,
8236 VME_SUBMAP(entry),
8237 VME_OFFSET(entry));
8238 }
8239 } else {
8240 vm_map_submap_pmap_clean(
8241 dst_map, entry->vme_start,
8242 entry->vme_end,
8243 VME_SUBMAP(entry),
8244 VME_OFFSET(entry));
8245 }
8246 vm_map_deallocate(VME_SUBMAP(entry));
8247 } else {
8248 if(dst_map->mapped_in_other_pmaps) {
8249 vm_object_pmap_protect_options(
8250 VME_OBJECT(entry),
8251 VME_OFFSET(entry),
8252 entry->vme_end
8253 - entry->vme_start,
8254 PMAP_NULL,
8255 entry->vme_start,
8256 VM_PROT_NONE,
8257 PMAP_OPTIONS_REMOVE);
8258 } else {
8259 pmap_remove_options(
8260 dst_map->pmap,
8261 (addr64_t)(entry->vme_start),
8262 (addr64_t)(entry->vme_end),
8263 PMAP_OPTIONS_REMOVE);
8264 }
8265 vm_object_deallocate(old_object);
8266 }
8267 }
8268
8269 entry->is_sub_map = FALSE;
8270 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8271 object = VME_OBJECT(entry);
8272 entry->needs_copy = copy_entry->needs_copy;
8273 entry->wired_count = 0;
8274 entry->user_wired_count = 0;
8275 offset = VME_OFFSET(copy_entry);
8276 VME_OFFSET_SET(entry, offset);
8277
8278 vm_map_copy_entry_unlink(copy, copy_entry);
8279 vm_map_copy_entry_dispose(copy, copy_entry);
8280
8281 /*
8282 * We could try to push pages into the pmap at this point, BUT
8283 * that optimization only saved on average 2 us per page if ALL
8284 * the pages in the source were currently mapped
8285 * and ALL the pages in the dest were touched. If fewer than
8286 * 2/3 of the pages were touched, the optimization actually cost more cycles.
8287 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
8288 */
8289
8290 /*
8291 * Set up for the next iteration. The map
8292 * has not been unlocked, so the next
8293 * address should be at the end of this
8294 * entry, and the next map entry should be
8295 * the one following it.
8296 */
8297
8298 start = tmp_entry->vme_end;
8299 tmp_entry = tmp_entry->vme_next;
8300 } else {
8301 vm_map_version_t version;
8302 vm_object_t dst_object;
8303 vm_object_offset_t dst_offset;
8304 kern_return_t r;
8305
8306 slow_copy:
8307 if (entry->needs_copy) {
8308 VME_OBJECT_SHADOW(entry,
8309 (entry->vme_end -
8310 entry->vme_start));
8311 entry->needs_copy = FALSE;
8312 }
8313
8314 dst_object = VME_OBJECT(entry);
8315 dst_offset = VME_OFFSET(entry);
8316
8317 /*
8318 * Take an object reference, and record
8319 * the map version information so that the
8320 * map can be safely unlocked.
8321 */
8322
8323 if (dst_object == VM_OBJECT_NULL) {
8324 /*
8325 * We would usually have just taken the
8326 * optimized path above if the destination
8327 * object has not been allocated yet. But we
8328 * now disable that optimization if the copy
8329 * entry's object is not backed by anonymous
8330 * memory to avoid replacing malloc'ed
8331 * (i.e. re-usable) anonymous memory with a
8332 * not-so-anonymous mapping.
8333 * So we have to handle this case here and
8334 * allocate a new VM object for this map entry.
8335 */
8336 dst_object = vm_object_allocate(
8337 entry->vme_end - entry->vme_start);
8338 dst_offset = 0;
8339 VME_OBJECT_SET(entry, dst_object);
8340 VME_OFFSET_SET(entry, dst_offset);
8341 assert(entry->use_pmap);
8342
8343 }
8344
8345 vm_object_reference(dst_object);
8346
8347 /* account for unlock bumping up timestamp */
8348 version.main_timestamp = dst_map->timestamp + 1;
8349
8350 vm_map_unlock(dst_map);
8351
8352 /*
8353 * Copy as much as possible in one pass
8354 */
8355
8356 copy_size = size;
8357 r = vm_fault_copy(
8358 VME_OBJECT(copy_entry),
8359 VME_OFFSET(copy_entry),
8360 &copy_size,
8361 dst_object,
8362 dst_offset,
8363 dst_map,
8364 &version,
8365 THREAD_UNINT );
8366
8367 /*
8368 * Release the object reference
8369 */
8370
8371 vm_object_deallocate(dst_object);
8372
8373 /*
8374 * If a hard error occurred, return it now
8375 */
8376
8377 if (r != KERN_SUCCESS)
8378 return(r);
8379
8380 if (copy_size != 0) {
8381 /*
8382 * Dispose of the copied region
8383 */
8384
8385 vm_map_copy_clip_end(copy, copy_entry,
8386 copy_entry->vme_start + copy_size);
8387 vm_map_copy_entry_unlink(copy, copy_entry);
8388 vm_object_deallocate(VME_OBJECT(copy_entry));
8389 vm_map_copy_entry_dispose(copy, copy_entry);
8390 }
8391
8392 /*
8393 * Pick up in the destination map where we left off.
8394 *
8395 * Use the version information to avoid a lookup
8396 * in the normal case.
8397 */
8398
8399 start += copy_size;
8400 vm_map_lock(dst_map);
8401 if (version.main_timestamp == dst_map->timestamp &&
8402 copy_size != 0) {
8403 /* We can safely use saved tmp_entry value */
8404
8405 if (tmp_entry->map_aligned &&
8406 !VM_MAP_PAGE_ALIGNED(
8407 start,
8408 VM_MAP_PAGE_MASK(dst_map))) {
8409 /* no longer map-aligned */
8410 tmp_entry->map_aligned = FALSE;
8411 }
8412 vm_map_clip_end(dst_map, tmp_entry, start);
8413 tmp_entry = tmp_entry->vme_next;
8414 } else {
8415 /* Must do lookup of tmp_entry */
8416
8417 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8418 vm_map_unlock(dst_map);
8419 return(KERN_INVALID_ADDRESS);
8420 }
8421 if (tmp_entry->map_aligned &&
8422 !VM_MAP_PAGE_ALIGNED(
8423 start,
8424 VM_MAP_PAGE_MASK(dst_map))) {
8425 /* no longer map-aligned */
8426 tmp_entry->map_aligned = FALSE;
8427 }
8428 vm_map_clip_start(dst_map, tmp_entry, start);
8429 }
8430 }
8431 }/* while */
8432
8433 return(KERN_SUCCESS);
8434 }/* vm_map_copy_overwrite_aligned */
8435
8436 /*
8437 * Routine: vm_map_copyin_kernel_buffer [internal use only]
8438 *
8439 * Description:
8440 * Copy in data to a kernel buffer from space in the
8441 * source map. The original space may be optionally
8442 * deallocated.
8443 *
8444 * If successful, returns a new copy object.
8445 */
8446 static kern_return_t
8447 vm_map_copyin_kernel_buffer(
8448 vm_map_t src_map,
8449 vm_map_offset_t src_addr,
8450 vm_map_size_t len,
8451 boolean_t src_destroy,
8452 vm_map_copy_t *copy_result)
8453 {
8454 kern_return_t kr;
8455 vm_map_copy_t copy;
8456 vm_size_t kalloc_size;
8457
8458 if (len > msg_ool_size_small)
8459 return KERN_INVALID_ARGUMENT;
8460
8461 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8462
8463 copy = (vm_map_copy_t)kalloc(kalloc_size);
8464 if (copy == VM_MAP_COPY_NULL)
8465 return KERN_RESOURCE_SHORTAGE;
8466 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8467 copy->size = len;
8468 copy->offset = 0;
8469
8470 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
8471 if (kr != KERN_SUCCESS) {
8472 kfree(copy, kalloc_size);
8473 return kr;
8474 }
8475 if (src_destroy) {
8476 (void) vm_map_remove(
8477 src_map,
8478 vm_map_trunc_page(src_addr,
8479 VM_MAP_PAGE_MASK(src_map)),
8480 vm_map_round_page(src_addr + len,
8481 VM_MAP_PAGE_MASK(src_map)),
8482 (VM_MAP_REMOVE_INTERRUPTIBLE |
8483 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
8484 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
8485 }
8486 *copy_result = copy;
8487 return KERN_SUCCESS;
8488 }
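/*
 * Sketch of how this path is typically reached (illustrative only;
 * "user_map", "user_addr" and "nbytes" are hypothetical): for regions
 * smaller than msg_ool_size_small, and when maximum protection is not
 * requested, vm_map_copyin_common() takes this kernel-buffer path and
 * physically copies the data into copy->cpy_kdata instead of building
 * an entry-list copy.
 */
#if 0
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* nbytes < msg_ool_size_small, so this ends up in
	 * vm_map_copyin_kernel_buffer() */
	kr = vm_map_copyin(user_map, user_addr, nbytes,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr == KERN_SUCCESS) {
		/* ... hand "copy" to vm_map_copyout() or discard it */
		vm_map_copy_discard(copy);
	}
#endif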
8489
8490 /*
8491 * Routine: vm_map_copyout_kernel_buffer [internal use only]
8492 *
8493 * Description:
8494 * Copy out data from a kernel buffer into space in the
8495 * destination map. The space may optionally be dynamically
8496 * allocated.
8497 *
8498 * If successful, consumes the copy object.
8499 * Otherwise, the caller is responsible for it.
8500 */
8501 static int vm_map_copyout_kernel_buffer_failures = 0;
8502 static kern_return_t
8503 vm_map_copyout_kernel_buffer(
8504 vm_map_t map,
8505 vm_map_address_t *addr, /* IN/OUT */
8506 vm_map_copy_t copy,
8507 boolean_t overwrite,
8508 boolean_t consume_on_success)
8509 {
8510 kern_return_t kr = KERN_SUCCESS;
8511 thread_t thread = current_thread();
8512
8513 /*
8514 * check for corrupted vm_map_copy structure
8515 */
8516 if (copy->size > msg_ool_size_small || copy->offset)
8517 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8518 (long long)copy->size, (long long)copy->offset);
8519
8520 if (!overwrite) {
8521
8522 /*
8523 * Allocate space in the target map for the data
8524 */
8525 *addr = 0;
8526 kr = vm_map_enter(map,
8527 addr,
8528 vm_map_round_page(copy->size,
8529 VM_MAP_PAGE_MASK(map)),
8530 (vm_map_offset_t) 0,
8531 VM_FLAGS_ANYWHERE,
8532 VM_OBJECT_NULL,
8533 (vm_object_offset_t) 0,
8534 FALSE,
8535 VM_PROT_DEFAULT,
8536 VM_PROT_ALL,
8537 VM_INHERIT_DEFAULT);
8538 if (kr != KERN_SUCCESS)
8539 return kr;
8540 }
8541
8542 /*
8543 * Copyout the data from the kernel buffer to the target map.
8544 */
8545 if (thread->map == map) {
8546
8547 /*
8548 * If the target map is the current map, just do
8549 * the copy.
8550 */
8551 assert((vm_size_t) copy->size == copy->size);
8552 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8553 kr = KERN_INVALID_ADDRESS;
8554 }
8555 }
8556 else {
8557 vm_map_t oldmap;
8558
8559 /*
8560 * If the target map is another map, assume the
8561 * target's address space identity for the duration
8562 * of the copy.
8563 */
8564 vm_map_reference(map);
8565 oldmap = vm_map_switch(map);
8566
8567 assert((vm_size_t) copy->size == copy->size);
8568 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8569 vm_map_copyout_kernel_buffer_failures++;
8570 kr = KERN_INVALID_ADDRESS;
8571 }
8572
8573 (void) vm_map_switch(oldmap);
8574 vm_map_deallocate(map);
8575 }
8576
8577 if (kr != KERN_SUCCESS) {
8578 /* the copy failed, clean up */
8579 if (!overwrite) {
8580 /*
8581 * Deallocate the space we allocated in the target map.
8582 */
8583 (void) vm_map_remove(
8584 map,
8585 vm_map_trunc_page(*addr,
8586 VM_MAP_PAGE_MASK(map)),
8587 vm_map_round_page((*addr +
8588 vm_map_round_page(copy->size,
8589 VM_MAP_PAGE_MASK(map))),
8590 VM_MAP_PAGE_MASK(map)),
8591 VM_MAP_NO_FLAGS);
8592 *addr = 0;
8593 }
8594 } else {
8595 /* copy was successful, discard the copy structure */
8596 if (consume_on_success) {
8597 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8598 }
8599 }
8600
8601 return kr;
8602 }
8603
8604 /*
8605 * Macro: vm_map_copy_insert
8606 *
8607 * Description:
8608 * Link a copy chain ("copy") into a map at the
8609 * specified location (after "where").
8610 * Side effects:
8611 * The copy chain is destroyed.
8612 * Warning:
8613 * The arguments are evaluated multiple times.
8614 */
8615 #define vm_map_copy_insert(map, where, copy) \
8616 MACRO_BEGIN \
8617 vm_map_store_copy_insert(map, where, copy); \
8618 zfree(vm_map_copy_zone, copy); \
8619 MACRO_END
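/*
 * Since "copy" is expanded more than once by the macro, callers should
 * pass a simple lvalue; e.g. (hypothetical):
 *
 *	vm_map_copy_insert(dst_map, last, copy);	    fine
 *	vm_map_copy_insert(dst_map, last, grab_copy());	    grab_copy()
 *							    runs twice
 */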
8620
8621 void
8622 vm_map_copy_remap(
8623 vm_map_t map,
8624 vm_map_entry_t where,
8625 vm_map_copy_t copy,
8626 vm_map_offset_t adjustment,
8627 vm_prot_t cur_prot,
8628 vm_prot_t max_prot,
8629 vm_inherit_t inheritance)
8630 {
8631 vm_map_entry_t copy_entry, new_entry;
8632
8633 for (copy_entry = vm_map_copy_first_entry(copy);
8634 copy_entry != vm_map_copy_to_entry(copy);
8635 copy_entry = copy_entry->vme_next) {
8636 /* get a new VM map entry for the map */
8637 new_entry = vm_map_entry_create(map,
8638 !map->hdr.entries_pageable);
8639 /* copy the "copy entry" to the new entry */
8640 vm_map_entry_copy(new_entry, copy_entry);
8641 /* adjust "start" and "end" */
8642 new_entry->vme_start += adjustment;
8643 new_entry->vme_end += adjustment;
8644 /* clear some attributes */
8645 new_entry->inheritance = inheritance;
8646 new_entry->protection = cur_prot;
8647 new_entry->max_protection = max_prot;
8648 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8649 /* take an extra reference on the entry's "object" */
8650 if (new_entry->is_sub_map) {
8651 assert(!new_entry->use_pmap); /* not nested */
8652 vm_map_lock(VME_SUBMAP(new_entry));
8653 vm_map_reference(VME_SUBMAP(new_entry));
8654 vm_map_unlock(VME_SUBMAP(new_entry));
8655 } else {
8656 vm_object_reference(VME_OBJECT(new_entry));
8657 }
8658 /* insert the new entry in the map */
8659 vm_map_store_entry_link(map, where, new_entry);
8660 /* continue inserting the "copy entries" after the new entry */
8661 where = new_entry;
8662 }
8663 }
8664
8665
8666 boolean_t
8667 vm_map_copy_validate_size(
8668 vm_map_t dst_map,
8669 vm_map_copy_t copy,
8670 vm_map_size_t size)
8671 {
8672 if (copy == VM_MAP_COPY_NULL)
8673 return FALSE;
8674 switch (copy->type) {
8675 case VM_MAP_COPY_OBJECT:
8676 case VM_MAP_COPY_KERNEL_BUFFER:
8677 if (size == copy->size)
8678 return TRUE;
8679 break;
8680 case VM_MAP_COPY_ENTRY_LIST:
8681 /*
8682 * potential page-size rounding prevents us from exactly
8683 * validating this flavor of vm_map_copy, but we can at least
8684 * assert that it's within a range.
8685 */
8686 if (copy->size >= size &&
8687 copy->size <= vm_map_round_page(size,
8688 VM_MAP_PAGE_MASK(dst_map)))
8689 return TRUE;
8690 break;
8691 default:
8692 break;
8693 }
8694 return FALSE;
8695 }
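/*
 * A minimal sketch of the intended use (illustrative only; "dst_map",
 * "dst_addr", "copy" and "expected_size" are hypothetical): reject a
 * copy object whose size does not match what the caller expects before
 * overwriting memory with it.
 */
#if 0
	if (!vm_map_copy_validate_size(dst_map, copy, expected_size)) {
		vm_map_copy_discard(copy);
		return KERN_INVALID_ARGUMENT;
	}
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* interruptible */
#endif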
8696
8697
8698 /*
8699 * Routine: vm_map_copyout
8700 *
8701 * Description:
8702 * Copy out a copy chain ("copy") into newly-allocated
8703 * space in the destination map.
8704 *
8705 * If successful, consumes the copy object.
8706 * Otherwise, the caller is responsible for it.
8707 */
8708
8709 kern_return_t
8710 vm_map_copyout(
8711 vm_map_t dst_map,
8712 vm_map_address_t *dst_addr, /* OUT */
8713 vm_map_copy_t copy)
8714 {
8715 return vm_map_copyout_internal(dst_map, dst_addr, copy,
8716 TRUE, /* consume_on_success */
8717 VM_PROT_DEFAULT,
8718 VM_PROT_ALL,
8719 VM_INHERIT_DEFAULT);
8720 }
8721
8722 kern_return_t
8723 vm_map_copyout_internal(
8724 vm_map_t dst_map,
8725 vm_map_address_t *dst_addr, /* OUT */
8726 vm_map_copy_t copy,
8727 boolean_t consume_on_success,
8728 vm_prot_t cur_protection,
8729 vm_prot_t max_protection,
8730 vm_inherit_t inheritance)
8731 {
8732 vm_map_size_t size;
8733 vm_map_size_t adjustment;
8734 vm_map_offset_t start;
8735 vm_object_offset_t vm_copy_start;
8736 vm_map_entry_t last;
8737 vm_map_entry_t entry;
8738 vm_map_entry_t hole_entry;
8739
8740 /*
8741 * Check for null copy object.
8742 */
8743
8744 if (copy == VM_MAP_COPY_NULL) {
8745 *dst_addr = 0;
8746 return(KERN_SUCCESS);
8747 }
8748
8749 /*
8750 * Check for special copy object, created
8751 * by vm_map_copyin_object.
8752 */
8753
8754 if (copy->type == VM_MAP_COPY_OBJECT) {
8755 vm_object_t object = copy->cpy_object;
8756 kern_return_t kr;
8757 vm_object_offset_t offset;
8758
8759 offset = vm_object_trunc_page(copy->offset);
8760 size = vm_map_round_page((copy->size +
8761 (vm_map_size_t)(copy->offset -
8762 offset)),
8763 VM_MAP_PAGE_MASK(dst_map));
8764 *dst_addr = 0;
8765 kr = vm_map_enter(dst_map, dst_addr, size,
8766 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8767 object, offset, FALSE,
8768 VM_PROT_DEFAULT, VM_PROT_ALL,
8769 VM_INHERIT_DEFAULT);
8770 if (kr != KERN_SUCCESS)
8771 return(kr);
8772 /* Account for non-pagealigned copy object */
8773 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
8774 if (consume_on_success)
8775 zfree(vm_map_copy_zone, copy);
8776 return(KERN_SUCCESS);
8777 }
8778
8779 /*
8780 * Check for special kernel buffer allocated
8781 * by new_ipc_kmsg_copyin.
8782 */
8783
8784 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8785 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8786 copy, FALSE,
8787 consume_on_success);
8788 }
8789
8790
8791 /*
8792 * Find space for the data
8793 */
8794
8795 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8796 VM_MAP_COPY_PAGE_MASK(copy));
8797 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8798 VM_MAP_COPY_PAGE_MASK(copy))
8799 - vm_copy_start;
8800
8801
8802 StartAgain: ;
8803
8804 vm_map_lock(dst_map);
8805 if( dst_map->disable_vmentry_reuse == TRUE) {
8806 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8807 last = entry;
8808 } else {
8809 if (dst_map->holelistenabled) {
8810 hole_entry = (vm_map_entry_t)dst_map->holes_list;
8811
8812 if (hole_entry == NULL) {
8813 /*
8814 * No more space in the map?
8815 */
8816 vm_map_unlock(dst_map);
8817 return(KERN_NO_SPACE);
8818 }
8819
8820 last = hole_entry;
8821 start = last->vme_start;
8822 } else {
8823 assert(first_free_is_valid(dst_map));
8824 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8825 vm_map_min(dst_map) : last->vme_end;
8826 }
8827 start = vm_map_round_page(start,
8828 VM_MAP_PAGE_MASK(dst_map));
8829 }
8830
8831 while (TRUE) {
8832 vm_map_entry_t next = last->vme_next;
8833 vm_map_offset_t end = start + size;
8834
8835 if ((end > dst_map->max_offset) || (end < start)) {
8836 if (dst_map->wait_for_space) {
8837 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8838 assert_wait((event_t) dst_map,
8839 THREAD_INTERRUPTIBLE);
8840 vm_map_unlock(dst_map);
8841 thread_block(THREAD_CONTINUE_NULL);
8842 goto StartAgain;
8843 }
8844 }
8845 vm_map_unlock(dst_map);
8846 return(KERN_NO_SPACE);
8847 }
8848
8849 if (dst_map->holelistenabled) {
8850 if (last->vme_end >= end)
8851 break;
8852 } else {
8853 /*
8854 * If there are no more entries, we must win.
8855 *
8856 * OR
8857 *
8858 * If there is another entry, it must be
8859 * after the end of the potential new region.
8860 */
8861
8862 if (next == vm_map_to_entry(dst_map))
8863 break;
8864
8865 if (next->vme_start >= end)
8866 break;
8867 }
8868
8869 last = next;
8870
8871 if (dst_map->holelistenabled) {
8872 if (last == (vm_map_entry_t) dst_map->holes_list) {
8873 /*
8874 * Wrapped around
8875 */
8876 vm_map_unlock(dst_map);
8877 return(KERN_NO_SPACE);
8878 }
8879 start = last->vme_start;
8880 } else {
8881 start = last->vme_end;
8882 }
8883 start = vm_map_round_page(start,
8884 VM_MAP_PAGE_MASK(dst_map));
8885 }
8886
8887 if (dst_map->holelistenabled) {
8888 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
8889 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
8890 }
8891 }
8892
8893
8894 adjustment = start - vm_copy_start;
8895 if (! consume_on_success) {
8896 /*
8897 * We're not allowed to consume "copy", so we'll have to
8898 * copy its map entries into the destination map below.
8899 * No need to re-allocate map entries from the correct
8900 * (pageable or not) zone, since we'll get new map entries
8901 * during the transfer.
8902 * We'll also adjust the map entries' "start" and "end"
8903 * during the transfer, to keep "copy"'s entries consistent
8904 * with its "offset".
8905 */
8906 goto after_adjustments;
8907 }
8908
8909 /*
8910 * Since we're going to just drop the map
8911 * entries from the copy into the destination
8912 * map, they must come from the same pool.
8913 */
8914
8915 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
8916 /*
8917 * Mismatches occur when dealing with the default
8918 * pager.
8919 */
8920 zone_t old_zone;
8921 vm_map_entry_t next, new;
8922
8923 /*
8924 * Find the zone that the copies were allocated from
8925 */
8926
8927 entry = vm_map_copy_first_entry(copy);
8928
8929 /*
8930 * Reinitialize the copy so that vm_map_copy_entry_link
8931 * will work.
8932 */
8933 vm_map_store_copy_reset(copy, entry);
8934 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
8935
8936 /*
8937 * Copy each entry.
8938 */
8939 while (entry != vm_map_copy_to_entry(copy)) {
8940 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8941 vm_map_entry_copy_full(new, entry);
8942 assert(!new->iokit_acct);
8943 if (new->is_sub_map) {
8944 /* clr address space specifics */
8945 new->use_pmap = FALSE;
8946 }
8947 vm_map_copy_entry_link(copy,
8948 vm_map_copy_last_entry(copy),
8949 new);
8950 next = entry->vme_next;
8951 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
8952 zfree(old_zone, entry);
8953 entry = next;
8954 }
8955 }
8956
8957 /*
8958 * Adjust the addresses in the copy chain, and
8959 * reset the region attributes.
8960 */
8961
8962 for (entry = vm_map_copy_first_entry(copy);
8963 entry != vm_map_copy_to_entry(copy);
8964 entry = entry->vme_next) {
8965 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8966 /*
8967 * We're injecting this copy entry into a map that
8968 * has the standard page alignment, so clear
8969 * "map_aligned" (which might have been inherited
8970 * from the original map entry).
8971 */
8972 entry->map_aligned = FALSE;
8973 }
8974
8975 entry->vme_start += adjustment;
8976 entry->vme_end += adjustment;
8977
8978 if (entry->map_aligned) {
8979 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8980 VM_MAP_PAGE_MASK(dst_map)));
8981 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8982 VM_MAP_PAGE_MASK(dst_map)));
8983 }
8984
8985 entry->inheritance = VM_INHERIT_DEFAULT;
8986 entry->protection = VM_PROT_DEFAULT;
8987 entry->max_protection = VM_PROT_ALL;
8988 entry->behavior = VM_BEHAVIOR_DEFAULT;
8989
8990 /*
8991 * If the entry is now wired,
8992 * map the pages into the destination map.
8993 */
8994 if (entry->wired_count != 0) {
8995 register vm_map_offset_t va;
8996 vm_object_offset_t offset;
8997 register vm_object_t object;
8998 vm_prot_t prot;
8999 int type_of_fault;
9000
9001 object = VME_OBJECT(entry);
9002 offset = VME_OFFSET(entry);
9003 va = entry->vme_start;
9004
9005 pmap_pageable(dst_map->pmap,
9006 entry->vme_start,
9007 entry->vme_end,
9008 TRUE);
9009
9010 while (va < entry->vme_end) {
9011 register vm_page_t m;
9012
9013 /*
9014 * Look up the page in the object.
9015 * Assert that the page will be found in the
9016 * top object:
9017 * either
9018 * the object was newly created by
9019 * vm_object_copy_slowly, and has
9020 * copies of all of the pages from
9021 * the source object
9022 * or
9023 * the object was moved from the old
9024 * map entry; because the old map
9025 * entry was wired, all of the pages
9026 * were in the top-level object.
9027 * (XXX not true if we wire pages for
9028 * reading)
9029 */
9030 vm_object_lock(object);
9031
9032 m = vm_page_lookup(object, offset);
9033 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
9034 m->absent)
9035 panic("vm_map_copyout: wiring %p", m);
9036
9037 /*
9038 * ENCRYPTED SWAP:
9039 * The page is assumed to be wired here, so it
9040 * shouldn't be encrypted. Otherwise, we
9041 * couldn't enter it in the page table, since
9042 * we don't want the user to see the encrypted
9043 * data.
9044 */
9045 ASSERT_PAGE_DECRYPTED(m);
9046
9047 prot = entry->protection;
9048
9049 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9050 prot)
9051 prot |= VM_PROT_EXECUTE;
9052
9053 type_of_fault = DBG_CACHE_HIT_FAULT;
9054
9055 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
9056 VM_PAGE_WIRED(m), FALSE, FALSE,
9057 FALSE, VME_ALIAS(entry),
9058 ((entry->iokit_acct ||
9059 (!entry->is_sub_map &&
9060 !entry->use_pmap))
9061 ? PMAP_OPTIONS_ALT_ACCT
9062 : 0),
9063 NULL, &type_of_fault);
9064
9065 vm_object_unlock(object);
9066
9067 offset += PAGE_SIZE_64;
9068 va += PAGE_SIZE;
9069 }
9070 }
9071 }
9072
9073 after_adjustments:
9074
9075 /*
9076 * Correct the page alignment for the result
9077 */
9078
9079 *dst_addr = start + (copy->offset - vm_copy_start);
9080
9081 /*
9082 * Update the hints and the map size
9083 */
9084
9085 if (consume_on_success) {
9086 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9087 } else {
9088 SAVE_HINT_MAP_WRITE(dst_map, last);
9089 }
9090
9091 dst_map->size += size;
9092
9093 /*
9094 * Link in the copy
9095 */
9096
9097 if (consume_on_success) {
9098 vm_map_copy_insert(dst_map, last, copy);
9099 } else {
9100 vm_map_copy_remap(dst_map, last, copy, adjustment,
9101 cur_protection, max_protection,
9102 inheritance);
9103 }
9104
9105 vm_map_unlock(dst_map);
9106
9107 /*
9108 * XXX If wiring_required, call vm_map_pageable
9109 */
9110
9111 return(KERN_SUCCESS);
9112 }
9113
9114 /*
9115 * Routine: vm_map_copyin
9116 *
9117 * Description:
9118 * see vm_map_copyin_common. Exported via Unsupported.exports.
9119 *
9120 */
9121
9122 #undef vm_map_copyin
9123
9124 kern_return_t
9125 vm_map_copyin(
9126 vm_map_t src_map,
9127 vm_map_address_t src_addr,
9128 vm_map_size_t len,
9129 boolean_t src_destroy,
9130 vm_map_copy_t *copy_result) /* OUT */
9131 {
9132 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9133 FALSE, copy_result, FALSE));
9134 }
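/*
 * A minimal copyin/copyout round trip (illustrative only; "src_map",
 * "dst_map", "src_addr" and "nbytes" are hypothetical): move a region
 * from one address space into newly allocated space in another, the
 * pattern used for out-of-line Mach message data.
 */
#if 0
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, nbytes,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, &dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* on failure the caller still owns the copy object */
		vm_map_copy_discard(copy);
		return kr;
	}
	/* on success "copy" has been consumed; the data is at dst_addr */
#endif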
9135
9136 /*
9137 * Routine: vm_map_copyin_common
9138 *
9139 * Description:
9140 * Copy the specified region (src_addr, len) from the
9141 * source address space (src_map), possibly removing
9142 * the region from the source address space (src_destroy).
9143 *
9144 * Returns:
9145 * A vm_map_copy_t object (copy_result), suitable for
9146 * insertion into another address space (using vm_map_copyout),
9147 * copying over another address space region (using
9148 * vm_map_copy_overwrite). If the copy is unused, it
9149 * should be destroyed (using vm_map_copy_discard).
9150 *
9151 * In/out conditions:
9152 * The source map should not be locked on entry.
9153 */
9154
9155 typedef struct submap_map {
9156 vm_map_t parent_map;
9157 vm_map_offset_t base_start;
9158 vm_map_offset_t base_end;
9159 vm_map_size_t base_len;
9160 struct submap_map *next;
9161 } submap_map_t;
9162
9163 kern_return_t
9164 vm_map_copyin_common(
9165 vm_map_t src_map,
9166 vm_map_address_t src_addr,
9167 vm_map_size_t len,
9168 boolean_t src_destroy,
9169 __unused boolean_t src_volatile,
9170 vm_map_copy_t *copy_result, /* OUT */
9171 boolean_t use_maxprot)
9172 {
9173 int flags;
9174
9175 flags = 0;
9176 if (src_destroy) {
9177 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9178 }
9179 if (use_maxprot) {
9180 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9181 }
9182 return vm_map_copyin_internal(src_map,
9183 src_addr,
9184 len,
9185 flags,
9186 copy_result);
9187 }
9188 kern_return_t
9189 vm_map_copyin_internal(
9190 vm_map_t src_map,
9191 vm_map_address_t src_addr,
9192 vm_map_size_t len,
9193 int flags,
9194 vm_map_copy_t *copy_result) /* OUT */
9195 {
9196 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9197 * in multi-level lookup, this
9198 * entry contains the actual
9199 * vm_object/offset.
9200 */
9201 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9202
9203 vm_map_offset_t src_start; /* Start of current entry --
9204 * where copy is taking place now
9205 */
9206 vm_map_offset_t src_end; /* End of entire region to be
9207 * copied */
9208 vm_map_offset_t src_base;
9209 vm_map_t base_map = src_map;
9210 boolean_t map_share=FALSE;
9211 submap_map_t *parent_maps = NULL;
9212
9213 vm_map_copy_t copy; /* Resulting copy */
9214 vm_map_address_t copy_addr;
9215 vm_map_size_t copy_size;
9216 boolean_t src_destroy;
9217 boolean_t use_maxprot;
9218
9219 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9220 return KERN_INVALID_ARGUMENT;
9221 }
9222
9223 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9224 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
9225
9226 /*
9227 * Check for copies of zero bytes.
9228 */
9229
9230 if (len == 0) {
9231 *copy_result = VM_MAP_COPY_NULL;
9232 return(KERN_SUCCESS);
9233 }
9234
9235 /*
9236 * Check that the end address doesn't overflow
9237 */
9238 src_end = src_addr + len;
9239 if (src_end < src_addr)
9240 return KERN_INVALID_ADDRESS;
9241
9242 /*
9243 * If the copy is sufficiently small, use a kernel buffer instead
9244 * of making a virtual copy. The theory being that the cost of
9245 * setting up VM (and taking C-O-W faults) dominates the copy costs
9246 * for small regions.
9247 */
9248 if ((len < msg_ool_size_small) &&
9249 !use_maxprot &&
9250 !(flags & VM_MAP_COPYIN_ENTRY_LIST))
9251 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9252 src_destroy, copy_result);
9253
9254 /*
9255 * Compute (page aligned) start and end of region
9256 */
9257 src_start = vm_map_trunc_page(src_addr,
9258 VM_MAP_PAGE_MASK(src_map));
9259 src_end = vm_map_round_page(src_end,
9260 VM_MAP_PAGE_MASK(src_map));
9261
9262 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
9263
9264 /*
9265 * Allocate a header element for the list.
9266 *
9267 * Use the start and end in the header to
9268 * remember the endpoints prior to rounding.
9269 */
9270
9271 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9272 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
9273 vm_map_copy_first_entry(copy) =
9274 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9275 copy->type = VM_MAP_COPY_ENTRY_LIST;
9276 copy->cpy_hdr.nentries = 0;
9277 copy->cpy_hdr.entries_pageable = TRUE;
9278 #if 00
9279 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9280 #else
9281 /*
9282 * The copy entries can be broken down for a variety of reasons,
9283 * so we can't guarantee that they will remain map-aligned...
9284 * Will need to adjust the first copy_entry's "vme_start" and
9285 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9286 * rather than the original map's alignment.
9287 */
9288 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9289 #endif
9290
9291 vm_map_store_init( &(copy->cpy_hdr) );
9292
9293 copy->offset = src_addr;
9294 copy->size = len;
9295
9296 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9297
9298 #define RETURN(x) \
9299 MACRO_BEGIN \
9300 vm_map_unlock(src_map); \
9301 if(src_map != base_map) \
9302 vm_map_deallocate(src_map); \
9303 if (new_entry != VM_MAP_ENTRY_NULL) \
9304 vm_map_copy_entry_dispose(copy,new_entry); \
9305 vm_map_copy_discard(copy); \
9306 { \
9307 submap_map_t *_ptr; \
9308 \
9309 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
9310 parent_maps=parent_maps->next; \
9311 if (_ptr->parent_map != base_map) \
9312 vm_map_deallocate(_ptr->parent_map); \
9313 kfree(_ptr, sizeof(submap_map_t)); \
9314 } \
9315 } \
9316 MACRO_RETURN(x); \
9317 MACRO_END
9318
9319 /*
9320 * Find the beginning of the region.
9321 */
9322
9323 vm_map_lock(src_map);
9324
9325 /*
9326 * Lookup the original "src_addr" rather than the truncated
9327 * "src_start", in case "src_start" falls in a non-map-aligned
9328 * map entry *before* the map entry that contains "src_addr"...
9329 */
9330 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
9331 RETURN(KERN_INVALID_ADDRESS);
9332 if(!tmp_entry->is_sub_map) {
9333 /*
9334 * ... but clip to the map-rounded "src_start" rather than
9335 * "src_addr" to preserve map-alignment. We'll adjust the
9336 * first copy entry at the end, if needed.
9337 */
9338 vm_map_clip_start(src_map, tmp_entry, src_start);
9339 }
9340 if (src_start < tmp_entry->vme_start) {
9341 /*
9342 * Move "src_start" up to the start of the
9343 * first map entry to copy.
9344 */
9345 src_start = tmp_entry->vme_start;
9346 }
9347 /* set for later submap fix-up */
9348 copy_addr = src_start;
9349
9350 /*
9351 * Go through entries until we get to the end.
9352 */
9353
9354 while (TRUE) {
9355 register
9356 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
9357 vm_map_size_t src_size; /* Size of source
9358 * map entry (in both
9359 * maps)
9360 */
9361
9362 register
9363 vm_object_t src_object; /* Object to copy */
9364 vm_object_offset_t src_offset;
9365
9366 boolean_t src_needs_copy; /* Should source map
9367 * be made read-only
9368 * for copy-on-write?
9369 */
9370
9371 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9372
9373 boolean_t was_wired; /* Was source wired? */
9374 vm_map_version_t version; /* Version before locks
9375 * dropped to make copy
9376 */
9377 kern_return_t result; /* Return value from
9378 * copy_strategically.
9379 */
9380 while(tmp_entry->is_sub_map) {
9381 vm_map_size_t submap_len;
9382 submap_map_t *ptr;
9383
9384 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9385 ptr->next = parent_maps;
9386 parent_maps = ptr;
9387 ptr->parent_map = src_map;
9388 ptr->base_start = src_start;
9389 ptr->base_end = src_end;
9390 submap_len = tmp_entry->vme_end - src_start;
9391 if(submap_len > (src_end-src_start))
9392 submap_len = src_end-src_start;
9393 ptr->base_len = submap_len;
9394
9395 src_start -= tmp_entry->vme_start;
9396 src_start += VME_OFFSET(tmp_entry);
9397 src_end = src_start + submap_len;
9398 src_map = VME_SUBMAP(tmp_entry);
9399 vm_map_lock(src_map);
9400 /* keep an outstanding reference for all maps in */
9401 /* the parents' tree except the base map */
9402 vm_map_reference(src_map);
9403 vm_map_unlock(ptr->parent_map);
9404 if (!vm_map_lookup_entry(
9405 src_map, src_start, &tmp_entry))
9406 RETURN(KERN_INVALID_ADDRESS);
9407 map_share = TRUE;
9408 if(!tmp_entry->is_sub_map)
9409 vm_map_clip_start(src_map, tmp_entry, src_start);
9410 src_entry = tmp_entry;
9411 }
9412 /* we are now in the lowest level submap... */
9413
9414 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9415 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
9416 /* This is not supported for now. In the future */
9417 /* we will need to detect the phys_contiguous */
9418 /* condition and then upgrade copy_slowly */
9419 /* to do a physical copy from the device-memory- */
9420 /* based object. We can piggy-back off of */
9421 /* the was_wired boolean to set up the */
9422 /* proper handling. */
9423 RETURN(KERN_PROTECTION_FAILURE);
9424 }
9425 /*
9426 * Create a new address map entry to hold the result.
9427 * Fill in the fields from the appropriate source entries.
9428 * We must unlock the source map to do this if we need
9429 * to allocate a map entry.
9430 */
9431 if (new_entry == VM_MAP_ENTRY_NULL) {
9432 version.main_timestamp = src_map->timestamp;
9433 vm_map_unlock(src_map);
9434
9435 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9436
9437 vm_map_lock(src_map);
9438 if ((version.main_timestamp + 1) != src_map->timestamp) {
9439 if (!vm_map_lookup_entry(src_map, src_start,
9440 &tmp_entry)) {
9441 RETURN(KERN_INVALID_ADDRESS);
9442 }
9443 if (!tmp_entry->is_sub_map)
9444 vm_map_clip_start(src_map, tmp_entry, src_start);
9445 continue; /* restart w/ new tmp_entry */
9446 }
9447 }
9448
9449 /*
9450 * Verify that the region can be read.
9451 */
9452 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
9453 !use_maxprot) ||
9454 (src_entry->max_protection & VM_PROT_READ) == 0)
9455 RETURN(KERN_PROTECTION_FAILURE);
9456
9457 /*
9458 * Clip against the endpoints of the entire region.
9459 */
9460
9461 vm_map_clip_end(src_map, src_entry, src_end);
9462
9463 src_size = src_entry->vme_end - src_start;
9464 src_object = VME_OBJECT(src_entry);
9465 src_offset = VME_OFFSET(src_entry);
9466 was_wired = (src_entry->wired_count != 0);
9467
9468 vm_map_entry_copy(new_entry, src_entry);
9469 if (new_entry->is_sub_map) {
9470 /* clr address space specifics */
9471 new_entry->use_pmap = FALSE;
9472 }
9473
9474 /*
9475 * Attempt non-blocking copy-on-write optimizations.
9476 */
9477
9478 if (src_destroy &&
9479 (src_object == VM_OBJECT_NULL ||
9480 (src_object->internal && !src_object->true_share
9481 && !map_share))) {
9482 /*
9483 * If we are destroying the source, and the object
9484 * is internal, we can move the object reference
9485 * from the source to the copy. The copy is
9486 * copy-on-write only if the source is.
9487 * We make another reference to the object, because
9488 * destroying the source entry will deallocate it.
9489 */
9490 vm_object_reference(src_object);
9491
9492 /*
9493 * The copy is always unwired; vm_map_entry_copy
9494 * set its wired count to zero.
9495 */
9496
9497 goto CopySuccessful;
9498 }
9499
9500
9501 RestartCopy:
9502 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
9503 src_object, new_entry, VME_OBJECT(new_entry),
9504 was_wired, 0);
9505 if ((src_object == VM_OBJECT_NULL ||
9506 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9507 vm_object_copy_quickly(
9508 &VME_OBJECT(new_entry),
9509 src_offset,
9510 src_size,
9511 &src_needs_copy,
9512 &new_entry_needs_copy)) {
9513
9514 new_entry->needs_copy = new_entry_needs_copy;
9515
9516 /*
9517 * Handle copy-on-write obligations
9518 */
9519
9520 if (src_needs_copy && !tmp_entry->needs_copy) {
9521 vm_prot_t prot;
9522
9523 prot = src_entry->protection & ~VM_PROT_WRITE;
9524
9525 if (override_nx(src_map, VME_ALIAS(src_entry))
9526 && prot)
9527 prot |= VM_PROT_EXECUTE;
9528
9529 vm_object_pmap_protect(
9530 src_object,
9531 src_offset,
9532 src_size,
9533 (src_entry->is_shared ?
9534 PMAP_NULL
9535 : src_map->pmap),
9536 src_entry->vme_start,
9537 prot);
9538
9539 assert(tmp_entry->wired_count == 0);
9540 tmp_entry->needs_copy = TRUE;
9541 }
9542
9543 /*
9544 * The map has never been unlocked, so it's safe
9545 * to move to the next entry rather than doing
9546 * another lookup.
9547 */
9548
9549 goto CopySuccessful;
9550 }
9551
9552 /*
9553 * Take an object reference, so that we may
9554 * release the map lock(s).
9555 */
9556
9557 assert(src_object != VM_OBJECT_NULL);
9558 vm_object_reference(src_object);
9559
9560 /*
9561 * Record the timestamp for later verification.
9562 * Unlock the map.
9563 */
9564
9565 version.main_timestamp = src_map->timestamp;
9566 vm_map_unlock(src_map); /* Increments timestamp once! */
9567
9568 /*
9569 * Perform the copy
9570 */
9571
9572 if (was_wired) {
9573 CopySlowly:
9574 vm_object_lock(src_object);
9575 result = vm_object_copy_slowly(
9576 src_object,
9577 src_offset,
9578 src_size,
9579 THREAD_UNINT,
9580 &VME_OBJECT(new_entry));
9581 VME_OFFSET_SET(new_entry, 0);
9582 new_entry->needs_copy = FALSE;
9583
9584 }
9585 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9586 (tmp_entry->is_shared || map_share)) {
9587 vm_object_t new_object;
9588
9589 vm_object_lock_shared(src_object);
9590 new_object = vm_object_copy_delayed(
9591 src_object,
9592 src_offset,
9593 src_size,
9594 TRUE);
9595 if (new_object == VM_OBJECT_NULL)
9596 goto CopySlowly;
9597
9598 VME_OBJECT_SET(new_entry, new_object);
9599 assert(new_entry->wired_count == 0);
9600 new_entry->needs_copy = TRUE;
9601 assert(!new_entry->iokit_acct);
9602 assert(new_object->purgable == VM_PURGABLE_DENY);
9603 new_entry->use_pmap = TRUE;
9604 result = KERN_SUCCESS;
9605
9606 } else {
9607 vm_object_offset_t new_offset;
9608 new_offset = VME_OFFSET(new_entry);
9609 result = vm_object_copy_strategically(src_object,
9610 src_offset,
9611 src_size,
9612 &VME_OBJECT(new_entry),
9613 &new_offset,
9614 &new_entry_needs_copy);
9615 if (new_offset != VME_OFFSET(new_entry)) {
9616 VME_OFFSET_SET(new_entry, new_offset);
9617 }
9618
9619 new_entry->needs_copy = new_entry_needs_copy;
9620 }
9621
9622 if (result != KERN_SUCCESS &&
9623 result != KERN_MEMORY_RESTART_COPY) {
9624 vm_map_lock(src_map);
9625 RETURN(result);
9626 }
9627
9628 /*
9629 * Throw away the extra reference
9630 */
9631
9632 vm_object_deallocate(src_object);
9633
9634 /*
9635 * Verify that the map has not substantially
9636 * changed while the copy was being made.
9637 */
9638
9639 vm_map_lock(src_map);
9640
9641 if ((version.main_timestamp + 1) == src_map->timestamp)
9642 goto VerificationSuccessful;
9643
9644 /*
9645 * Simple version comparison failed.
9646 *
9647 * Retry the lookup and verify that the
9648 * same object/offset are still present.
9649 *
9650 * [Note: a memory manager that colludes with
9651 * the calling task can detect that we have
9652 * cheated. While the map was unlocked, the
9653 * mapping could have been changed and restored.]
9654 */
9655
9656 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9657 if (result != KERN_MEMORY_RESTART_COPY) {
9658 vm_object_deallocate(VME_OBJECT(new_entry));
9659 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
9660 assert(!new_entry->iokit_acct);
9661 new_entry->use_pmap = TRUE;
9662 }
9663 RETURN(KERN_INVALID_ADDRESS);
9664 }
9665
9666 src_entry = tmp_entry;
9667 vm_map_clip_start(src_map, src_entry, src_start);
9668
9669 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9670 !use_maxprot) ||
9671 ((src_entry->max_protection & VM_PROT_READ) == 0))
9672 goto VerificationFailed;
9673
9674 if (src_entry->vme_end < new_entry->vme_end) {
9675 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9676 VM_MAP_COPY_PAGE_MASK(copy)));
9677 new_entry->vme_end = src_entry->vme_end;
9678 src_size = new_entry->vme_end - src_start;
9679 }
9680
9681 if ((VME_OBJECT(src_entry) != src_object) ||
9682 (VME_OFFSET(src_entry) != src_offset) ) {
9683
9684 /*
9685 * Verification failed.
9686 *
9687 * Start over with this top-level entry.
9688 */
9689
9690 VerificationFailed: ;
9691
9692 vm_object_deallocate(VME_OBJECT(new_entry));
9693 tmp_entry = src_entry;
9694 continue;
9695 }
9696
9697 /*
9698 * Verification succeeded.
9699 */
9700
9701 VerificationSuccessful: ;
9702
9703 if (result == KERN_MEMORY_RESTART_COPY)
9704 goto RestartCopy;
9705
9706 /*
9707 * Copy succeeded.
9708 */
9709
9710 CopySuccessful: ;
9711
9712 /*
9713 * Link in the new copy entry.
9714 */
9715
9716 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9717 new_entry);
9718
9719 /*
9720 * Determine whether the entire region
9721 * has been copied.
9722 */
9723 src_base = src_start;
9724 src_start = new_entry->vme_end;
9725 new_entry = VM_MAP_ENTRY_NULL;
9726 while ((src_start >= src_end) && (src_end != 0)) {
9727 submap_map_t *ptr;
9728
9729 if (src_map == base_map) {
9730 /* back to the top */
9731 break;
9732 }
9733
9734 ptr = parent_maps;
9735 assert(ptr != NULL);
9736 parent_maps = parent_maps->next;
9737
9738 /* fix up the damage we did in that submap */
9739 vm_map_simplify_range(src_map,
9740 src_base,
9741 src_end);
9742
9743 vm_map_unlock(src_map);
9744 vm_map_deallocate(src_map);
9745 vm_map_lock(ptr->parent_map);
9746 src_map = ptr->parent_map;
9747 src_base = ptr->base_start;
9748 src_start = ptr->base_start + ptr->base_len;
9749 src_end = ptr->base_end;
9750 if (!vm_map_lookup_entry(src_map,
9751 src_start,
9752 &tmp_entry) &&
9753 (src_end > src_start)) {
9754 RETURN(KERN_INVALID_ADDRESS);
9755 }
9756 kfree(ptr, sizeof(submap_map_t));
9757 if (parent_maps == NULL)
9758 map_share = FALSE;
9759 src_entry = tmp_entry->vme_prev;
9760 }
9761
9762 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9763 (src_start >= src_addr + len) &&
9764 (src_addr + len != 0)) {
9765 /*
9766 * Stop copying now, even though we haven't reached
9767 * "src_end". We'll adjust the end of the last copy
9768 * entry at the end, if needed.
9769 *
9770 * If src_map's alignment is different from the
9771 * system's page-alignment, there could be
9772 * extra non-map-aligned map entries between
9773 * the original (non-rounded) "src_addr + len"
9774 * and the rounded "src_end".
9775 * We do not want to copy those map entries since
9776 * they're not part of the copied range.
9777 */
9778 break;
9779 }
9780
9781 if ((src_start >= src_end) && (src_end != 0))
9782 break;
9783
9784 /*
9785 * Verify that there are no gaps in the region
9786 */
9787
9788 tmp_entry = src_entry->vme_next;
9789 if ((tmp_entry->vme_start != src_start) ||
9790 (tmp_entry == vm_map_to_entry(src_map))) {
9791 RETURN(KERN_INVALID_ADDRESS);
9792 }
9793 }
9794
9795 /*
9796 * If the source should be destroyed, do it now, since the
9797 * copy was successful.
9798 */
9799 if (src_destroy) {
9800 (void) vm_map_delete(
9801 src_map,
9802 vm_map_trunc_page(src_addr,
9803 VM_MAP_PAGE_MASK(src_map)),
9804 src_end,
9805 ((src_map == kernel_map) ?
9806 VM_MAP_REMOVE_KUNWIRE :
9807 VM_MAP_NO_FLAGS),
9808 VM_MAP_NULL);
9809 } else {
9810 /* fix up the damage we did in the base map */
9811 vm_map_simplify_range(
9812 src_map,
9813 vm_map_trunc_page(src_addr,
9814 VM_MAP_PAGE_MASK(src_map)),
9815 vm_map_round_page(src_end,
9816 VM_MAP_PAGE_MASK(src_map)));
9817 }
9818
9819 vm_map_unlock(src_map);
9820
9821 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9822 vm_map_offset_t original_start, original_offset, original_end;
9823
9824 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9825
9826 /* adjust alignment of first copy_entry's "vme_start" */
9827 tmp_entry = vm_map_copy_first_entry(copy);
9828 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9829 vm_map_offset_t adjustment;
9830
9831 original_start = tmp_entry->vme_start;
9832 original_offset = VME_OFFSET(tmp_entry);
9833
9834 /* map-align the start of the first copy entry... */
9835 adjustment = (tmp_entry->vme_start -
9836 vm_map_trunc_page(
9837 tmp_entry->vme_start,
9838 VM_MAP_PAGE_MASK(src_map)));
9839 tmp_entry->vme_start -= adjustment;
9840 VME_OFFSET_SET(tmp_entry,
9841 VME_OFFSET(tmp_entry) - adjustment);
9842 copy_addr -= adjustment;
9843 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9844 /* ... adjust for mis-aligned start of copy range */
9845 adjustment =
9846 (vm_map_trunc_page(copy->offset,
9847 PAGE_MASK) -
9848 vm_map_trunc_page(copy->offset,
9849 VM_MAP_PAGE_MASK(src_map)));
9850 if (adjustment) {
9851 assert(page_aligned(adjustment));
9852 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9853 tmp_entry->vme_start += adjustment;
9854 VME_OFFSET_SET(tmp_entry,
9855 (VME_OFFSET(tmp_entry) +
9856 adjustment));
9857 copy_addr += adjustment;
9858 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9859 }
9860
9861 /*
9862 * Assert that the adjustments haven't exposed
9863 * more than was originally copied...
9864 */
9865 assert(tmp_entry->vme_start >= original_start);
9866 assert(VME_OFFSET(tmp_entry) >= original_offset);
9867 /*
9868 * ... and that it did not adjust outside of
9869 * a single 16K page.
9870 */
9871 assert(vm_map_trunc_page(tmp_entry->vme_start,
9872 VM_MAP_PAGE_MASK(src_map)) ==
9873 vm_map_trunc_page(original_start,
9874 VM_MAP_PAGE_MASK(src_map)));
9875 }
9876
9877 /* adjust alignment of last copy_entry's "vme_end" */
9878 tmp_entry = vm_map_copy_last_entry(copy);
9879 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9880 vm_map_offset_t adjustment;
9881
9882 original_end = tmp_entry->vme_end;
9883
9884 /* map-align the end of the last copy entry... */
9885 tmp_entry->vme_end =
9886 vm_map_round_page(tmp_entry->vme_end,
9887 VM_MAP_PAGE_MASK(src_map));
9888 /* ... adjust for mis-aligned end of copy range */
9889 adjustment =
9890 (vm_map_round_page((copy->offset +
9891 copy->size),
9892 VM_MAP_PAGE_MASK(src_map)) -
9893 vm_map_round_page((copy->offset +
9894 copy->size),
9895 PAGE_MASK));
9896 if (adjustment) {
9897 assert(page_aligned(adjustment));
9898 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9899 tmp_entry->vme_end -= adjustment;
9900 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9901 }
9902
9903 /*
9904 * Assert that the adjustments haven't exposed
9905 * more than was originally copied...
9906 */
9907 assert(tmp_entry->vme_end <= original_end);
9908 /*
9909 * ... and that it did not adjust outside of
9910 * a single 16K page.
9911 */
9912 assert(vm_map_round_page(tmp_entry->vme_end,
9913 VM_MAP_PAGE_MASK(src_map)) ==
9914 vm_map_round_page(original_end,
9915 VM_MAP_PAGE_MASK(src_map)));
9916 }
9917 }
9918
9919 /* Fix up start and end points in copy. This is necessary */
9920 /* when the various entries in the copy object were picked */
9921 /* up from different sub-maps */
9922
9923 tmp_entry = vm_map_copy_first_entry(copy);
9924 copy_size = 0; /* compute actual size */
9925 while (tmp_entry != vm_map_copy_to_entry(copy)) {
9926 assert(VM_MAP_PAGE_ALIGNED(
9927 copy_addr + (tmp_entry->vme_end -
9928 tmp_entry->vme_start),
9929 VM_MAP_COPY_PAGE_MASK(copy)));
9930 assert(VM_MAP_PAGE_ALIGNED(
9931 copy_addr,
9932 VM_MAP_COPY_PAGE_MASK(copy)));
9933
9934 /*
9935 * The copy_entries will be injected directly into the
9936 * destination map and might not be "map aligned" there...
9937 */
9938 tmp_entry->map_aligned = FALSE;
9939
9940 tmp_entry->vme_end = copy_addr +
9941 (tmp_entry->vme_end - tmp_entry->vme_start);
9942 tmp_entry->vme_start = copy_addr;
9943 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9944 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9945 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9946 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9947 }
9948
9949 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9950 copy_size < copy->size) {
9951 /*
9952 * The actual size of the VM map copy is smaller than what
9953 * was requested by the caller. This must be because some
9954 * PAGE_SIZE-sized pages are missing at the end of the last
9955 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9956 * The caller might not have been aware of those missing
9957 * pages and might not want to be aware of it, which is
9958 * fine as long as they don't try to access (and crash on)
9959 * those missing pages.
9960 * Let's adjust the size of the "copy", to avoid failing
9961 * in vm_map_copyout() or vm_map_copy_overwrite().
9962 */
9963 assert(vm_map_round_page(copy_size,
9964 VM_MAP_PAGE_MASK(src_map)) ==
9965 vm_map_round_page(copy->size,
9966 VM_MAP_PAGE_MASK(src_map)));
9967 copy->size = copy_size;
9968 }
9969
9970 *copy_result = copy;
9971 return(KERN_SUCCESS);
9972
9973 #undef RETURN
9974 }
9975
9976 kern_return_t
9977 vm_map_copy_extract(
9978 vm_map_t src_map,
9979 vm_map_address_t src_addr,
9980 vm_map_size_t len,
9981 vm_map_copy_t *copy_result, /* OUT */
9982 vm_prot_t *cur_prot, /* OUT */
9983 vm_prot_t *max_prot)
9984 {
9985 vm_map_offset_t src_start, src_end;
9986 vm_map_copy_t copy;
9987 kern_return_t kr;
9988
9989 /*
9990 * Check for copies of zero bytes.
9991 */
9992
9993 if (len == 0) {
9994 *copy_result = VM_MAP_COPY_NULL;
9995 return(KERN_SUCCESS);
9996 }
9997
9998 /*
9999 * Check that the end address doesn't overflow
10000 */
10001 src_end = src_addr + len;
10002 if (src_end < src_addr)
10003 return KERN_INVALID_ADDRESS;
10004
10005 /*
10006 * Compute (page aligned) start and end of region
10007 */
10008 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10009 src_end = vm_map_round_page(src_end, PAGE_MASK);
10010
10011 /*
10012 * Allocate a header element for the list.
10013 *
10014 * Use the start and end in the header to
10015 * remember the endpoints prior to rounding.
10016 */
10017
10018 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10019 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10020 vm_map_copy_first_entry(copy) =
10021 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10022 copy->type = VM_MAP_COPY_ENTRY_LIST;
10023 copy->cpy_hdr.nentries = 0;
10024 copy->cpy_hdr.entries_pageable = TRUE;
10025
10026 vm_map_store_init(&copy->cpy_hdr);
10027
10028 copy->offset = 0;
10029 copy->size = len;
10030
10031 kr = vm_map_remap_extract(src_map,
10032 src_addr,
10033 len,
10034 FALSE, /* copy */
10035 &copy->cpy_hdr,
10036 cur_prot,
10037 max_prot,
10038 VM_INHERIT_SHARE,
10039 TRUE); /* pageable */
10040 if (kr != KERN_SUCCESS) {
10041 vm_map_copy_discard(copy);
10042 return kr;
10043 }
10044
10045 *copy_result = copy;
10046 return KERN_SUCCESS;
10047 }
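/*
 * Illustrative usage sketch (not part of the original source; local
 * variable names are assumptions): a caller that wants a pageable,
 * entry-list copy of [src_addr, src_addr + len) that still shares the
 * source objects might do roughly:
 *
 *	vm_map_copy_t	copy;
 *	vm_prot_t	cur_prot, max_prot;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copy_extract(src_map, src_addr, len,
 *				 &copy, &cur_prot, &max_prot);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */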
10048
10049 /*
10050 * vm_map_copyin_object:
10051 *
10052 * Create a copy object from an object.
10053 * Our caller donates an object reference.
10054 */
10055
10056 kern_return_t
10057 vm_map_copyin_object(
10058 vm_object_t object,
10059 vm_object_offset_t offset, /* offset of region in object */
10060 vm_object_size_t size, /* size of region in object */
10061 vm_map_copy_t *copy_result) /* OUT */
10062 {
10063 vm_map_copy_t copy; /* Resulting copy */
10064
10065 /*
10066 * We drop the object into a special copy object
10067 * that contains the object directly.
10068 */
10069
10070 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10071 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10072 copy->type = VM_MAP_COPY_OBJECT;
10073 copy->cpy_object = object;
10074 copy->offset = offset;
10075 copy->size = size;
10076
10077 *copy_result = copy;
10078 return(KERN_SUCCESS);
10079 }
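/*
 * Illustrative sketch (assumption, not in the original source): because the
 * caller donates its object reference, a typical call site takes an extra
 * reference and lets the copy consume it:
 *
 *	vm_object_reference(object);
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 *
 * The donated reference is released when the copy is later discarded or
 * copied out.
 */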
10080
10081 static void
10082 vm_map_fork_share(
10083 vm_map_t old_map,
10084 vm_map_entry_t old_entry,
10085 vm_map_t new_map)
10086 {
10087 vm_object_t object;
10088 vm_map_entry_t new_entry;
10089
10090 /*
10091 * New sharing code. New map entry
10092 * references original object. Internal
10093 * objects use asynchronous copy algorithm for
10094 * future copies. First make sure we have
10095 * the right object. If we need a shadow,
10096 * or someone else already has one, then
10097 * make a new shadow and share it.
10098 */
10099
10100 object = VME_OBJECT(old_entry);
10101 if (old_entry->is_sub_map) {
10102 assert(old_entry->wired_count == 0);
10103 #ifndef NO_NESTED_PMAP
10104 if(old_entry->use_pmap) {
10105 kern_return_t result;
10106
10107 result = pmap_nest(new_map->pmap,
10108 (VME_SUBMAP(old_entry))->pmap,
10109 (addr64_t)old_entry->vme_start,
10110 (addr64_t)old_entry->vme_start,
10111 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
10112 if(result)
10113 panic("vm_map_fork_share: pmap_nest failed!");
10114 }
10115 #endif /* NO_NESTED_PMAP */
10116 } else if (object == VM_OBJECT_NULL) {
10117 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
10118 old_entry->vme_start));
10119 VME_OFFSET_SET(old_entry, 0);
10120 VME_OBJECT_SET(old_entry, object);
10121 old_entry->use_pmap = TRUE;
10122 assert(!old_entry->needs_copy);
10123 } else if (object->copy_strategy !=
10124 MEMORY_OBJECT_COPY_SYMMETRIC) {
10125
10126 /*
10127 * We are already using an asymmetric
10128 * copy, and therefore we already have
10129 * the right object.
10130 */
10131
10132 assert(! old_entry->needs_copy);
10133 }
10134 else if (old_entry->needs_copy || /* case 1 */
10135 object->shadowed || /* case 2 */
10136 (!object->true_share && /* case 3 */
10137 !old_entry->is_shared &&
10138 (object->vo_size >
10139 (vm_map_size_t)(old_entry->vme_end -
10140 old_entry->vme_start)))) {
10141
10142 /*
10143 * We need to create a shadow.
10144 * There are three cases here.
10145 * In the first case, we need to
10146 * complete a deferred symmetrical
10147 * copy that we participated in.
10148 * In the second and third cases,
10149 * we need to create the shadow so
10150 * that changes that we make to the
10151 * object do not interfere with
10152 * any symmetrical copies which
10153 * have occurred (case 2) or which
10154 * might occur (case 3).
10155 *
10156 * The first case is when we had
10157 * deferred shadow object creation
10158 * via the entry->needs_copy mechanism.
10159 * This mechanism only works when
10160 * only one entry points to the source
10161 * object, and we are about to create
10162 * a second entry pointing to the
10163 * same object. The problem is that
10164 * there is no way of mapping from
10165 * an object to the entries pointing
10166 * to it. (Deferred shadow creation
10167 * works with one entry because it occurs
10168 * at fault time, and we walk from the
10169 * entry to the object when handling
10170 * the fault.)
10171 *
10172 * The second case is when the object
10173 * to be shared has already been copied
10174 * with a symmetric copy, but we point
10175 * directly to the object without
10176 * needs_copy set in our entry. (This
10177 * can happen because different ranges
10178 * of an object can be pointed to by
10179 * different entries. In particular,
10180 * a single entry pointing to an object
10181 * can be split by a call to vm_inherit,
10182 * which, combined with task_create, can
10183 * result in the different entries
10184 * having different needs_copy values.)
10185 * The shadowed flag in the object allows
10186 * us to detect this case. The problem
10187 * with this case is that if this object
10188 * has or will have shadows, then we
10189 * must not perform an asymmetric copy
10190 * of this object, since such a copy
10191 * allows the object to be changed, which
10192 * will break the previous symmetrical
10193 * copies (which rely upon the object
10194 * not changing). In a sense, the shadowed
10195 * flag says "don't change this object".
10196 * We fix this by creating a shadow
10197 * object for this object, and sharing
10198 * that. This works because we are free
10199 * to change the shadow object (and thus
10200 * to use an asymmetric copy strategy);
10201 * this is also semantically correct,
10202 * since this object is temporary, and
10203 * therefore a copy of the object is
10204 * as good as the object itself. (This
10205 * is not true for permanent objects,
10206 * since the pager needs to see changes,
10207 * which won't happen if the changes
10208 * are made to a copy.)
10209 *
10210 * The third case is when the object
10211 * to be shared has parts sticking
10212 * outside of the entry we're working
10213 * with, and thus may in the future
10214 * be subject to a symmetrical copy.
10215 * (This is a preemptive version of
10216 * case 2.)
10217 */
10218 VME_OBJECT_SHADOW(old_entry,
10219 (vm_map_size_t) (old_entry->vme_end -
10220 old_entry->vme_start));
10221
10222 /*
10223 * If we're making a shadow for other than
10224 * copy on write reasons, then we have
10225 * to remove write permission.
10226 */
10227
10228 if (!old_entry->needs_copy &&
10229 (old_entry->protection & VM_PROT_WRITE)) {
10230 vm_prot_t prot;
10231
10232 prot = old_entry->protection & ~VM_PROT_WRITE;
10233
10234 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
10235 prot |= VM_PROT_EXECUTE;
10236
10237 if (old_map->mapped_in_other_pmaps) {
10238 vm_object_pmap_protect(
10239 VME_OBJECT(old_entry),
10240 VME_OFFSET(old_entry),
10241 (old_entry->vme_end -
10242 old_entry->vme_start),
10243 PMAP_NULL,
10244 old_entry->vme_start,
10245 prot);
10246 } else {
10247 pmap_protect(old_map->pmap,
10248 old_entry->vme_start,
10249 old_entry->vme_end,
10250 prot);
10251 }
10252 }
10253
10254 old_entry->needs_copy = FALSE;
10255 object = VME_OBJECT(old_entry);
10256 }
10257
10258
10259 /*
10260 * If object was using a symmetric copy strategy,
10261 * change its copy strategy to the default
10262 * asymmetric copy strategy, which is copy_delay
10263 * in the non-norma case and copy_call in the
10264 * norma case. Bump the reference count for the
10265 * new entry.
10266 */
10267
10268 if(old_entry->is_sub_map) {
10269 vm_map_lock(VME_SUBMAP(old_entry));
10270 vm_map_reference(VME_SUBMAP(old_entry));
10271 vm_map_unlock(VME_SUBMAP(old_entry));
10272 } else {
10273 vm_object_lock(object);
10274 vm_object_reference_locked(object);
10275 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10276 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10277 }
10278 vm_object_unlock(object);
10279 }
10280
10281 /*
10282 * Clone the entry, using object ref from above.
10283 * Mark both entries as shared.
10284 */
10285
10286 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10287 * map or descendants */
10288 vm_map_entry_copy(new_entry, old_entry);
10289 old_entry->is_shared = TRUE;
10290 new_entry->is_shared = TRUE;
10291
10292 /*
10293 * Insert the entry into the new map -- we
10294 * know we're inserting at the end of the new
10295 * map.
10296 */
10297
10298 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
10299
10300 /*
10301 * Update the physical map
10302 */
10303
10304 if (old_entry->is_sub_map) {
10305 /* Bill Angell pmap support goes here */
10306 } else {
10307 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
10308 old_entry->vme_end - old_entry->vme_start,
10309 old_entry->vme_start);
10310 }
10311 }
10312
10313 static boolean_t
10314 vm_map_fork_copy(
10315 vm_map_t old_map,
10316 vm_map_entry_t *old_entry_p,
10317 vm_map_t new_map)
10318 {
10319 vm_map_entry_t old_entry = *old_entry_p;
10320 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10321 vm_map_offset_t start = old_entry->vme_start;
10322 vm_map_copy_t copy;
10323 vm_map_entry_t last = vm_map_last_entry(new_map);
10324
10325 vm_map_unlock(old_map);
10326 /*
10327 * Use maxprot version of copyin because we
10328 * care about whether this memory can ever
10329 * be accessed, not just whether it's accessible
10330 * right now.
10331 */
10332 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
10333 != KERN_SUCCESS) {
10334 /*
10335 * The map might have changed while it
10336 * was unlocked, check it again. Skip
10337 * any blank space or permanently
10338 * unreadable region.
10339 */
10340 vm_map_lock(old_map);
10341 if (!vm_map_lookup_entry(old_map, start, &last) ||
10342 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
10343 last = last->vme_next;
10344 }
10345 *old_entry_p = last;
10346
10347 /*
10348 * XXX For some error returns, want to
10349 * XXX skip to the next element. Note
10350 * that INVALID_ADDRESS and
10351 * PROTECTION_FAILURE are handled above.
10352 */
10353
10354 return FALSE;
10355 }
10356
10357 /*
10358 * Insert the copy into the new map
10359 */
10360
10361 vm_map_copy_insert(new_map, last, copy);
10362
10363 /*
10364 * Pick up the traversal at the end of
10365 * the copied region.
10366 */
10367
10368 vm_map_lock(old_map);
10369 start += entry_size;
10370 if (! vm_map_lookup_entry(old_map, start, &last)) {
10371 last = last->vme_next;
10372 } else {
10373 if (last->vme_start == start) {
10374 /*
10375 * No need to clip here and we don't
10376 * want to cause any unnecessary
10377 * unnesting...
10378 */
10379 } else {
10380 vm_map_clip_start(old_map, last, start);
10381 }
10382 }
10383 *old_entry_p = last;
10384
10385 return TRUE;
10386 }
10387
10388 /*
10389 * vm_map_fork:
10390 *
10391 * Create and return a new map based on the old
10392 * map, according to the inheritance values on the
10393 * regions in that map.
10394 *
10395 * The source map must not be locked.
10396 */
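/*
 * Illustrative sketch (assumption, not in the original source): the task
 * layer is expected to use this on the fork() path roughly as
 *
 *	new_map = vm_map_fork(ledger, old_task->map);
 *
 * after which each region of the child map is either absent
 * (VM_INHERIT_NONE), shared with the parent (VM_INHERIT_SHARE), or set up
 * for copy-on-write (VM_INHERIT_COPY), as implemented below.
 */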
10397 vm_map_t
10398 vm_map_fork(
10399 ledger_t ledger,
10400 vm_map_t old_map)
10401 {
10402 pmap_t new_pmap;
10403 vm_map_t new_map;
10404 vm_map_entry_t old_entry;
10405 vm_map_size_t new_size = 0, entry_size;
10406 vm_map_entry_t new_entry;
10407 boolean_t src_needs_copy;
10408 boolean_t new_entry_needs_copy;
10409 boolean_t pmap_is64bit;
10410
10411 pmap_is64bit =
10412 #if defined(__i386__) || defined(__x86_64__)
10413 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
10414 #else
10415 #error Unknown architecture.
10416 #endif
10417
10418 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
10419
10420 vm_map_reference_swap(old_map);
10421 vm_map_lock(old_map);
10422
10423 new_map = vm_map_create(new_pmap,
10424 old_map->min_offset,
10425 old_map->max_offset,
10426 old_map->hdr.entries_pageable);
10427 /* inherit the parent map's page size */
10428 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
10429 for (
10430 old_entry = vm_map_first_entry(old_map);
10431 old_entry != vm_map_to_entry(old_map);
10432 ) {
10433
10434 entry_size = old_entry->vme_end - old_entry->vme_start;
10435
10436 switch (old_entry->inheritance) {
10437 case VM_INHERIT_NONE:
10438 break;
10439
10440 case VM_INHERIT_SHARE:
10441 vm_map_fork_share(old_map, old_entry, new_map);
10442 new_size += entry_size;
10443 break;
10444
10445 case VM_INHERIT_COPY:
10446
10447 /*
10448 * Inline the copy_quickly case;
10449 * upon failure, fall back on call
10450 * to vm_map_fork_copy.
10451 */
10452
10453 if(old_entry->is_sub_map)
10454 break;
10455 if ((old_entry->wired_count != 0) ||
10456 ((VME_OBJECT(old_entry) != NULL) &&
10457 (VME_OBJECT(old_entry)->true_share))) {
10458 goto slow_vm_map_fork_copy;
10459 }
10460
10461 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
10462 vm_map_entry_copy(new_entry, old_entry);
10463 if (new_entry->is_sub_map) {
10464 /* clear address space specifics */
10465 new_entry->use_pmap = FALSE;
10466 }
10467
10468 if (! vm_object_copy_quickly(
10469 &VME_OBJECT(new_entry),
10470 VME_OFFSET(old_entry),
10471 (old_entry->vme_end -
10472 old_entry->vme_start),
10473 &src_needs_copy,
10474 &new_entry_needs_copy)) {
10475 vm_map_entry_dispose(new_map, new_entry);
10476 goto slow_vm_map_fork_copy;
10477 }
10478
10479 /*
10480 * Handle copy-on-write obligations
10481 */
10482
10483 if (src_needs_copy && !old_entry->needs_copy) {
10484 vm_prot_t prot;
10485
10486 prot = old_entry->protection & ~VM_PROT_WRITE;
10487
10488 if (override_nx(old_map, VME_ALIAS(old_entry))
10489 && prot)
10490 prot |= VM_PROT_EXECUTE;
10491
10492 vm_object_pmap_protect(
10493 VME_OBJECT(old_entry),
10494 VME_OFFSET(old_entry),
10495 (old_entry->vme_end -
10496 old_entry->vme_start),
10497 ((old_entry->is_shared
10498 || old_map->mapped_in_other_pmaps)
10499 ? PMAP_NULL :
10500 old_map->pmap),
10501 old_entry->vme_start,
10502 prot);
10503
10504 assert(old_entry->wired_count == 0);
10505 old_entry->needs_copy = TRUE;
10506 }
10507 new_entry->needs_copy = new_entry_needs_copy;
10508
10509 /*
10510 * Insert the entry at the end
10511 * of the map.
10512 */
10513
10514 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
10515 new_entry);
10516 new_size += entry_size;
10517 break;
10518
10519 slow_vm_map_fork_copy:
10520 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
10521 new_size += entry_size;
10522 }
10523 continue;
10524 }
10525 old_entry = old_entry->vme_next;
10526 }
10527
10528
10529 new_map->size = new_size;
10530 vm_map_unlock(old_map);
10531 vm_map_deallocate(old_map);
10532
10533 return(new_map);
10534 }
10535
10536 /*
10537 * vm_map_exec:
10538 *
10539 * Setup the "new_map" with the proper execution environment according
10540 * to the type of executable (platform, 64bit, chroot environment).
10541 * Map the comm page and shared region, etc...
10542 */
10543 kern_return_t
10544 vm_map_exec(
10545 vm_map_t new_map,
10546 task_t task,
10547 void *fsroot,
10548 cpu_type_t cpu)
10549 {
10550 SHARED_REGION_TRACE_DEBUG(
10551 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
10552 (void *)VM_KERNEL_ADDRPERM(current_task()),
10553 (void *)VM_KERNEL_ADDRPERM(new_map),
10554 (void *)VM_KERNEL_ADDRPERM(task),
10555 (void *)VM_KERNEL_ADDRPERM(fsroot),
10556 cpu));
10557 (void) vm_commpage_enter(new_map, task);
10558 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
10559 SHARED_REGION_TRACE_DEBUG(
10560 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
10561 (void *)VM_KERNEL_ADDRPERM(current_task()),
10562 (void *)VM_KERNEL_ADDRPERM(new_map),
10563 (void *)VM_KERNEL_ADDRPERM(task),
10564 (void *)VM_KERNEL_ADDRPERM(fsroot),
10565 cpu));
10566 return KERN_SUCCESS;
10567 }
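/*
 * Illustrative sketch (assumption, not in the original source): the Mach-O
 * loader is expected to call this once the fresh address space exists,
 * roughly as
 *
 *	(void) vm_map_exec(new_map, task, (void *)fsroot, cpu_type());
 *
 * so that the commpage and shared region are in place before the image's
 * segments are mapped.
 */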
10568
10569 /*
10570 * vm_map_lookup_locked:
10571 *
10572 * Finds the VM object, offset, and
10573 * protection for a given virtual address in the
10574 * specified map, assuming a page fault of the
10575 * type specified.
10576 *
10577 * Returns the (object, offset, protection) for
10578 * this address, whether it is wired down, and whether
10579 * this map has the only reference to the data in question.
10580 * In order to later verify this lookup, a "version"
10581 * is returned.
10582 *
10583 * The map MUST be locked by the caller and WILL be
10584 * locked on exit. In order to guarantee the
10585 * existence of the returned object, it is returned
10586 * locked.
10587 *
10588 * If a lookup is requested with "write protection"
10589 * specified, the map may be changed to perform virtual
10590 * copying operations, although the data referenced will
10591 * remain the same.
10592 */
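/*
 * Illustrative sketch (assumption, not in the original source; variable
 * names are illustrative): a fault handler typically pairs this call with
 * vm_map_verify():
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  OBJECT_LOCK_EXCLUSIVE, &version,
 *				  &object, &offset, &prot, &wired,
 *				  &fault_info, &real_map);
 *	if (kr == KERN_SUCCESS) {
 *		if (real_map != map)
 *			vm_map_unlock(real_map);
 *		vm_map_unlock_read(map);
 *		...resolve the fault against the locked (object, offset)...
 *		if (!vm_map_verify(map, &version)) {
 *			...the map changed while unlocked: redo the lookup...
 *		}
 *	}
 */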
10593 kern_return_t
10594 vm_map_lookup_locked(
10595 vm_map_t *var_map, /* IN/OUT */
10596 vm_map_offset_t vaddr,
10597 vm_prot_t fault_type,
10598 int object_lock_type,
10599 vm_map_version_t *out_version, /* OUT */
10600 vm_object_t *object, /* OUT */
10601 vm_object_offset_t *offset, /* OUT */
10602 vm_prot_t *out_prot, /* OUT */
10603 boolean_t *wired, /* OUT */
10604 vm_object_fault_info_t fault_info, /* OUT */
10605 vm_map_t *real_map)
10606 {
10607 vm_map_entry_t entry;
10608 register vm_map_t map = *var_map;
10609 vm_map_t old_map = *var_map;
10610 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
10611 vm_map_offset_t cow_parent_vaddr = 0;
10612 vm_map_offset_t old_start = 0;
10613 vm_map_offset_t old_end = 0;
10614 register vm_prot_t prot;
10615 boolean_t mask_protections;
10616 boolean_t force_copy;
10617 vm_prot_t original_fault_type;
10618
10619 /*
10620 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10621 * as a mask against the mapping's actual protections, not as an
10622 * absolute value.
10623 */
10624 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
10625 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10626 fault_type &= VM_PROT_ALL;
10627 original_fault_type = fault_type;
10628
10629 *real_map = map;
10630
10631 RetryLookup:
10632 fault_type = original_fault_type;
10633
10634 /*
10635 * If the map has an interesting hint, try it before calling
10636 * full blown lookup routine.
10637 */
10638 entry = map->hint;
10639
10640 if ((entry == vm_map_to_entry(map)) ||
10641 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10642 vm_map_entry_t tmp_entry;
10643
10644 /*
10645 * Entry was either not a valid hint, or the vaddr
10646 * was not contained in the entry, so do a full lookup.
10647 */
10648 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10649 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10650 vm_map_unlock(cow_sub_map_parent);
10651 if((*real_map != map)
10652 && (*real_map != cow_sub_map_parent))
10653 vm_map_unlock(*real_map);
10654 return KERN_INVALID_ADDRESS;
10655 }
10656
10657 entry = tmp_entry;
10658 }
10659 if(map == old_map) {
10660 old_start = entry->vme_start;
10661 old_end = entry->vme_end;
10662 }
10663
10664 /*
10665 * Handle submaps. Drop lock on upper map, submap is
10666 * returned locked.
10667 */
10668
10669 submap_recurse:
10670 if (entry->is_sub_map) {
10671 vm_map_offset_t local_vaddr;
10672 vm_map_offset_t end_delta;
10673 vm_map_offset_t start_delta;
10674 vm_map_entry_t submap_entry;
10675 boolean_t mapped_needs_copy=FALSE;
10676
10677 local_vaddr = vaddr;
10678
10679 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10680 /* if real_map equals map we unlock below */
10681 if ((*real_map != map) &&
10682 (*real_map != cow_sub_map_parent))
10683 vm_map_unlock(*real_map);
10684 *real_map = VME_SUBMAP(entry);
10685 }
10686
10687 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10688 if (!mapped_needs_copy) {
10689 if (vm_map_lock_read_to_write(map)) {
10690 vm_map_lock_read(map);
10691 *real_map = map;
10692 goto RetryLookup;
10693 }
10694 vm_map_lock_read(VME_SUBMAP(entry));
10695 *var_map = VME_SUBMAP(entry);
10696 cow_sub_map_parent = map;
10697 /* reset base to map before cow object */
10698 /* this is the map which will accept */
10699 /* the new cow object */
10700 old_start = entry->vme_start;
10701 old_end = entry->vme_end;
10702 cow_parent_vaddr = vaddr;
10703 mapped_needs_copy = TRUE;
10704 } else {
10705 vm_map_lock_read(VME_SUBMAP(entry));
10706 *var_map = VME_SUBMAP(entry);
10707 if((cow_sub_map_parent != map) &&
10708 (*real_map != map))
10709 vm_map_unlock(map);
10710 }
10711 } else {
10712 vm_map_lock_read(VME_SUBMAP(entry));
10713 *var_map = VME_SUBMAP(entry);
10714 /* leave the map locked if it is a target */
10715 /* cow sub_map (see above); otherwise, just */
10716 /* follow the maps down to the object. */
10717 /* Here we unlock knowing we are not */
10718 /* revisiting the map. */
10719 if((*real_map != map) && (map != cow_sub_map_parent))
10720 vm_map_unlock_read(map);
10721 }
10722
10723 map = *var_map;
10724
10725 /* calculate the offset in the submap for vaddr */
10726 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
10727
10728 RetrySubMap:
10729 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10730 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10731 vm_map_unlock(cow_sub_map_parent);
10732 }
10733 if((*real_map != map)
10734 && (*real_map != cow_sub_map_parent)) {
10735 vm_map_unlock(*real_map);
10736 }
10737 *real_map = map;
10738 return KERN_INVALID_ADDRESS;
10739 }
10740
10741 /* find the attenuated shadow of the underlying object */
10742 /* on our target map */
10743
10744 /* In English: the submap object may extend beyond the */
10745 /* region mapped by the entry, or may only fill a portion */
10746 /* of it. For our purposes, we only care if the object */
10747 /* doesn't fill it completely. In that case the area which */
10748 /* will ultimately be clipped in the top map only needs */
10749 /* to be as big as the portion of the underlying entry */
10750 /* which is mapped. */
10751 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
10752 submap_entry->vme_start - VME_OFFSET(entry) : 0;
10753
10754 end_delta =
10755 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
10756 submap_entry->vme_end ?
10757 0 : (VME_OFFSET(entry) +
10758 (old_end - old_start))
10759 - submap_entry->vme_end;
10760
10761 old_start += start_delta;
10762 old_end -= end_delta;
10763
10764 if(submap_entry->is_sub_map) {
10765 entry = submap_entry;
10766 vaddr = local_vaddr;
10767 goto submap_recurse;
10768 }
10769
10770 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10771
10772 vm_object_t sub_object, copy_object;
10773 vm_object_offset_t copy_offset;
10774 vm_map_offset_t local_start;
10775 vm_map_offset_t local_end;
10776 boolean_t copied_slowly = FALSE;
10777
10778 if (vm_map_lock_read_to_write(map)) {
10779 vm_map_lock_read(map);
10780 old_start -= start_delta;
10781 old_end += end_delta;
10782 goto RetrySubMap;
10783 }
10784
10785
10786 sub_object = VME_OBJECT(submap_entry);
10787 if (sub_object == VM_OBJECT_NULL) {
10788 sub_object =
10789 vm_object_allocate(
10790 (vm_map_size_t)
10791 (submap_entry->vme_end -
10792 submap_entry->vme_start));
10793 VME_OBJECT_SET(submap_entry, sub_object);
10794 VME_OFFSET_SET(submap_entry, 0);
10795 }
10796 local_start = local_vaddr -
10797 (cow_parent_vaddr - old_start);
10798 local_end = local_vaddr +
10799 (old_end - cow_parent_vaddr);
10800 vm_map_clip_start(map, submap_entry, local_start);
10801 vm_map_clip_end(map, submap_entry, local_end);
10802 if (submap_entry->is_sub_map) {
10803 /* unnesting was done when clipping */
10804 assert(!submap_entry->use_pmap);
10805 }
10806
10807 /* This is the COW case; let's connect */
10808 /* an entry in our space to the underlying */
10809 /* object in the submap, bypassing the */
10810 /* submap. */
10811
10812
10813 if(submap_entry->wired_count != 0 ||
10814 (sub_object->copy_strategy ==
10815 MEMORY_OBJECT_COPY_NONE)) {
10816 vm_object_lock(sub_object);
10817 vm_object_copy_slowly(sub_object,
10818 VME_OFFSET(submap_entry),
10819 (submap_entry->vme_end -
10820 submap_entry->vme_start),
10821 FALSE,
10822 &copy_object);
10823 copied_slowly = TRUE;
10824 } else {
10825
10826 /* set up shadow object */
10827 copy_object = sub_object;
10828 vm_object_reference(copy_object);
10829 sub_object->shadowed = TRUE;
10830 assert(submap_entry->wired_count == 0);
10831 submap_entry->needs_copy = TRUE;
10832
10833 prot = submap_entry->protection & ~VM_PROT_WRITE;
10834
10835 if (override_nx(old_map,
10836 VME_ALIAS(submap_entry))
10837 && prot)
10838 prot |= VM_PROT_EXECUTE;
10839
10840 vm_object_pmap_protect(
10841 sub_object,
10842 VME_OFFSET(submap_entry),
10843 submap_entry->vme_end -
10844 submap_entry->vme_start,
10845 (submap_entry->is_shared
10846 || map->mapped_in_other_pmaps) ?
10847 PMAP_NULL : map->pmap,
10848 submap_entry->vme_start,
10849 prot);
10850 }
10851
10852 /*
10853 * Adjust the fault offset to the submap entry.
10854 */
10855 copy_offset = (local_vaddr -
10856 submap_entry->vme_start +
10857 VME_OFFSET(submap_entry));
10858
10859 /* This works differently from the */
10860 /* normal submap case. We go back */
10861 /* to the parent of the cow map and */
10862 /* clip out the target portion of */
10863 /* the sub_map, substituting the */
10864 /* new copy object. */
10865
10866 vm_map_unlock(map);
10867 local_start = old_start;
10868 local_end = old_end;
10869 map = cow_sub_map_parent;
10870 *var_map = cow_sub_map_parent;
10871 vaddr = cow_parent_vaddr;
10872 cow_sub_map_parent = NULL;
10873
10874 if(!vm_map_lookup_entry(map,
10875 vaddr, &entry)) {
10876 vm_object_deallocate(
10877 copy_object);
10878 vm_map_lock_write_to_read(map);
10879 return KERN_INVALID_ADDRESS;
10880 }
10881
10882 /* clip out the portion of space */
10883 /* mapped by the sub map which */
10884 /* corresponds to the underlying */
10885 /* object */
10886
10887 /*
10888 * Clip (and unnest) the smallest nested chunk
10889 * possible around the faulting address...
10890 */
10891 local_start = vaddr & ~(pmap_nesting_size_min - 1);
10892 local_end = local_start + pmap_nesting_size_min;
10893 /*
10894 * ... but don't go beyond the "old_start" to "old_end"
10895 * range, to avoid spanning over another VM region
10896 * with a possibly different VM object and/or offset.
10897 */
10898 if (local_start < old_start) {
10899 local_start = old_start;
10900 }
10901 if (local_end > old_end) {
10902 local_end = old_end;
10903 }
10904 /*
10905 * Adjust copy_offset to the start of the range.
10906 */
10907 copy_offset -= (vaddr - local_start);
10908
10909 vm_map_clip_start(map, entry, local_start);
10910 vm_map_clip_end(map, entry, local_end);
10911 if (entry->is_sub_map) {
10912 /* unnesting was done when clipping */
10913 assert(!entry->use_pmap);
10914 }
10915
10916 /* substitute copy object for */
10917 /* shared map entry */
10918 vm_map_deallocate(VME_SUBMAP(entry));
10919 assert(!entry->iokit_acct);
10920 entry->is_sub_map = FALSE;
10921 entry->use_pmap = TRUE;
10922 VME_OBJECT_SET(entry, copy_object);
10923
10924 /* propagate the submap entry's protections */
10925 entry->protection |= submap_entry->protection;
10926 entry->max_protection |= submap_entry->max_protection;
10927
10928 if(copied_slowly) {
10929 VME_OFFSET_SET(entry, local_start - old_start);
10930 entry->needs_copy = FALSE;
10931 entry->is_shared = FALSE;
10932 } else {
10933 VME_OFFSET_SET(entry, copy_offset);
10934 assert(entry->wired_count == 0);
10935 entry->needs_copy = TRUE;
10936 if(entry->inheritance == VM_INHERIT_SHARE)
10937 entry->inheritance = VM_INHERIT_COPY;
10938 if (map != old_map)
10939 entry->is_shared = TRUE;
10940 }
10941 if(entry->inheritance == VM_INHERIT_SHARE)
10942 entry->inheritance = VM_INHERIT_COPY;
10943
10944 vm_map_lock_write_to_read(map);
10945 } else {
10946 if((cow_sub_map_parent)
10947 && (cow_sub_map_parent != *real_map)
10948 && (cow_sub_map_parent != map)) {
10949 vm_map_unlock(cow_sub_map_parent);
10950 }
10951 entry = submap_entry;
10952 vaddr = local_vaddr;
10953 }
10954 }
10955
10956 /*
10957 * Check whether this task is allowed to have
10958 * this page.
10959 */
10960
10961 prot = entry->protection;
10962
10963 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
10964 /*
10965 * HACK -- if not a stack, then allow execution
10966 */
10967 prot |= VM_PROT_EXECUTE;
10968 }
10969
10970 if (mask_protections) {
10971 fault_type &= prot;
10972 if (fault_type == VM_PROT_NONE) {
10973 goto protection_failure;
10974 }
10975 }
10976 if ((fault_type & (prot)) != fault_type) {
10977 protection_failure:
10978 if (*real_map != map) {
10979 vm_map_unlock(*real_map);
10980 }
10981 *real_map = map;
10982
10983 if ((fault_type & VM_PROT_EXECUTE) && prot)
10984 log_stack_execution_failure((addr64_t)vaddr, prot);
10985
10986 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10987 return KERN_PROTECTION_FAILURE;
10988 }
10989
10990 /*
10991 * If this page is not pageable, we have to get
10992 * it for all possible accesses.
10993 */
10994
10995 *wired = (entry->wired_count != 0);
10996 if (*wired)
10997 fault_type = prot;
10998
10999 /*
11000 * If the entry was copy-on-write, we either ...
11001 */
11002
11003 if (entry->needs_copy) {
11004 /*
11005 * If we want to write the page, we may as well
11006 * handle that now since we've got the map locked.
11007 *
11008 * If we don't need to write the page, we just
11009 * demote the permissions allowed.
11010 */
11011
11012 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
11013 /*
11014 * Make a new object, and place it in the
11015 * object chain. Note that no new references
11016 * have appeared -- one just moved from the
11017 * map to the new object.
11018 */
11019
11020 if (vm_map_lock_read_to_write(map)) {
11021 vm_map_lock_read(map);
11022 goto RetryLookup;
11023 }
11024 VME_OBJECT_SHADOW(entry,
11025 (vm_map_size_t) (entry->vme_end -
11026 entry->vme_start));
11027
11028 VME_OBJECT(entry)->shadowed = TRUE;
11029 entry->needs_copy = FALSE;
11030 vm_map_lock_write_to_read(map);
11031 }
11032 else {
11033 /*
11034 * We're attempting to read a copy-on-write
11035 * page -- don't allow writes.
11036 */
11037
11038 prot &= (~VM_PROT_WRITE);
11039 }
11040 }
11041
11042 /*
11043 * Create an object if necessary.
11044 */
11045 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
11046
11047 if (vm_map_lock_read_to_write(map)) {
11048 vm_map_lock_read(map);
11049 goto RetryLookup;
11050 }
11051
11052 VME_OBJECT_SET(entry,
11053 vm_object_allocate(
11054 (vm_map_size_t)(entry->vme_end -
11055 entry->vme_start)));
11056 VME_OFFSET_SET(entry, 0);
11057 vm_map_lock_write_to_read(map);
11058 }
11059
11060 /*
11061 * Return the object/offset from this entry. If the entry
11062 * was copy-on-write or empty, it has been fixed up. Also
11063 * return the protection.
11064 */
11065
11066 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11067 *object = VME_OBJECT(entry);
11068 *out_prot = prot;
11069
11070 if (fault_info) {
11071 fault_info->interruptible = THREAD_UNINT; /* for now... */
11072 /* ... the caller will change "interruptible" if needed */
11073 fault_info->cluster_size = 0;
11074 fault_info->user_tag = VME_ALIAS(entry);
11075 fault_info->pmap_options = 0;
11076 if (entry->iokit_acct ||
11077 (!entry->is_sub_map && !entry->use_pmap)) {
11078 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11079 }
11080 fault_info->behavior = entry->behavior;
11081 fault_info->lo_offset = VME_OFFSET(entry);
11082 fault_info->hi_offset =
11083 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
11084 fault_info->no_cache = entry->no_cache;
11085 fault_info->stealth = FALSE;
11086 fault_info->io_sync = FALSE;
11087 if (entry->used_for_jit ||
11088 entry->vme_resilient_codesign) {
11089 fault_info->cs_bypass = TRUE;
11090 } else {
11091 fault_info->cs_bypass = FALSE;
11092 }
11093 fault_info->mark_zf_absent = FALSE;
11094 fault_info->batch_pmap_op = FALSE;
11095 }
11096
11097 /*
11098 * Lock the object to prevent it from disappearing
11099 */
11100 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11101 vm_object_lock(*object);
11102 else
11103 vm_object_lock_shared(*object);
11104
11105 /*
11106 * Save the version number
11107 */
11108
11109 out_version->main_timestamp = map->timestamp;
11110
11111 return KERN_SUCCESS;
11112 }
11113
11114
11115 /*
11116 * vm_map_verify:
11117 *
11118 * Verifies that the map in question has not changed
11119 * since the given version. If successful, the map
11120 * will not change until vm_map_verify_done() is called.
11121 */
11122 boolean_t
11123 vm_map_verify(
11124 register vm_map_t map,
11125 register vm_map_version_t *version) /* REF */
11126 {
11127 boolean_t result;
11128
11129 vm_map_lock_read(map);
11130 result = (map->timestamp == version->main_timestamp);
11131
11132 if (!result)
11133 vm_map_unlock_read(map);
11134
11135 return(result);
11136 }
11137
11138 /*
11139 * vm_map_verify_done:
11140 *
11141 * Releases locks acquired by a vm_map_verify.
11142 *
11143 * This is now a macro in vm/vm_map.h. It does a
11144 * vm_map_unlock_read on the map.
11145 */
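/*
 * Illustrative pairing (assumption, not in the original source):
 *
 *	if (vm_map_verify(map, &version)) {
 *		...the map is read-locked and unchanged since the lookup,
 *		   so cached results may be used safely...
 *		vm_map_verify_done(map, &version);	(drops the read lock)
 *	} else {
 *		...redo the lookup...
 *	}
 */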
11146
11147
11148 /*
11149 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11150 * Goes away after regular vm_region_recurse function migrates to
11151 * 64 bits
11152 * vm_region_recurse: A form of vm_region which follows the
11153 * submaps in a target map
11154 *
11155 */
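/*
 * Illustrative walk (assumption, not in the original source): a client of
 * the recursing region call typically iterates an address space like this:
 *
 *	vm_map_offset_t			addr = 0;
 *	vm_map_size_t			size;
 *	natural_t			depth = 0;
 *	vm_region_submap_info_data_64_t	info;
 *	mach_msg_type_number_t		count;
 *
 *	for (;;) {
 *		count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		if (vm_map_region_recurse_64(map, &addr, &size, &depth,
 *					     (vm_region_submap_info_64_t)&info,
 *					     &count) != KERN_SUCCESS)
 *			break;
 *		if (info.is_submap) {
 *			depth++;	(descend and re-query the same address)
 *			continue;
 *		}
 *		...consume [addr, addr + size)...
 *		addr += size;
 *	}
 */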
11156
11157 kern_return_t
11158 vm_map_region_recurse_64(
11159 vm_map_t map,
11160 vm_map_offset_t *address, /* IN/OUT */
11161 vm_map_size_t *size, /* OUT */
11162 natural_t *nesting_depth, /* IN/OUT */
11163 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11164 mach_msg_type_number_t *count) /* IN/OUT */
11165 {
11166 mach_msg_type_number_t original_count;
11167 vm_region_extended_info_data_t extended;
11168 vm_map_entry_t tmp_entry;
11169 vm_map_offset_t user_address;
11170 unsigned int user_max_depth;
11171
11172 /*
11173 * "curr_entry" is the VM map entry preceding or including the
11174 * address we're looking for.
11175 * "curr_map" is the map or sub-map containing "curr_entry".
11176 * "curr_address" is the equivalent of the top map's "user_address"
11177 * in the current map.
11178 * "curr_offset" is the cumulated offset of "curr_map" in the
11179 * target task's address space.
11180 * "curr_depth" is the depth of "curr_map" in the chain of
11181 * sub-maps.
11182 *
11183 * "curr_max_below" and "curr_max_above" limit the range (around
11184 * "curr_address") we should take into account in the current (sub)map.
11185 * They limit the range to what's visible through the map entries
11186 * we've traversed from the top map to the current map.
11187 *
11188 */
11189 vm_map_entry_t curr_entry;
11190 vm_map_address_t curr_address;
11191 vm_map_offset_t curr_offset;
11192 vm_map_t curr_map;
11193 unsigned int curr_depth;
11194 vm_map_offset_t curr_max_below, curr_max_above;
11195 vm_map_offset_t curr_skip;
11196
11197 /*
11198 * "next_" is the same as "curr_" but for the VM region immediately
11199 * after the address we're looking for. We need to keep track of this
11200 * too because we want to return info about that region if the
11201 * address we're looking for is not mapped.
11202 */
11203 vm_map_entry_t next_entry;
11204 vm_map_offset_t next_offset;
11205 vm_map_offset_t next_address;
11206 vm_map_t next_map;
11207 unsigned int next_depth;
11208 vm_map_offset_t next_max_below, next_max_above;
11209 vm_map_offset_t next_skip;
11210
11211 boolean_t look_for_pages;
11212 vm_region_submap_short_info_64_t short_info;
11213
11214 if (map == VM_MAP_NULL) {
11215 /* no address space to work on */
11216 return KERN_INVALID_ARGUMENT;
11217 }
11218
11219
11220 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11221 /*
11222 * "info" structure is not big enough and
11223 * would overflow
11224 */
11225 return KERN_INVALID_ARGUMENT;
11226 }
11227
11228 original_count = *count;
11229
11230 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11231 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11232 look_for_pages = FALSE;
11233 short_info = (vm_region_submap_short_info_64_t) submap_info;
11234 submap_info = NULL;
11235 } else {
11236 look_for_pages = TRUE;
11237 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
11238 short_info = NULL;
11239
11240 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11241 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11242 }
11243 }
11244
11245 user_address = *address;
11246 user_max_depth = *nesting_depth;
11247
11248 if (not_in_kdp) {
11249 vm_map_lock_read(map);
11250 }
11251
11252 recurse_again:
11253 curr_entry = NULL;
11254 curr_map = map;
11255 curr_address = user_address;
11256 curr_offset = 0;
11257 curr_skip = 0;
11258 curr_depth = 0;
11259 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11260 curr_max_below = curr_address;
11261
11262 next_entry = NULL;
11263 next_map = NULL;
11264 next_address = 0;
11265 next_offset = 0;
11266 next_skip = 0;
11267 next_depth = 0;
11268 next_max_above = (vm_map_offset_t) -1;
11269 next_max_below = (vm_map_offset_t) -1;
11270
11271 for (;;) {
11272 if (vm_map_lookup_entry(curr_map,
11273 curr_address,
11274 &tmp_entry)) {
11275 /* tmp_entry contains the address we're looking for */
11276 curr_entry = tmp_entry;
11277 } else {
11278 vm_map_offset_t skip;
11279 /*
11280 * The address is not mapped. "tmp_entry" is the
11281 * map entry preceding the address. We want the next
11282 * one, if it exists.
11283 */
11284 curr_entry = tmp_entry->vme_next;
11285
11286 if (curr_entry == vm_map_to_entry(curr_map) ||
11287 (curr_entry->vme_start >=
11288 curr_address + curr_max_above)) {
11289 /* no next entry at this level: stop looking */
11290 if (not_in_kdp) {
11291 vm_map_unlock_read(curr_map);
11292 }
11293 curr_entry = NULL;
11294 curr_map = NULL;
11295 curr_skip = 0;
11296 curr_offset = 0;
11297 curr_depth = 0;
11298 curr_max_above = 0;
11299 curr_max_below = 0;
11300 break;
11301 }
11302
11303 /* adjust current address and offset */
11304 skip = curr_entry->vme_start - curr_address;
11305 curr_address = curr_entry->vme_start;
11306 curr_skip += skip;
11307 curr_offset += skip;
11308 curr_max_above -= skip;
11309 curr_max_below = 0;
11310 }
11311
11312 /*
11313 * Is the next entry at this level closer to the address (or
11314 * deeper in the submap chain) than the one we had
11315 * so far ?
11316 */
11317 tmp_entry = curr_entry->vme_next;
11318 if (tmp_entry == vm_map_to_entry(curr_map)) {
11319 /* no next entry at this level */
11320 } else if (tmp_entry->vme_start >=
11321 curr_address + curr_max_above) {
11322 /*
11323 * tmp_entry is beyond the scope of what we mapped of
11324 * this submap in the upper level: ignore it.
11325 */
11326 } else if ((next_entry == NULL) ||
11327 (tmp_entry->vme_start + curr_offset <=
11328 next_entry->vme_start + next_offset)) {
11329 /*
11330 * We didn't have a "next_entry" or this one is
11331 * closer to the address we're looking for:
11332 * use this "tmp_entry" as the new "next_entry".
11333 */
11334 if (next_entry != NULL) {
11335 /* unlock the last "next_map" */
11336 if (next_map != curr_map && not_in_kdp) {
11337 vm_map_unlock_read(next_map);
11338 }
11339 }
11340 next_entry = tmp_entry;
11341 next_map = curr_map;
11342 next_depth = curr_depth;
11343 next_address = next_entry->vme_start;
11344 next_skip = curr_skip;
11345 next_skip += (next_address - curr_address);
11346 next_offset = curr_offset;
11347 next_offset += (next_address - curr_address);
11348 next_max_above = MIN(next_max_above, curr_max_above);
11349 next_max_above = MIN(next_max_above,
11350 next_entry->vme_end - next_address);
11351 next_max_below = MIN(next_max_below, curr_max_below);
11352 next_max_below = MIN(next_max_below,
11353 next_address - next_entry->vme_start);
11354 }
11355
11356 /*
11357 * "curr_max_{above,below}" allow us to keep track of the
11358 * portion of the submap that is actually mapped at this level:
11359 * the rest of that submap is irrelevant to us, since it's not
11360 * mapped here.
11361 * The relevant portion of the map starts at
11362 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
11363 */
11364 curr_max_above = MIN(curr_max_above,
11365 curr_entry->vme_end - curr_address);
11366 curr_max_below = MIN(curr_max_below,
11367 curr_address - curr_entry->vme_start);
11368
11369 if (!curr_entry->is_sub_map ||
11370 curr_depth >= user_max_depth) {
11371 /*
11372 * We hit a leaf map or we reached the maximum depth
11373 * we could, so stop looking. Keep the current map
11374 * locked.
11375 */
11376 break;
11377 }
11378
11379 /*
11380 * Get down to the next submap level.
11381 */
11382
11383 /*
11384 * Lock the next level and unlock the current level,
11385 * unless we need to keep it locked to access the "next_entry"
11386 * later.
11387 */
11388 if (not_in_kdp) {
11389 vm_map_lock_read(VME_SUBMAP(curr_entry));
11390 }
11391 if (curr_map == next_map) {
11392 /* keep "next_map" locked in case we need it */
11393 } else {
11394 /* release this map */
11395 if (not_in_kdp)
11396 vm_map_unlock_read(curr_map);
11397 }
11398
11399 /*
11400 * Adjust the offset. "curr_entry" maps the submap
11401 * at relative address "curr_entry->vme_start" in the
11402 * curr_map but skips the first "VME_OFFSET(curr_entry)"
11403 * bytes of the submap.
11404 * "curr_offset" always represents the offset of a virtual
11405 * address in the curr_map relative to the absolute address
11406 * space (i.e. the top-level VM map).
11407 */
11408 curr_offset +=
11409 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
11410 curr_address = user_address + curr_offset;
11411 /* switch to the submap */
11412 curr_map = VME_SUBMAP(curr_entry);
11413 curr_depth++;
11414 curr_entry = NULL;
11415 }
11416
11417 if (curr_entry == NULL) {
11418 /* no VM region contains the address... */
11419 if (next_entry == NULL) {
11420 /* ... and no VM region follows it either */
11421 return KERN_INVALID_ADDRESS;
11422 }
11423 /* ... gather info about the next VM region */
11424 curr_entry = next_entry;
11425 curr_map = next_map; /* still locked ... */
11426 curr_address = next_address;
11427 curr_skip = next_skip;
11428 curr_offset = next_offset;
11429 curr_depth = next_depth;
11430 curr_max_above = next_max_above;
11431 curr_max_below = next_max_below;
11432 } else {
11433 /* we won't need "next_entry" after all */
11434 if (next_entry != NULL) {
11435 /* release "next_map" */
11436 if (next_map != curr_map && not_in_kdp) {
11437 vm_map_unlock_read(next_map);
11438 }
11439 }
11440 }
11441 next_entry = NULL;
11442 next_map = NULL;
11443 next_offset = 0;
11444 next_skip = 0;
11445 next_depth = 0;
11446 next_max_below = -1;
11447 next_max_above = -1;
11448
11449 if (curr_entry->is_sub_map &&
11450 curr_depth < user_max_depth) {
11451 /*
11452 * We're not as deep as we could be: we must have
11453 * gone back up after not finding anything mapped
11454 * below the original top-level map entry.
11455 * Let's move "curr_address" forward and recurse again.
11456 */
11457 user_address = curr_address;
11458 goto recurse_again;
11459 }
11460
11461 *nesting_depth = curr_depth;
11462 *size = curr_max_above + curr_max_below;
11463 *address = user_address + curr_skip - curr_max_below;
11464
11465 // LP64todo: all the current tools are 32-bit, so this obviously never worked for 64-bit
11466 // and should probably be a real 32-bit ID rather than a pointer.
11467 // Current users just check for equality.
11468 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
11469
11470 if (look_for_pages) {
11471 submap_info->user_tag = VME_ALIAS(curr_entry);
11472 submap_info->offset = VME_OFFSET(curr_entry);
11473 submap_info->protection = curr_entry->protection;
11474 submap_info->inheritance = curr_entry->inheritance;
11475 submap_info->max_protection = curr_entry->max_protection;
11476 submap_info->behavior = curr_entry->behavior;
11477 submap_info->user_wired_count = curr_entry->user_wired_count;
11478 submap_info->is_submap = curr_entry->is_sub_map;
11479 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11480 } else {
11481 short_info->user_tag = VME_ALIAS(curr_entry);
11482 short_info->offset = VME_OFFSET(curr_entry);
11483 short_info->protection = curr_entry->protection;
11484 short_info->inheritance = curr_entry->inheritance;
11485 short_info->max_protection = curr_entry->max_protection;
11486 short_info->behavior = curr_entry->behavior;
11487 short_info->user_wired_count = curr_entry->user_wired_count;
11488 short_info->is_submap = curr_entry->is_sub_map;
11489 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11490 }
11491
11492 extended.pages_resident = 0;
11493 extended.pages_swapped_out = 0;
11494 extended.pages_shared_now_private = 0;
11495 extended.pages_dirtied = 0;
11496 extended.pages_reusable = 0;
11497 extended.external_pager = 0;
11498 extended.shadow_depth = 0;
11499 extended.share_mode = SM_EMPTY;
11500 extended.ref_count = 0;
11501
11502 if (not_in_kdp) {
11503 if (!curr_entry->is_sub_map) {
11504 vm_map_offset_t range_start, range_end;
11505 range_start = MAX((curr_address - curr_max_below),
11506 curr_entry->vme_start);
11507 range_end = MIN((curr_address + curr_max_above),
11508 curr_entry->vme_end);
11509 vm_map_region_walk(curr_map,
11510 range_start,
11511 curr_entry,
11512 (VME_OFFSET(curr_entry) +
11513 (range_start -
11514 curr_entry->vme_start)),
11515 range_end - range_start,
11516 &extended,
11517 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
11518 if (extended.external_pager &&
11519 extended.ref_count == 2 &&
11520 extended.share_mode == SM_SHARED) {
11521 extended.share_mode = SM_PRIVATE;
11522 }
11523 } else {
11524 if (curr_entry->use_pmap) {
11525 extended.share_mode = SM_TRUESHARED;
11526 } else {
11527 extended.share_mode = SM_PRIVATE;
11528 }
11529 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
11530 }
11531 }
11532
11533 if (look_for_pages) {
11534 submap_info->pages_resident = extended.pages_resident;
11535 submap_info->pages_swapped_out = extended.pages_swapped_out;
11536 submap_info->pages_shared_now_private =
11537 extended.pages_shared_now_private;
11538 submap_info->pages_dirtied = extended.pages_dirtied;
11539 submap_info->external_pager = extended.external_pager;
11540 submap_info->shadow_depth = extended.shadow_depth;
11541 submap_info->share_mode = extended.share_mode;
11542 submap_info->ref_count = extended.ref_count;
11543
11544 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11545 submap_info->pages_reusable = extended.pages_reusable;
11546 }
11547 } else {
11548 short_info->external_pager = extended.external_pager;
11549 short_info->shadow_depth = extended.shadow_depth;
11550 short_info->share_mode = extended.share_mode;
11551 short_info->ref_count = extended.ref_count;
11552 }
11553
11554 if (not_in_kdp) {
11555 vm_map_unlock_read(curr_map);
11556 }
11557
11558 return KERN_SUCCESS;
11559 }
11560
11561 /*
11562 * vm_region:
11563 *
11564 * User call to obtain information about a region in
11565 * a task's address map. The basic, 64-bit basic,
11566 * extended and top info flavors are supported.
11567 *
11568 * XXX The reserved and behavior fields cannot be filled
11569 * in until the vm merge from the IK is completed, and
11570 * vm_reserve is implemented.
11571 */
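/*
 * Illustrative sketch (assumption, not in the original source): callers
 * choose a flavor and size "count" to match; e.g. for the 64-bit basic
 * info:
 *
 *	vm_region_basic_info_data_64_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t			object_name;
 *
 *	kr = vm_map_region(map, &addr, &size, VM_REGION_BASIC_INFO_64,
 *			   (vm_region_info_t)&info, &count, &object_name);
 */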
11572
11573 kern_return_t
11574 vm_map_region(
11575 vm_map_t map,
11576 vm_map_offset_t *address, /* IN/OUT */
11577 vm_map_size_t *size, /* OUT */
11578 vm_region_flavor_t flavor, /* IN */
11579 vm_region_info_t info, /* OUT */
11580 mach_msg_type_number_t *count, /* IN/OUT */
11581 mach_port_t *object_name) /* OUT */
11582 {
11583 vm_map_entry_t tmp_entry;
11584 vm_map_entry_t entry;
11585 vm_map_offset_t start;
11586
11587 if (map == VM_MAP_NULL)
11588 return(KERN_INVALID_ARGUMENT);
11589
11590 switch (flavor) {
11591
11592 case VM_REGION_BASIC_INFO:
11593 /* legacy for old 32-bit objects info */
11594 {
11595 vm_region_basic_info_t basic;
11596
11597 if (*count < VM_REGION_BASIC_INFO_COUNT)
11598 return(KERN_INVALID_ARGUMENT);
11599
11600 basic = (vm_region_basic_info_t) info;
11601 *count = VM_REGION_BASIC_INFO_COUNT;
11602
11603 vm_map_lock_read(map);
11604
11605 start = *address;
11606 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11607 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11608 vm_map_unlock_read(map);
11609 return(KERN_INVALID_ADDRESS);
11610 }
11611 } else {
11612 entry = tmp_entry;
11613 }
11614
11615 start = entry->vme_start;
11616
11617 basic->offset = (uint32_t)VME_OFFSET(entry);
11618 basic->protection = entry->protection;
11619 basic->inheritance = entry->inheritance;
11620 basic->max_protection = entry->max_protection;
11621 basic->behavior = entry->behavior;
11622 basic->user_wired_count = entry->user_wired_count;
11623 basic->reserved = entry->is_sub_map;
11624 *address = start;
11625 *size = (entry->vme_end - start);
11626
11627 if (object_name) *object_name = IP_NULL;
11628 if (entry->is_sub_map) {
11629 basic->shared = FALSE;
11630 } else {
11631 basic->shared = entry->is_shared;
11632 }
11633
11634 vm_map_unlock_read(map);
11635 return(KERN_SUCCESS);
11636 }
11637
11638 case VM_REGION_BASIC_INFO_64:
11639 {
11640 vm_region_basic_info_64_t basic;
11641
11642 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11643 return(KERN_INVALID_ARGUMENT);
11644
11645 basic = (vm_region_basic_info_64_t) info;
11646 *count = VM_REGION_BASIC_INFO_COUNT_64;
11647
11648 vm_map_lock_read(map);
11649
11650 start = *address;
11651 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11652 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11653 vm_map_unlock_read(map);
11654 return(KERN_INVALID_ADDRESS);
11655 }
11656 } else {
11657 entry = tmp_entry;
11658 }
11659
11660 start = entry->vme_start;
11661
11662 basic->offset = VME_OFFSET(entry);
11663 basic->protection = entry->protection;
11664 basic->inheritance = entry->inheritance;
11665 basic->max_protection = entry->max_protection;
11666 basic->behavior = entry->behavior;
11667 basic->user_wired_count = entry->user_wired_count;
11668 basic->reserved = entry->is_sub_map;
11669 *address = start;
11670 *size = (entry->vme_end - start);
11671
11672 if (object_name) *object_name = IP_NULL;
11673 if (entry->is_sub_map) {
11674 basic->shared = FALSE;
11675 } else {
11676 basic->shared = entry->is_shared;
11677 }
11678
11679 vm_map_unlock_read(map);
11680 return(KERN_SUCCESS);
11681 }
11682 case VM_REGION_EXTENDED_INFO:
11683 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11684 return(KERN_INVALID_ARGUMENT);
11685 /*fallthru*/
11686 case VM_REGION_EXTENDED_INFO__legacy:
11687 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11688 return KERN_INVALID_ARGUMENT;
11689
11690 {
11691 vm_region_extended_info_t extended;
11692 mach_msg_type_number_t original_count;
11693
11694 extended = (vm_region_extended_info_t) info;
11695
11696 vm_map_lock_read(map);
11697
11698 start = *address;
11699 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11700 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11701 vm_map_unlock_read(map);
11702 return(KERN_INVALID_ADDRESS);
11703 }
11704 } else {
11705 entry = tmp_entry;
11706 }
11707 start = entry->vme_start;
11708
11709 extended->protection = entry->protection;
11710 extended->user_tag = VME_ALIAS(entry);
11711 extended->pages_resident = 0;
11712 extended->pages_swapped_out = 0;
11713 extended->pages_shared_now_private = 0;
11714 extended->pages_dirtied = 0;
11715 extended->external_pager = 0;
11716 extended->shadow_depth = 0;
11717
11718 original_count = *count;
11719 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11720 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11721 } else {
11722 extended->pages_reusable = 0;
11723 *count = VM_REGION_EXTENDED_INFO_COUNT;
11724 }
11725
11726 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
11727
11728 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11729 extended->share_mode = SM_PRIVATE;
11730
11731 if (object_name)
11732 *object_name = IP_NULL;
11733 *address = start;
11734 *size = (entry->vme_end - start);
11735
11736 vm_map_unlock_read(map);
11737 return(KERN_SUCCESS);
11738 }
11739 case VM_REGION_TOP_INFO:
11740 {
11741 vm_region_top_info_t top;
11742
11743 if (*count < VM_REGION_TOP_INFO_COUNT)
11744 return(KERN_INVALID_ARGUMENT);
11745
11746 top = (vm_region_top_info_t) info;
11747 *count = VM_REGION_TOP_INFO_COUNT;
11748
11749 vm_map_lock_read(map);
11750
11751 start = *address;
11752 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11753 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11754 vm_map_unlock_read(map);
11755 return(KERN_INVALID_ADDRESS);
11756 }
11757 } else {
11758 entry = tmp_entry;
11759
11760 }
11761 start = entry->vme_start;
11762
11763 top->private_pages_resident = 0;
11764 top->shared_pages_resident = 0;
11765
11766 vm_map_region_top_walk(entry, top);
11767
11768 if (object_name)
11769 *object_name = IP_NULL;
11770 *address = start;
11771 *size = (entry->vme_end - start);
11772
11773 vm_map_unlock_read(map);
11774 return(KERN_SUCCESS);
11775 }
11776 default:
11777 return(KERN_INVALID_ARGUMENT);
11778 }
11779 }
11780
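/*
 * (Explanatory note, not in the original source.)
 * Number of resident pages to charge to a mapping of "entry_size" pages of
 * "obj": for an all-reusable object only the wired pages are counted,
 * otherwise reusable pages are excluded; the result is clamped to the size
 * of the mapping.
 */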
11781 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
11782 MIN((entry_size), \
11783 ((obj)->all_reusable ? \
11784 (obj)->wired_page_count : \
11785 (obj)->resident_page_count - (obj)->reusable_page_count))
11786
11787 void
11788 vm_map_region_top_walk(
11789 vm_map_entry_t entry,
11790 vm_region_top_info_t top)
11791 {
11792
11793 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
11794 top->share_mode = SM_EMPTY;
11795 top->ref_count = 0;
11796 top->obj_id = 0;
11797 return;
11798 }
11799
11800 {
11801 struct vm_object *obj, *tmp_obj;
11802 int ref_count;
11803 uint32_t entry_size;
11804
11805 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11806
11807 obj = VME_OBJECT(entry);
11808
11809 vm_object_lock(obj);
11810
11811 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11812 ref_count--;
11813
11814 assert(obj->reusable_page_count <= obj->resident_page_count);
11815 if (obj->shadow) {
11816 if (ref_count == 1)
11817 top->private_pages_resident =
11818 OBJ_RESIDENT_COUNT(obj, entry_size);
11819 else
11820 top->shared_pages_resident =
11821 OBJ_RESIDENT_COUNT(obj, entry_size);
11822 top->ref_count = ref_count;
11823 top->share_mode = SM_COW;
11824
11825 while ((tmp_obj = obj->shadow)) {
11826 vm_object_lock(tmp_obj);
11827 vm_object_unlock(obj);
11828 obj = tmp_obj;
11829
11830 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11831 ref_count--;
11832
11833 assert(obj->reusable_page_count <= obj->resident_page_count);
11834 top->shared_pages_resident +=
11835 OBJ_RESIDENT_COUNT(obj, entry_size);
11836 top->ref_count += ref_count - 1;
11837 }
11838 } else {
11839 if (entry->superpage_size) {
11840 top->share_mode = SM_LARGE_PAGE;
11841 top->shared_pages_resident = 0;
11842 top->private_pages_resident = entry_size;
11843 } else if (entry->needs_copy) {
11844 top->share_mode = SM_COW;
11845 top->shared_pages_resident =
11846 OBJ_RESIDENT_COUNT(obj, entry_size);
11847 } else {
11848 if (ref_count == 1 ||
11849 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11850 top->share_mode = SM_PRIVATE;
11851 top->private_pages_resident =
11852 OBJ_RESIDENT_COUNT(obj,
11853 entry_size);
11854 } else {
11855 top->share_mode = SM_SHARED;
11856 top->shared_pages_resident =
11857 OBJ_RESIDENT_COUNT(obj,
11858 entry_size);
11859 }
11860 }
11861 top->ref_count = ref_count;
11862 }
11863 /* XXX K64: obj_id will be truncated */
11864 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11865
11866 vm_object_unlock(obj);
11867 }
11868 }
11869
11870 void
11871 vm_map_region_walk(
11872 vm_map_t map,
11873 vm_map_offset_t va,
11874 vm_map_entry_t entry,
11875 vm_object_offset_t offset,
11876 vm_object_size_t range,
11877 vm_region_extended_info_t extended,
11878 boolean_t look_for_pages,
11879 mach_msg_type_number_t count)
11880 {
11881 register struct vm_object *obj, *tmp_obj;
11882 register vm_map_offset_t last_offset;
11883 register int i;
11884 register int ref_count;
11885 struct vm_object *shadow_object;
11886 int shadow_depth;
11887
11888 if ((VME_OBJECT(entry) == 0) ||
11889 (entry->is_sub_map) ||
11890 (VME_OBJECT(entry)->phys_contiguous &&
11891 !entry->superpage_size)) {
11892 extended->share_mode = SM_EMPTY;
11893 extended->ref_count = 0;
11894 return;
11895 }
11896
11897 if (entry->superpage_size) {
11898 extended->shadow_depth = 0;
11899 extended->share_mode = SM_LARGE_PAGE;
11900 extended->ref_count = 1;
11901 extended->external_pager = 0;
11902 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11903 extended->shadow_depth = 0;
11904 return;
11905 }
11906
11907 {
11908 obj = VME_OBJECT(entry);
11909
11910 vm_object_lock(obj);
11911
11912 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11913 ref_count--;
11914
11915 if (look_for_pages) {
11916 for (last_offset = offset + range;
11917 offset < last_offset;
11918 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11919 vm_map_region_look_for_page(map, va, obj,
11920 offset, ref_count,
11921 0, extended, count);
11922 }
11923 } else {
11924 shadow_object = obj->shadow;
11925 shadow_depth = 0;
11926
11927 if ( !(obj->pager_trusted) && !(obj->internal))
11928 extended->external_pager = 1;
11929
11930 if (shadow_object != VM_OBJECT_NULL) {
11931 vm_object_lock(shadow_object);
11932 for (;
11933 shadow_object != VM_OBJECT_NULL;
11934 shadow_depth++) {
11935 vm_object_t next_shadow;
11936
11937 if ( !(shadow_object->pager_trusted) &&
11938 !(shadow_object->internal))
11939 extended->external_pager = 1;
11940
11941 next_shadow = shadow_object->shadow;
11942 if (next_shadow) {
11943 vm_object_lock(next_shadow);
11944 }
11945 vm_object_unlock(shadow_object);
11946 shadow_object = next_shadow;
11947 }
11948 }
11949 extended->shadow_depth = shadow_depth;
11950 }
11951
11952 if (extended->shadow_depth || entry->needs_copy)
11953 extended->share_mode = SM_COW;
11954 else {
11955 if (ref_count == 1)
11956 extended->share_mode = SM_PRIVATE;
11957 else {
11958 if (obj->true_share)
11959 extended->share_mode = SM_TRUESHARED;
11960 else
11961 extended->share_mode = SM_SHARED;
11962 }
11963 }
11964 extended->ref_count = ref_count - extended->shadow_depth;
11965
11966 for (i = 0; i < extended->shadow_depth; i++) {
11967 if ((tmp_obj = obj->shadow) == 0)
11968 break;
11969 vm_object_lock(tmp_obj);
11970 vm_object_unlock(obj);
11971
11972 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11973 ref_count--;
11974
11975 extended->ref_count += ref_count;
11976 obj = tmp_obj;
11977 }
11978 vm_object_unlock(obj);
11979
11980 if (extended->share_mode == SM_SHARED) {
11981 register vm_map_entry_t cur;
11982 register vm_map_entry_t last;
11983 int my_refs;
11984
11985 obj = VME_OBJECT(entry);
11986 last = vm_map_to_entry(map);
11987 my_refs = 0;
11988
11989 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11990 ref_count--;
11991 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11992 my_refs += vm_map_region_count_obj_refs(cur, obj);
11993
11994 if (my_refs == ref_count)
11995 extended->share_mode = SM_PRIVATE_ALIASED;
11996 else if (my_refs > 1)
11997 extended->share_mode = SM_SHARED_ALIASED;
11998 }
11999 }
12000 }
12001
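/*
 * Illustrative sketch (editorial): vm_map_region_walk() above is the routine
 * behind the VM_REGION_EXTENDED_INFO flavor of mach_vm_region(), which
 * requests "look_for_pages", so the per-page loop above is the expensive
 * part.  Assuming the same setup as the TOP_INFO example earlier, only the
 * flavor, info structure and count change:
 *
 *	vm_region_extended_info_data_t einfo;
 *	mach_msg_type_number_t count = VM_REGION_EXTENDED_INFO_COUNT;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_EXTENDED_INFO, (vm_region_info_t)&einfo,
 *	    &count, &obj_name);
 *
 * Note that "pages_reusable" is only reported when the caller passes the
 * full VM_REGION_EXTENDED_INFO_COUNT (see the "count >=" check in
 * vm_map_region_look_for_page() below).
 */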
12002
12003 /* object is locked on entry and locked on return */
12004
12005
12006 static void
12007 vm_map_region_look_for_page(
12008 __unused vm_map_t map,
12009 __unused vm_map_offset_t va,
12010 vm_object_t object,
12011 vm_object_offset_t offset,
12012 int max_refcnt,
12013 int depth,
12014 vm_region_extended_info_t extended,
12015 mach_msg_type_number_t count)
12016 {
12017 register vm_page_t p;
12018 register vm_object_t shadow;
12019 register int ref_count;
12020 vm_object_t caller_object;
12021 kern_return_t kr;
12022 shadow = object->shadow;
12023 caller_object = object;
12024
12025
12026 while (TRUE) {
12027
12028 if ( !(object->pager_trusted) && !(object->internal))
12029 extended->external_pager = 1;
12030
12031 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12032 if (shadow && (max_refcnt == 1))
12033 extended->pages_shared_now_private++;
12034
12035 if (!p->fictitious &&
12036 (p->dirty || pmap_is_modified(p->phys_page)))
12037 extended->pages_dirtied++;
12038 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
12039 if (p->reusable || p->object->all_reusable) {
12040 extended->pages_reusable++;
12041 }
12042 }
12043
12044 extended->pages_resident++;
12045
12046 if(object != caller_object)
12047 vm_object_unlock(object);
12048
12049 return;
12050 }
12051 #if MACH_PAGEMAP
12052 if (object->existence_map) {
12053 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
12054
12055 extended->pages_swapped_out++;
12056
12057 if(object != caller_object)
12058 vm_object_unlock(object);
12059
12060 return;
12061 }
12062 } else
12063 #endif /* MACH_PAGEMAP */
12064 if (object->internal &&
12065 object->alive &&
12066 !object->terminating &&
12067 object->pager_ready) {
12068
12069 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12070 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
12071 offset)
12072 == VM_EXTERNAL_STATE_EXISTS) {
12073 /* the pager has that page */
12074 extended->pages_swapped_out++;
12075 if (object != caller_object)
12076 vm_object_unlock(object);
12077 return;
12078 }
12079 } else {
12080 memory_object_t pager;
12081
12082 vm_object_paging_begin(object);
12083 pager = object->pager;
12084 vm_object_unlock(object);
12085
12086 kr = memory_object_data_request(
12087 pager,
12088 offset + object->paging_offset,
12089 0, /* just poke the pager */
12090 VM_PROT_READ,
12091 NULL);
12092
12093 vm_object_lock(object);
12094 vm_object_paging_end(object);
12095
12096 if (kr == KERN_SUCCESS) {
12097 /* the pager has that page */
12098 extended->pages_swapped_out++;
12099 if (object != caller_object)
12100 vm_object_unlock(object);
12101 return;
12102 }
12103 }
12104 }
12105
12106 if (shadow) {
12107 vm_object_lock(shadow);
12108
12109 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12110 ref_count--;
12111
12112 if (++depth > extended->shadow_depth)
12113 extended->shadow_depth = depth;
12114
12115 if (ref_count > max_refcnt)
12116 max_refcnt = ref_count;
12117
12118 if(object != caller_object)
12119 vm_object_unlock(object);
12120
12121 offset = offset + object->vo_shadow_offset;
12122 object = shadow;
12123 shadow = object->shadow;
12124 continue;
12125 }
12126 if(object != caller_object)
12127 vm_object_unlock(object);
12128 break;
12129 }
12130 }
12131
12132 static int
12133 vm_map_region_count_obj_refs(
12134 vm_map_entry_t entry,
12135 vm_object_t object)
12136 {
12137 register int ref_count;
12138 register vm_object_t chk_obj;
12139 register vm_object_t tmp_obj;
12140
12141 if (VME_OBJECT(entry) == 0)
12142 return(0);
12143
12144 if (entry->is_sub_map)
12145 return(0);
12146 else {
12147 ref_count = 0;
12148
12149 chk_obj = VME_OBJECT(entry);
12150 vm_object_lock(chk_obj);
12151
12152 while (chk_obj) {
12153 if (chk_obj == object)
12154 ref_count++;
12155 tmp_obj = chk_obj->shadow;
12156 if (tmp_obj)
12157 vm_object_lock(tmp_obj);
12158 vm_object_unlock(chk_obj);
12159
12160 chk_obj = tmp_obj;
12161 }
12162 }
12163 return(ref_count);
12164 }
12165
12166
12167 /*
12168 * Routine: vm_map_simplify
12169 *
12170 * Description:
12171 * Attempt to simplify the map representation in
12172 * the vicinity of the given starting address.
12173 * Note:
12174 * This routine is intended primarily to keep the
12175 * kernel maps more compact -- they generally don't
12176 * benefit from the "expand a map entry" technology
12177 * at allocation time because the adjacent entry
12178 * is often wired down.
12179 */
12180 void
12181 vm_map_simplify_entry(
12182 vm_map_t map,
12183 vm_map_entry_t this_entry)
12184 {
12185 vm_map_entry_t prev_entry;
12186
12187 counter(c_vm_map_simplify_entry_called++);
12188
12189 prev_entry = this_entry->vme_prev;
12190
12191 if ((this_entry != vm_map_to_entry(map)) &&
12192 (prev_entry != vm_map_to_entry(map)) &&
12193
12194 (prev_entry->vme_end == this_entry->vme_start) &&
12195
12196 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
12197 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12198 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
12199 prev_entry->vme_start))
12200 == VME_OFFSET(this_entry)) &&
12201
12202 (prev_entry->behavior == this_entry->behavior) &&
12203 (prev_entry->needs_copy == this_entry->needs_copy) &&
12204 (prev_entry->protection == this_entry->protection) &&
12205 (prev_entry->max_protection == this_entry->max_protection) &&
12206 (prev_entry->inheritance == this_entry->inheritance) &&
12207 (prev_entry->use_pmap == this_entry->use_pmap) &&
12208 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
12209 (prev_entry->no_cache == this_entry->no_cache) &&
12210 (prev_entry->permanent == this_entry->permanent) &&
12211 (prev_entry->map_aligned == this_entry->map_aligned) &&
12212 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12213 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12214 /* from_reserved_zone: OK if that field doesn't match */
12215 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
12216 (prev_entry->vme_resilient_codesign ==
12217 this_entry->vme_resilient_codesign) &&
12218 (prev_entry->vme_resilient_media ==
12219 this_entry->vme_resilient_media) &&
12220
12221 (prev_entry->wired_count == this_entry->wired_count) &&
12222 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
12223
12224 (prev_entry->in_transition == FALSE) &&
12225 (this_entry->in_transition == FALSE) &&
12226 (prev_entry->needs_wakeup == FALSE) &&
12227 (this_entry->needs_wakeup == FALSE) &&
12228 (prev_entry->is_shared == FALSE) &&
12229 (this_entry->is_shared == FALSE) &&
12230 (prev_entry->superpage_size == FALSE) &&
12231 (this_entry->superpage_size == FALSE)
12232 ) {
12233 vm_map_store_entry_unlink(map, prev_entry);
12234 assert(prev_entry->vme_start < this_entry->vme_end);
12235 if (prev_entry->map_aligned)
12236 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12237 VM_MAP_PAGE_MASK(map)));
12238 this_entry->vme_start = prev_entry->vme_start;
12239 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12240
12241 if (map->holelistenabled) {
12242 vm_map_store_update_first_free(map, this_entry, TRUE);
12243 }
12244
12245 if (prev_entry->is_sub_map) {
12246 vm_map_deallocate(VME_SUBMAP(prev_entry));
12247 } else {
12248 vm_object_deallocate(VME_OBJECT(prev_entry));
12249 }
12250 vm_map_entry_dispose(map, prev_entry);
12251 SAVE_HINT_MAP_WRITE(map, this_entry);
12252 counter(c_vm_map_simplified++);
12253 }
12254 }
12255
12256 void
12257 vm_map_simplify(
12258 vm_map_t map,
12259 vm_map_offset_t start)
12260 {
12261 vm_map_entry_t this_entry;
12262
12263 vm_map_lock(map);
12264 if (vm_map_lookup_entry(map, start, &this_entry)) {
12265 vm_map_simplify_entry(map, this_entry);
12266 vm_map_simplify_entry(map, this_entry->vme_next);
12267 }
12268 counter(c_vm_map_simplify_called++);
12269 vm_map_unlock(map);
12270 }
12271
12272 static void
12273 vm_map_simplify_range(
12274 vm_map_t map,
12275 vm_map_offset_t start,
12276 vm_map_offset_t end)
12277 {
12278 vm_map_entry_t entry;
12279
12280 /*
12281 * The map should be locked (for "write") by the caller.
12282 */
12283
12284 if (start >= end) {
12285 /* invalid address range */
12286 return;
12287 }
12288
12289 start = vm_map_trunc_page(start,
12290 VM_MAP_PAGE_MASK(map));
12291 end = vm_map_round_page(end,
12292 VM_MAP_PAGE_MASK(map));
12293
12294 if (!vm_map_lookup_entry(map, start, &entry)) {
12295 /* "start" is not mapped and "entry" ends before "start" */
12296 if (entry == vm_map_to_entry(map)) {
12297 /* start with first entry in the map */
12298 entry = vm_map_first_entry(map);
12299 } else {
12300 /* start with next entry */
12301 entry = entry->vme_next;
12302 }
12303 }
12304
12305 while (entry != vm_map_to_entry(map) &&
12306 entry->vme_start <= end) {
12307 /* try and coalesce "entry" with its previous entry */
12308 vm_map_simplify_entry(map, entry);
12309 entry = entry->vme_next;
12310 }
12311 }
12312
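/*
 * Illustrative trace (editorial): with 4K pages, if the map contains two
 * adjacent entries backed by the same object with contiguous offsets and
 * identical attributes, e.g.
 *
 *	[0x1000, 0x3000)  object O, offset 0x0
 *	[0x3000, 0x5000)  object O, offset 0x2000
 *
 * then vm_map_simplify_entry(map, <entry at 0x3000>) unlinks the first
 * entry, extends the second one to
 *
 *	[0x1000, 0x5000)  object O, offset 0x0
 *
 * and drops the extra reference that the first entry held on O.  Any
 * mismatch in the long list of attribute checks above (protections,
 * wiring, aliases, etc.) leaves the two entries untouched.
 */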
12313
12314 /*
12315 * Routine: vm_map_machine_attribute
12316 * Purpose:
12317 * Provide machine-specific attributes to mappings,
12318 * such as cacheability, etc., for machines that provide
12319 * them. NUMA architectures and machines with big/strange
12320 * caches will use this.
12321 * Note:
12322 * Responsibilities for locking and checking are handled here,
12323 * everything else in the pmap module. If any non-volatile
12324 * information must be kept, the pmap module should handle
12325 * it itself. [This assumes that attributes do not
12326 * need to be inherited, which seems ok to me]
12327 */
12328 kern_return_t
12329 vm_map_machine_attribute(
12330 vm_map_t map,
12331 vm_map_offset_t start,
12332 vm_map_offset_t end,
12333 vm_machine_attribute_t attribute,
12334 vm_machine_attribute_val_t* value) /* IN/OUT */
12335 {
12336 kern_return_t ret;
12337 vm_map_size_t sync_size;
12338 vm_map_entry_t entry;
12339
12340 if (start < vm_map_min(map) || end > vm_map_max(map))
12341 return KERN_INVALID_ADDRESS;
12342
12343 /* Figure how much memory we need to flush (in page increments) */
12344 sync_size = end - start;
12345
12346 vm_map_lock(map);
12347
12348 if (attribute != MATTR_CACHE) {
12349 /* If we don't have to find physical addresses, we */
12350 /* don't have to do an explicit traversal here. */
12351 ret = pmap_attribute(map->pmap, start, end-start,
12352 attribute, value);
12353 vm_map_unlock(map);
12354 return ret;
12355 }
12356
12357 ret = KERN_SUCCESS; /* Assume it all worked */
12358
12359 while(sync_size) {
12360 if (vm_map_lookup_entry(map, start, &entry)) {
12361 vm_map_size_t sub_size;
12362 if((entry->vme_end - start) > sync_size) {
12363 sub_size = sync_size;
12364 sync_size = 0;
12365 } else {
12366 sub_size = entry->vme_end - start;
12367 sync_size -= sub_size;
12368 }
12369 if(entry->is_sub_map) {
12370 vm_map_offset_t sub_start;
12371 vm_map_offset_t sub_end;
12372
12373 sub_start = (start - entry->vme_start)
12374 + VME_OFFSET(entry);
12375 sub_end = sub_start + sub_size;
12376 vm_map_machine_attribute(
12377 VME_SUBMAP(entry),
12378 sub_start,
12379 sub_end,
12380 attribute, value);
12381 } else {
12382 if (VME_OBJECT(entry)) {
12383 vm_page_t m;
12384 vm_object_t object;
12385 vm_object_t base_object;
12386 vm_object_t last_object;
12387 vm_object_offset_t offset;
12388 vm_object_offset_t base_offset;
12389 vm_map_size_t range;
12390 range = sub_size;
12391 offset = (start - entry->vme_start)
12392 + VME_OFFSET(entry);
12393 base_offset = offset;
12394 object = VME_OBJECT(entry);
12395 base_object = object;
12396 last_object = NULL;
12397
12398 vm_object_lock(object);
12399
12400 while (range) {
12401 m = vm_page_lookup(
12402 object, offset);
12403
12404 if (m && !m->fictitious) {
12405 ret =
12406 pmap_attribute_cache_sync(
12407 m->phys_page,
12408 PAGE_SIZE,
12409 attribute, value);
12410
12411 } else if (object->shadow) {
12412 offset = offset + object->vo_shadow_offset;
12413 last_object = object;
12414 object = object->shadow;
12415 vm_object_lock(last_object->shadow);
12416 vm_object_unlock(last_object);
12417 continue;
12418 }
12419 range -= PAGE_SIZE;
12420
12421 if (base_object != object) {
12422 vm_object_unlock(object);
12423 vm_object_lock(base_object);
12424 object = base_object;
12425 }
12426 /* Bump to the next page */
12427 base_offset += PAGE_SIZE;
12428 offset = base_offset;
12429 }
12430 vm_object_unlock(object);
12431 }
12432 }
12433 start += sub_size;
12434 } else {
12435 vm_map_unlock(map);
12436 return KERN_FAILURE;
12437 }
12438
12439 }
12440
12441 vm_map_unlock(map);
12442
12443 return ret;
12444 }
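/*
 * Illustrative sketch (editorial): the MATTR_CACHE path above walks the
 * resident pages and calls pmap_attribute_cache_sync() on each one.  From
 * user space the same path can be reached through the vm_machine_attribute()
 * MIG call; a hedged example (error handling omitted, "code_buf"/"code_len"
 * assumed to describe a freshly written code region):
 *
 *	#include <mach/mach.h>
 *	#include <mach/vm_attributes.h>
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr;
 *
 *	kr = vm_machine_attribute(mach_task_self(),
 *	    (vm_address_t)code_buf, code_len,
 *	    MATTR_CACHE, &val);
 *
 * In practice sys_icache_invalidate() is the usual user-level interface for
 * cache maintenance; this is only meant to illustrate the plumbing into
 * vm_map_machine_attribute().
 */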
12445
12446 /*
12447 * vm_map_behavior_set:
12448 *
12449 * Sets the paging reference behavior of the specified address
12450 * range in the target map. Paging reference behavior affects
12451 * how pagein operations resulting from faults on the map will be
12452 * clustered.
12453 */
12454 kern_return_t
12455 vm_map_behavior_set(
12456 vm_map_t map,
12457 vm_map_offset_t start,
12458 vm_map_offset_t end,
12459 vm_behavior_t new_behavior)
12460 {
12461 register vm_map_entry_t entry;
12462 vm_map_entry_t temp_entry;
12463
12464 XPR(XPR_VM_MAP,
12465 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
12466 map, start, end, new_behavior, 0);
12467
12468 if (start > end ||
12469 start < vm_map_min(map) ||
12470 end > vm_map_max(map)) {
12471 return KERN_NO_SPACE;
12472 }
12473
12474 switch (new_behavior) {
12475
12476 /*
12477 * This first block of behaviors all set a persistent state on the specified
12478 * memory range. All we have to do here is to record the desired behavior
12479 * in the vm_map_entry_t's.
12480 */
12481
12482 case VM_BEHAVIOR_DEFAULT:
12483 case VM_BEHAVIOR_RANDOM:
12484 case VM_BEHAVIOR_SEQUENTIAL:
12485 case VM_BEHAVIOR_RSEQNTL:
12486 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12487 vm_map_lock(map);
12488
12489 /*
12490 * The entire address range must be valid for the map.
12491 * Note that vm_map_range_check() does a
12492 * vm_map_lookup_entry() internally and returns the
12493 * entry containing the start of the address range if
12494 * the entire range is valid.
12495 */
12496 if (vm_map_range_check(map, start, end, &temp_entry)) {
12497 entry = temp_entry;
12498 vm_map_clip_start(map, entry, start);
12499 }
12500 else {
12501 vm_map_unlock(map);
12502 return(KERN_INVALID_ADDRESS);
12503 }
12504
12505 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12506 vm_map_clip_end(map, entry, end);
12507 if (entry->is_sub_map) {
12508 assert(!entry->use_pmap);
12509 }
12510
12511 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12512 entry->zero_wired_pages = TRUE;
12513 } else {
12514 entry->behavior = new_behavior;
12515 }
12516 entry = entry->vme_next;
12517 }
12518
12519 vm_map_unlock(map);
12520 break;
12521
12522 /*
12523 * The rest of these are different from the above in that they cause
12524 * an immediate action to take place as opposed to setting a behavior that
12525 * affects future actions.
12526 */
12527
12528 case VM_BEHAVIOR_WILLNEED:
12529 return vm_map_willneed(map, start, end);
12530
12531 case VM_BEHAVIOR_DONTNEED:
12532 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12533
12534 case VM_BEHAVIOR_FREE:
12535 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12536
12537 case VM_BEHAVIOR_REUSABLE:
12538 return vm_map_reusable_pages(map, start, end);
12539
12540 case VM_BEHAVIOR_REUSE:
12541 return vm_map_reuse_pages(map, start, end);
12542
12543 case VM_BEHAVIOR_CAN_REUSE:
12544 return vm_map_can_reuse(map, start, end);
12545
12546 #if MACH_ASSERT
12547 case VM_BEHAVIOR_PAGEOUT:
12548 return vm_map_pageout(map, start, end);
12549 #endif /* MACH_ASSERT */
12550
12551 default:
12552 return(KERN_INVALID_ARGUMENT);
12553 }
12554
12555 return(KERN_SUCCESS);
12556 }
12557
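/*
 * Illustrative sketch (editorial): the behaviors handled above normally
 * arrive from the BSD madvise(2) path, which translates the MADV_* flags
 * before calling into the Mach VM layer.  The correspondence, roughly
 * (the exact plumbing lives in the BSD layer and is an assumption here):
 *
 *	#include <sys/mman.h>
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);     // VM_BEHAVIOR_SEQUENTIAL
 *	madvise(addr, len, MADV_RANDOM);         // VM_BEHAVIOR_RANDOM
 *	madvise(addr, len, MADV_WILLNEED);       // VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_DONTNEED);       // VM_BEHAVIOR_DONTNEED
 *	madvise(addr, len, MADV_FREE);           // VM_BEHAVIOR_FREE
 *	madvise(addr, len, MADV_FREE_REUSABLE);  // VM_BEHAVIOR_REUSABLE
 *	madvise(addr, len, MADV_FREE_REUSE);     // VM_BEHAVIOR_REUSE
 *	madvise(addr, len, MADV_CAN_REUSE);      // VM_BEHAVIOR_CAN_REUSE
 *
 * "addr"/"len" are assumed to describe a page-aligned range owned by the
 * caller.  The first block of cases above only records the behavior in the
 * affected entries; the later ones act immediately, as noted in the switch.
 */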
12558
12559 /*
12560 * Internals for madvise(MADV_WILLNEED) system call.
12561 *
12562 * The present implementation is to do a read-ahead if the mapping corresponds
12563 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12564 * and basically ignore the "advice" (which we are always free to do).
12565 */
12566
12567
12568 static kern_return_t
12569 vm_map_willneed(
12570 vm_map_t map,
12571 vm_map_offset_t start,
12572 vm_map_offset_t end
12573 )
12574 {
12575 vm_map_entry_t entry;
12576 vm_object_t object;
12577 memory_object_t pager;
12578 struct vm_object_fault_info fault_info;
12579 kern_return_t kr;
12580 vm_object_size_t len;
12581 vm_object_offset_t offset;
12582
12583 /*
12584 * Fill in static values in fault_info. Several fields get ignored by the code
12585 * we call, but we'll fill them in anyway since uninitialized fields are bad
12586 * when it comes to future backwards compatibility.
12587 */
12588
12589 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12590 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12591 fault_info.no_cache = FALSE; /* ignored value */
12592 fault_info.stealth = TRUE;
12593 fault_info.io_sync = FALSE;
12594 fault_info.cs_bypass = FALSE;
12595 fault_info.mark_zf_absent = FALSE;
12596 fault_info.batch_pmap_op = FALSE;
12597
12598 /*
12599 * The MADV_WILLNEED operation doesn't require any changes to the
12600 * vm_map_entry_t's, so the read lock is sufficient.
12601 */
12602
12603 vm_map_lock_read(map);
12604
12605 /*
12606 * The madvise semantics require that the address range be fully
12607 * allocated with no holes. Otherwise, we're required to return
12608 * an error.
12609 */
12610
12611 if (! vm_map_range_check(map, start, end, &entry)) {
12612 vm_map_unlock_read(map);
12613 return KERN_INVALID_ADDRESS;
12614 }
12615
12616 /*
12617 * Examine each vm_map_entry_t in the range.
12618 */
12619 for (; entry != vm_map_to_entry(map) && start < end; ) {
12620
12621 /*
12622 * The first time through, the start address could be anywhere
12623 * within the vm_map_entry we found. So adjust the offset to
12624 * correspond. After that, the offset will always be zero to
12625 * correspond to the beginning of the current vm_map_entry.
12626 */
12627 offset = (start - entry->vme_start) + VME_OFFSET(entry);
12628
12629 /*
12630 * Set the length so we don't go beyond the end of the
12631 * map_entry or beyond the end of the range we were given.
12632 * This range could also span multiple map entries, all of which
12633 * map different files, so make sure we only do the right amount
12634 * of I/O for each object. Note that it's possible for there
12635 * to be multiple map entries all referring to the same object
12636 * but with different page permissions, but it's not worth
12637 * trying to optimize that case.
12638 */
12639 len = MIN(entry->vme_end - start, end - start);
12640
12641 if ((vm_size_t) len != len) {
12642 /* 32-bit overflow */
12643 len = (vm_size_t) (0 - PAGE_SIZE);
12644 }
12645 fault_info.cluster_size = (vm_size_t) len;
12646 fault_info.lo_offset = offset;
12647 fault_info.hi_offset = offset + len;
12648 fault_info.user_tag = VME_ALIAS(entry);
12649 fault_info.pmap_options = 0;
12650 if (entry->iokit_acct ||
12651 (!entry->is_sub_map && !entry->use_pmap)) {
12652 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12653 }
12654
12655 /*
12656 * If there's no read permission to this mapping, then just
12657 * skip it.
12658 */
12659 if ((entry->protection & VM_PROT_READ) == 0) {
12660 entry = entry->vme_next;
12661 start = entry->vme_start;
12662 continue;
12663 }
12664
12665 /*
12666 * Find the file object backing this map entry. If there is
12667 * none, then we simply ignore the "will need" advice for this
12668 * entry and go on to the next one.
12669 */
12670 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12671 entry = entry->vme_next;
12672 start = entry->vme_start;
12673 continue;
12674 }
12675
12676 /*
12677 * The data_request() could take a long time, so let's
12678 * release the map lock to avoid blocking other threads.
12679 */
12680 vm_map_unlock_read(map);
12681
12682 vm_object_paging_begin(object);
12683 pager = object->pager;
12684 vm_object_unlock(object);
12685
12686 /*
12687 * Get the data from the object asynchronously.
12688 *
12689 * Note that memory_object_data_request() places limits on the
12690 * amount of I/O it will do. Regardless of the len we
12691 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12692 * silently truncates the len to that size. This isn't
12693 * necessarily bad since madvise shouldn't really be used to
12694 * page in unlimited amounts of data. Other Unix variants
12695 * limit the willneed case as well. If this turns out to be an
12696 * issue for developers, then we can always adjust the policy
12697 * here and still be backwards compatible since this is all
12698 * just "advice".
12699 */
12700 kr = memory_object_data_request(
12701 pager,
12702 offset + object->paging_offset,
12703 0, /* ignored */
12704 VM_PROT_READ,
12705 (memory_object_fault_info_t)&fault_info);
12706
12707 vm_object_lock(object);
12708 vm_object_paging_end(object);
12709 vm_object_unlock(object);
12710
12711 /*
12712 * If we couldn't do the I/O for some reason, just give up on
12713 * the madvise. We still return success to the user since
12714 * madvise isn't supposed to fail when the advice can't be
12715 * taken.
12716 */
12717 if (kr != KERN_SUCCESS) {
12718 return KERN_SUCCESS;
12719 }
12720
12721 start += len;
12722 if (start >= end) {
12723 /* done */
12724 return KERN_SUCCESS;
12725 }
12726
12727 /* look up next entry */
12728 vm_map_lock_read(map);
12729 if (! vm_map_lookup_entry(map, start, &entry)) {
12730 /*
12731 * There's a new hole in the address range.
12732 */
12733 vm_map_unlock_read(map);
12734 return KERN_INVALID_ADDRESS;
12735 }
12736 }
12737
12738 vm_map_unlock_read(map);
12739 return KERN_SUCCESS;
12740 }
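/*
 * Illustrative sketch (editorial): a typical user of the read-ahead
 * implemented above, assuming "/some/large/file" stands in for a real
 * regular file at least this large:
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/some/large/file", O_RDONLY);
 *	size_t len = 16 * 1024 * 1024;
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *
 *	if (p != MAP_FAILED)
 *		(void)madvise(p, len, MADV_WILLNEED);  // advisory, may be a no-op
 *
 * As the comments above note, the request is asynchronous, each entry's
 * I/O is silently capped (MAX_UPL_TRANSFER_BYTES), anonymous memory is
 * skipped, and an I/O failure is still reported as success to the caller.
 */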
12741
12742 static boolean_t
12743 vm_map_entry_is_reusable(
12744 vm_map_entry_t entry)
12745 {
12746 /* Only user map entries */
12747
12748 vm_object_t object;
12749
12750 if (entry->is_sub_map) {
12751 return FALSE;
12752 }
12753
12754 switch (VME_ALIAS(entry)) {
12755 case VM_MEMORY_MALLOC:
12756 case VM_MEMORY_MALLOC_SMALL:
12757 case VM_MEMORY_MALLOC_LARGE:
12758 case VM_MEMORY_REALLOC:
12759 case VM_MEMORY_MALLOC_TINY:
12760 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12761 case VM_MEMORY_MALLOC_LARGE_REUSED:
12762 /*
12763 * This is a malloc() memory region: check if it's still
12764 * in its original state and can be re-used for more
12765 * malloc() allocations.
12766 */
12767 break;
12768 default:
12769 /*
12770 * Not a malloc() memory region: let the caller decide if
12771 * it's re-usable.
12772 */
12773 return TRUE;
12774 }
12775
12776 if (entry->is_shared ||
12777 entry->is_sub_map ||
12778 entry->in_transition ||
12779 entry->protection != VM_PROT_DEFAULT ||
12780 entry->max_protection != VM_PROT_ALL ||
12781 entry->inheritance != VM_INHERIT_DEFAULT ||
12782 entry->no_cache ||
12783 entry->permanent ||
12784 entry->superpage_size != FALSE ||
12785 entry->zero_wired_pages ||
12786 entry->wired_count != 0 ||
12787 entry->user_wired_count != 0) {
12788 return FALSE;
12789 }
12790
12791 object = VME_OBJECT(entry);
12792 if (object == VM_OBJECT_NULL) {
12793 return TRUE;
12794 }
12795 if (
12796 #if 0
12797 /*
12798 * Let's proceed even if the VM object is potentially
12799 * shared.
12800 * We check for this later when processing the actual
12801 * VM pages, so the contents will be safe if shared.
12802 *
12803 * But we can still mark this memory region as "reusable" to
12804 * acknowledge that the caller did let us know that the memory
12805 * could be re-used and should not be penalized for holding
12806 * on to it. This allows its "resident size" to not include
12807 * the reusable range.
12808 */
12809 object->ref_count == 1 &&
12810 #endif
12811 object->wired_page_count == 0 &&
12812 object->copy == VM_OBJECT_NULL &&
12813 object->shadow == VM_OBJECT_NULL &&
12814 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12815 object->internal &&
12816 !object->true_share &&
12817 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12818 !object->code_signed) {
12819 return TRUE;
12820 }
12821 return FALSE;
12822
12823
12824 }
12825
12826 static kern_return_t
12827 vm_map_reuse_pages(
12828 vm_map_t map,
12829 vm_map_offset_t start,
12830 vm_map_offset_t end)
12831 {
12832 vm_map_entry_t entry;
12833 vm_object_t object;
12834 vm_object_offset_t start_offset, end_offset;
12835
12836 /*
12837 * The MADV_REUSE operation doesn't require any changes to the
12838 * vm_map_entry_t's, so the read lock is sufficient.
12839 */
12840
12841 vm_map_lock_read(map);
12842 assert(map->pmap != kernel_pmap); /* protect alias access */
12843
12844 /*
12845 * The madvise semantics require that the address range be fully
12846 * allocated with no holes. Otherwise, we're required to return
12847 * an error.
12848 */
12849
12850 if (!vm_map_range_check(map, start, end, &entry)) {
12851 vm_map_unlock_read(map);
12852 vm_page_stats_reusable.reuse_pages_failure++;
12853 return KERN_INVALID_ADDRESS;
12854 }
12855
12856 /*
12857 * Examine each vm_map_entry_t in the range.
12858 */
12859 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12860 entry = entry->vme_next) {
12861 /*
12862 * Sanity check on the VM map entry.
12863 */
12864 if (! vm_map_entry_is_reusable(entry)) {
12865 vm_map_unlock_read(map);
12866 vm_page_stats_reusable.reuse_pages_failure++;
12867 return KERN_INVALID_ADDRESS;
12868 }
12869
12870 /*
12871 * The first time through, the start address could be anywhere
12872 * within the vm_map_entry we found. So adjust the offset to
12873 * correspond.
12874 */
12875 if (entry->vme_start < start) {
12876 start_offset = start - entry->vme_start;
12877 } else {
12878 start_offset = 0;
12879 }
12880 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12881 start_offset += VME_OFFSET(entry);
12882 end_offset += VME_OFFSET(entry);
12883
12884 assert(!entry->is_sub_map);
12885 object = VME_OBJECT(entry);
12886 if (object != VM_OBJECT_NULL) {
12887 vm_object_lock(object);
12888 vm_object_reuse_pages(object, start_offset, end_offset,
12889 TRUE);
12890 vm_object_unlock(object);
12891 }
12892
12893 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12894 /*
12895 * XXX
12896 * We do not hold the VM map exclusively here.
12897 * The "alias" field is not that critical, so it's
12898 * safe to update it here, as long as it is the only
12899 * one that can be modified while holding the VM map
12900 * "shared".
12901 */
12902 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
12903 }
12904 }
12905
12906 vm_map_unlock_read(map);
12907 vm_page_stats_reusable.reuse_pages_success++;
12908 return KERN_SUCCESS;
12909 }
12910
12911
12912 static kern_return_t
12913 vm_map_reusable_pages(
12914 vm_map_t map,
12915 vm_map_offset_t start,
12916 vm_map_offset_t end)
12917 {
12918 vm_map_entry_t entry;
12919 vm_object_t object;
12920 vm_object_offset_t start_offset, end_offset;
12921 vm_map_offset_t pmap_offset;
12922
12923 /*
12924 * The MADV_REUSABLE operation doesn't require any changes to the
12925 * vm_map_entry_t's, so the read lock is sufficient.
12926 */
12927
12928 vm_map_lock_read(map);
12929 assert(map->pmap != kernel_pmap); /* protect alias access */
12930
12931 /*
12932 * The madvise semantics require that the address range be fully
12933 * allocated with no holes. Otherwise, we're required to return
12934 * an error.
12935 */
12936
12937 if (!vm_map_range_check(map, start, end, &entry)) {
12938 vm_map_unlock_read(map);
12939 vm_page_stats_reusable.reusable_pages_failure++;
12940 return KERN_INVALID_ADDRESS;
12941 }
12942
12943 /*
12944 * Examine each vm_map_entry_t in the range.
12945 */
12946 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12947 entry = entry->vme_next) {
12948 int kill_pages = 0;
12949
12950 /*
12951 * Sanity check on the VM map entry.
12952 */
12953 if (! vm_map_entry_is_reusable(entry)) {
12954 vm_map_unlock_read(map);
12955 vm_page_stats_reusable.reusable_pages_failure++;
12956 return KERN_INVALID_ADDRESS;
12957 }
12958
12959 /*
12960 * The first time through, the start address could be anywhere
12961 * within the vm_map_entry we found. So adjust the offset to
12962 * correspond.
12963 */
12964 if (entry->vme_start < start) {
12965 start_offset = start - entry->vme_start;
12966 pmap_offset = start;
12967 } else {
12968 start_offset = 0;
12969 pmap_offset = entry->vme_start;
12970 }
12971 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12972 start_offset += VME_OFFSET(entry);
12973 end_offset += VME_OFFSET(entry);
12974
12975 assert(!entry->is_sub_map);
12976 object = VME_OBJECT(entry);
12977 if (object == VM_OBJECT_NULL)
12978 continue;
12979
12980
12981 vm_object_lock(object);
12982 if (object->ref_count == 1 &&
12983 !object->shadow &&
12984 /*
12985 * "iokit_acct" entries are billed for their virtual size
12986 * (rather than for their resident pages only), so they
12987 * wouldn't benefit from making pages reusable, and it
12988 * would be hard to keep track of pages that are both
12989 * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12990 */
12991 !(entry->iokit_acct ||
12992 (!entry->is_sub_map && !entry->use_pmap)))
12993 kill_pages = 1;
12994 else
12995 kill_pages = -1;
12996 if (kill_pages != -1) {
12997 vm_object_deactivate_pages(object,
12998 start_offset,
12999 end_offset - start_offset,
13000 kill_pages,
13001 TRUE /*reusable_pages*/,
13002 map->pmap,
13003 pmap_offset);
13004 } else {
13005 vm_page_stats_reusable.reusable_pages_shared++;
13006 }
13007 vm_object_unlock(object);
13008
13009 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
13010 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
13011 /*
13012 * XXX
13013 * We do not hold the VM map exclusively here.
13014 * The "alias" field is not that critical, so it's
13015 * safe to update it here, as long as it is the only
13016 * field that can be modified while the VM map is held
13017 * "shared".
13018 */
13019 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
13020 }
13021 }
13022
13023 vm_map_unlock_read(map);
13024 vm_page_stats_reusable.reusable_pages_success++;
13025 return KERN_SUCCESS;
13026 }
13027
13028
13029 static kern_return_t
13030 vm_map_can_reuse(
13031 vm_map_t map,
13032 vm_map_offset_t start,
13033 vm_map_offset_t end)
13034 {
13035 vm_map_entry_t entry;
13036
13037 /*
13038 * The MADV_REUSABLE operation doesn't require any changes to the
13039 * vm_map_entry_t's, so the read lock is sufficient.
13040 */
13041
13042 vm_map_lock_read(map);
13043 assert(map->pmap != kernel_pmap); /* protect alias access */
13044
13045 /*
13046 * The madvise semantics require that the address range be fully
13047 * allocated with no holes. Otherwise, we're required to return
13048 * an error.
13049 */
13050
13051 if (!vm_map_range_check(map, start, end, &entry)) {
13052 vm_map_unlock_read(map);
13053 vm_page_stats_reusable.can_reuse_failure++;
13054 return KERN_INVALID_ADDRESS;
13055 }
13056
13057 /*
13058 * Examine each vm_map_entry_t in the range.
13059 */
13060 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13061 entry = entry->vme_next) {
13062 /*
13063 * Sanity check on the VM map entry.
13064 */
13065 if (! vm_map_entry_is_reusable(entry)) {
13066 vm_map_unlock_read(map);
13067 vm_page_stats_reusable.can_reuse_failure++;
13068 return KERN_INVALID_ADDRESS;
13069 }
13070 }
13071
13072 vm_map_unlock_read(map);
13073 vm_page_stats_reusable.can_reuse_success++;
13074 return KERN_SUCCESS;
13075 }
13076
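/*
 * Illustrative sketch (editorial): vm_map_reusable_pages(),
 * vm_map_reuse_pages() and vm_map_can_reuse() back the malloc-style page
 * recycling protocol.  A free-list style allocator might use it roughly
 * like this (a sketch, not the actual libmalloc code; "block"/"block_size"
 * are assumed to describe a page-aligned allocation it owns):
 *
 *	#include <sys/mman.h>
 *
 *	// block goes onto a free list: its contents are disposable
 *	(void)madvise(block, block_size, MADV_FREE_REUSABLE);
 *
 *	// ...later, the block is handed out again: take the pages back
 *	// out of the reusable state before reusing their contents
 *	(void)madvise(block, block_size, MADV_FREE_REUSE);
 *
 * MADV_CAN_REUSE (vm_map_can_reuse() above) only probes whether the range
 * would qualify.  While marked reusable, the pages stay mapped but can be
 * discarded by the system under pressure instead of being swapped, and
 * they are tracked in the vm_page_stats_reusable counters updated above.
 */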
13077
13078 #if MACH_ASSERT
13079 static kern_return_t
13080 vm_map_pageout(
13081 vm_map_t map,
13082 vm_map_offset_t start,
13083 vm_map_offset_t end)
13084 {
13085 vm_map_entry_t entry;
13086
13087 /*
13088 * The MADV_PAGEOUT operation doesn't require any changes to the
13089 * vm_map_entry_t's, so the read lock is sufficient.
13090 */
13091
13092 vm_map_lock_read(map);
13093
13094 /*
13095 * The madvise semantics require that the address range be fully
13096 * allocated with no holes. Otherwise, we're required to return
13097 * an error.
13098 */
13099
13100 if (!vm_map_range_check(map, start, end, &entry)) {
13101 vm_map_unlock_read(map);
13102 return KERN_INVALID_ADDRESS;
13103 }
13104
13105 /*
13106 * Examine each vm_map_entry_t in the range.
13107 */
13108 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13109 entry = entry->vme_next) {
13110 vm_object_t object;
13111
13112 /*
13113 * Sanity check on the VM map entry.
13114 */
13115 if (entry->is_sub_map) {
13116 vm_map_t submap;
13117 vm_map_offset_t submap_start;
13118 vm_map_offset_t submap_end;
13119 vm_map_entry_t submap_entry;
13120
13121 submap = VME_SUBMAP(entry);
13122 submap_start = VME_OFFSET(entry);
13123 submap_end = submap_start + (entry->vme_end -
13124 entry->vme_start);
13125
13126 vm_map_lock_read(submap);
13127
13128 if (! vm_map_range_check(submap,
13129 submap_start,
13130 submap_end,
13131 &submap_entry)) {
13132 vm_map_unlock_read(submap);
13133 vm_map_unlock_read(map);
13134 return KERN_INVALID_ADDRESS;
13135 }
13136
13137 object = VME_OBJECT(submap_entry);
13138 if (submap_entry->is_sub_map ||
13139 object == VM_OBJECT_NULL ||
13140 !object->internal) {
13141 vm_map_unlock_read(submap);
13142 continue;
13143 }
13144
13145 vm_object_pageout(object);
13146
13147 vm_map_unlock_read(submap);
13148 submap = VM_MAP_NULL;
13149 submap_entry = VM_MAP_ENTRY_NULL;
13150 continue;
13151 }
13152
13153 object = VME_OBJECT(entry);
13154 if (entry->is_sub_map ||
13155 object == VM_OBJECT_NULL ||
13156 !object->internal) {
13157 continue;
13158 }
13159
13160 vm_object_pageout(object);
13161 }
13162
13163 vm_map_unlock_read(map);
13164 return KERN_SUCCESS;
13165 }
13166 #endif /* MACH_ASSERT */
13167
13168
13169 /*
13170 * Routine: vm_map_entry_insert
13171 *
13172 * Description: This routine inserts a new vm_map_entry into a locked map.
13173 */
13174 vm_map_entry_t
13175 vm_map_entry_insert(
13176 vm_map_t map,
13177 vm_map_entry_t insp_entry,
13178 vm_map_offset_t start,
13179 vm_map_offset_t end,
13180 vm_object_t object,
13181 vm_object_offset_t offset,
13182 boolean_t needs_copy,
13183 boolean_t is_shared,
13184 boolean_t in_transition,
13185 vm_prot_t cur_protection,
13186 vm_prot_t max_protection,
13187 vm_behavior_t behavior,
13188 vm_inherit_t inheritance,
13189 unsigned wired_count,
13190 boolean_t no_cache,
13191 boolean_t permanent,
13192 unsigned int superpage_size,
13193 boolean_t clear_map_aligned,
13194 boolean_t is_submap)
13195 {
13196 vm_map_entry_t new_entry;
13197
13198 assert(insp_entry != (vm_map_entry_t)0);
13199
13200 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
13201
13202 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13203 new_entry->map_aligned = TRUE;
13204 } else {
13205 new_entry->map_aligned = FALSE;
13206 }
13207 if (clear_map_aligned &&
13208 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13209 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
13210 new_entry->map_aligned = FALSE;
13211 }
13212
13213 new_entry->vme_start = start;
13214 new_entry->vme_end = end;
13215 assert(page_aligned(new_entry->vme_start));
13216 assert(page_aligned(new_entry->vme_end));
13217 if (new_entry->map_aligned) {
13218 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13219 VM_MAP_PAGE_MASK(map)));
13220 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13221 VM_MAP_PAGE_MASK(map)));
13222 }
13223 assert(new_entry->vme_start < new_entry->vme_end);
13224
13225 VME_OBJECT_SET(new_entry, object);
13226 VME_OFFSET_SET(new_entry, offset);
13227 new_entry->is_shared = is_shared;
13228 new_entry->is_sub_map = is_submap;
13229 new_entry->needs_copy = needs_copy;
13230 new_entry->in_transition = in_transition;
13231 new_entry->needs_wakeup = FALSE;
13232 new_entry->inheritance = inheritance;
13233 new_entry->protection = cur_protection;
13234 new_entry->max_protection = max_protection;
13235 new_entry->behavior = behavior;
13236 new_entry->wired_count = wired_count;
13237 new_entry->user_wired_count = 0;
13238 if (is_submap) {
13239 /*
13240 * submap: "use_pmap" means "nested".
13241 * default: false.
13242 */
13243 new_entry->use_pmap = FALSE;
13244 } else {
13245 /*
13246 * object: "use_pmap" means "use pmap accounting" for footprint.
13247 * default: true.
13248 */
13249 new_entry->use_pmap = TRUE;
13250 }
13251 VME_ALIAS_SET(new_entry, 0);
13252 new_entry->zero_wired_pages = FALSE;
13253 new_entry->no_cache = no_cache;
13254 new_entry->permanent = permanent;
13255 if (superpage_size)
13256 new_entry->superpage_size = TRUE;
13257 else
13258 new_entry->superpage_size = FALSE;
13259 new_entry->used_for_jit = FALSE;
13260 new_entry->iokit_acct = FALSE;
13261 new_entry->vme_resilient_codesign = FALSE;
13262 new_entry->vme_resilient_media = FALSE;
13263
13264 /*
13265 * Insert the new entry into the list.
13266 */
13267
13268 vm_map_store_entry_link(map, insp_entry, new_entry);
13269 map->size += end - start;
13270
13271 /*
13272 * Update the free space hint and the lookup hint.
13273 */
13274
13275 SAVE_HINT_MAP_WRITE(map, new_entry);
13276 return new_entry;
13277 }
13278
13279 /*
13280 * Routine: vm_map_remap_extract
13281 *
13282 * Description: This routine returns a vm_map_entry list extracted from a map.
13283 */
13284 static kern_return_t
13285 vm_map_remap_extract(
13286 vm_map_t map,
13287 vm_map_offset_t addr,
13288 vm_map_size_t size,
13289 boolean_t copy,
13290 struct vm_map_header *map_header,
13291 vm_prot_t *cur_protection,
13292 vm_prot_t *max_protection,
13293 /* What, no behavior? */
13294 vm_inherit_t inheritance,
13295 boolean_t pageable)
13296 {
13297 kern_return_t result;
13298 vm_map_size_t mapped_size;
13299 vm_map_size_t tmp_size;
13300 vm_map_entry_t src_entry; /* result of last map lookup */
13301 vm_map_entry_t new_entry;
13302 vm_object_offset_t offset;
13303 vm_map_offset_t map_address;
13304 vm_map_offset_t src_start; /* start of entry to map */
13305 vm_map_offset_t src_end; /* end of region to be mapped */
13306 vm_object_t object;
13307 vm_map_version_t version;
13308 boolean_t src_needs_copy;
13309 boolean_t new_entry_needs_copy;
13310
13311 assert(map != VM_MAP_NULL);
13312 assert(size != 0);
13313 assert(size == vm_map_round_page(size, PAGE_MASK));
13314 assert(inheritance == VM_INHERIT_NONE ||
13315 inheritance == VM_INHERIT_COPY ||
13316 inheritance == VM_INHERIT_SHARE);
13317
13318 /*
13319 * Compute start and end of region.
13320 */
13321 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13322 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13323
13324
13325 /*
13326 * Initialize map_header.
13327 */
13328 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13329 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13330 map_header->nentries = 0;
13331 map_header->entries_pageable = pageable;
13332 map_header->page_shift = PAGE_SHIFT;
13333
13334 vm_map_store_init( map_header );
13335
13336 *cur_protection = VM_PROT_ALL;
13337 *max_protection = VM_PROT_ALL;
13338
13339 map_address = 0;
13340 mapped_size = 0;
13341 result = KERN_SUCCESS;
13342
13343 /*
13344 * The specified source virtual space might correspond to
13345 * multiple map entries, need to loop on them.
13346 */
13347 vm_map_lock(map);
13348 while (mapped_size != size) {
13349 vm_map_size_t entry_size;
13350
13351 /*
13352 * Find the beginning of the region.
13353 */
13354 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13355 result = KERN_INVALID_ADDRESS;
13356 break;
13357 }
13358
13359 if (src_start < src_entry->vme_start ||
13360 (mapped_size && src_start != src_entry->vme_start)) {
13361 result = KERN_INVALID_ADDRESS;
13362 break;
13363 }
13364
13365 tmp_size = size - mapped_size;
13366 if (src_end > src_entry->vme_end)
13367 tmp_size -= (src_end - src_entry->vme_end);
13368
13369 entry_size = (vm_map_size_t)(src_entry->vme_end -
13370 src_entry->vme_start);
13371
13372 if(src_entry->is_sub_map) {
13373 vm_map_reference(VME_SUBMAP(src_entry));
13374 object = VM_OBJECT_NULL;
13375 } else {
13376 object = VME_OBJECT(src_entry);
13377 if (src_entry->iokit_acct) {
13378 /*
13379 * This entry uses "IOKit accounting".
13380 */
13381 } else if (object != VM_OBJECT_NULL &&
13382 object->purgable != VM_PURGABLE_DENY) {
13383 /*
13384 * Purgeable objects have their own accounting:
13385 * no pmap accounting for them.
13386 */
13387 assert(!src_entry->use_pmap);
13388 } else {
13389 /*
13390 * Not IOKit or purgeable:
13391 * must be accounted by pmap stats.
13392 */
13393 assert(src_entry->use_pmap);
13394 }
13395
13396 if (object == VM_OBJECT_NULL) {
13397 object = vm_object_allocate(entry_size);
13398 VME_OFFSET_SET(src_entry, 0);
13399 VME_OBJECT_SET(src_entry, object);
13400 } else if (object->copy_strategy !=
13401 MEMORY_OBJECT_COPY_SYMMETRIC) {
13402 /*
13403 * We are already using an asymmetric
13404 * copy, and therefore we already have
13405 * the right object.
13406 */
13407 assert(!src_entry->needs_copy);
13408 } else if (src_entry->needs_copy || object->shadowed ||
13409 (object->internal && !object->true_share &&
13410 !src_entry->is_shared &&
13411 object->vo_size > entry_size)) {
13412
13413 VME_OBJECT_SHADOW(src_entry, entry_size);
13414
13415 if (!src_entry->needs_copy &&
13416 (src_entry->protection & VM_PROT_WRITE)) {
13417 vm_prot_t prot;
13418
13419 prot = src_entry->protection & ~VM_PROT_WRITE;
13420
13421 if (override_nx(map,
13422 VME_ALIAS(src_entry))
13423 && prot)
13424 prot |= VM_PROT_EXECUTE;
13425
13426 if(map->mapped_in_other_pmaps) {
13427 vm_object_pmap_protect(
13428 VME_OBJECT(src_entry),
13429 VME_OFFSET(src_entry),
13430 entry_size,
13431 PMAP_NULL,
13432 src_entry->vme_start,
13433 prot);
13434 } else {
13435 pmap_protect(vm_map_pmap(map),
13436 src_entry->vme_start,
13437 src_entry->vme_end,
13438 prot);
13439 }
13440 }
13441
13442 object = VME_OBJECT(src_entry);
13443 src_entry->needs_copy = FALSE;
13444 }
13445
13446
13447 vm_object_lock(object);
13448 vm_object_reference_locked(object); /* object ref. for new entry */
13449 if (object->copy_strategy ==
13450 MEMORY_OBJECT_COPY_SYMMETRIC) {
13451 object->copy_strategy =
13452 MEMORY_OBJECT_COPY_DELAY;
13453 }
13454 vm_object_unlock(object);
13455 }
13456
13457 offset = (VME_OFFSET(src_entry) +
13458 (src_start - src_entry->vme_start));
13459
13460 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
13461 vm_map_entry_copy(new_entry, src_entry);
13462 if (new_entry->is_sub_map) {
13463 /* clr address space specifics */
13464 new_entry->use_pmap = FALSE;
13465 }
13466
13467 new_entry->map_aligned = FALSE;
13468
13469 new_entry->vme_start = map_address;
13470 new_entry->vme_end = map_address + tmp_size;
13471 assert(new_entry->vme_start < new_entry->vme_end);
13472 new_entry->inheritance = inheritance;
13473 VME_OFFSET_SET(new_entry, offset);
13474
13475 /*
13476 * The new region has to be copied now if required.
13477 */
13478 RestartCopy:
13479 if (!copy) {
13480 /*
13481 * Cannot allow an entry describing a JIT
13482 * region to be shared across address spaces.
13483 */
13484 if (src_entry->used_for_jit == TRUE) {
13485 result = KERN_INVALID_ARGUMENT;
13486 break;
13487 }
13488 src_entry->is_shared = TRUE;
13489 new_entry->is_shared = TRUE;
13490 if (!(new_entry->is_sub_map))
13491 new_entry->needs_copy = FALSE;
13492
13493 } else if (src_entry->is_sub_map) {
13494 /* make this a COW sub_map if not already */
13495 assert(new_entry->wired_count == 0);
13496 new_entry->needs_copy = TRUE;
13497 object = VM_OBJECT_NULL;
13498 } else if (src_entry->wired_count == 0 &&
13499 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13500 VME_OFFSET(new_entry),
13501 (new_entry->vme_end -
13502 new_entry->vme_start),
13503 &src_needs_copy,
13504 &new_entry_needs_copy)) {
13505
13506 new_entry->needs_copy = new_entry_needs_copy;
13507 new_entry->is_shared = FALSE;
13508
13509 /*
13510 * Handle copy_on_write semantics.
13511 */
13512 if (src_needs_copy && !src_entry->needs_copy) {
13513 vm_prot_t prot;
13514
13515 prot = src_entry->protection & ~VM_PROT_WRITE;
13516
13517 if (override_nx(map,
13518 VME_ALIAS(src_entry))
13519 && prot)
13520 prot |= VM_PROT_EXECUTE;
13521
13522 vm_object_pmap_protect(object,
13523 offset,
13524 entry_size,
13525 ((src_entry->is_shared
13526 || map->mapped_in_other_pmaps) ?
13527 PMAP_NULL : map->pmap),
13528 src_entry->vme_start,
13529 prot);
13530
13531 assert(src_entry->wired_count == 0);
13532 src_entry->needs_copy = TRUE;
13533 }
13534 /*
13535 * Throw away the old object reference of the new entry.
13536 */
13537 vm_object_deallocate(object);
13538
13539 } else {
13540 new_entry->is_shared = FALSE;
13541
13542 /*
13543 * The map can be safely unlocked since we
13544 * already hold a reference on the object.
13545 *
13546 * Record the timestamp of the map for later
13547 * verification, and unlock the map.
13548 */
13549 version.main_timestamp = map->timestamp;
13550 vm_map_unlock(map); /* Increments timestamp once! */
13551
13552 /*
13553 * Perform the copy.
13554 */
13555 if (src_entry->wired_count > 0) {
13556 vm_object_lock(object);
13557 result = vm_object_copy_slowly(
13558 object,
13559 offset,
13560 entry_size,
13561 THREAD_UNINT,
13562 &VME_OBJECT(new_entry));
13563
13564 VME_OFFSET_SET(new_entry, 0);
13565 new_entry->needs_copy = FALSE;
13566 } else {
13567 vm_object_offset_t new_offset;
13568
13569 new_offset = VME_OFFSET(new_entry);
13570 result = vm_object_copy_strategically(
13571 object,
13572 offset,
13573 entry_size,
13574 &VME_OBJECT(new_entry),
13575 &new_offset,
13576 &new_entry_needs_copy);
13577 if (new_offset != VME_OFFSET(new_entry)) {
13578 VME_OFFSET_SET(new_entry, new_offset);
13579 }
13580
13581 new_entry->needs_copy = new_entry_needs_copy;
13582 }
13583
13584 /*
13585 * Throw away the old object reference of the new entry.
13586 */
13587 vm_object_deallocate(object);
13588
13589 if (result != KERN_SUCCESS &&
13590 result != KERN_MEMORY_RESTART_COPY) {
13591 _vm_map_entry_dispose(map_header, new_entry);
13592 break;
13593 }
13594
13595 /*
13596 * Verify that the map has not substantially
13597 * changed while the copy was being made.
13598 */
13599
13600 vm_map_lock(map);
13601 if (version.main_timestamp + 1 != map->timestamp) {
13602 /*
13603 * Simple version comparison failed.
13604 *
13605 * Retry the lookup and verify that the
13606 * same object/offset are still present.
13607 */
13608 vm_object_deallocate(VME_OBJECT(new_entry));
13609 _vm_map_entry_dispose(map_header, new_entry);
13610 if (result == KERN_MEMORY_RESTART_COPY)
13611 result = KERN_SUCCESS;
13612 continue;
13613 }
13614
13615 if (result == KERN_MEMORY_RESTART_COPY) {
13616 vm_object_reference(object);
13617 goto RestartCopy;
13618 }
13619 }
13620
13621 _vm_map_store_entry_link(map_header,
13622 map_header->links.prev, new_entry);
13623
13624 /* Protections for submap mapping are irrelevant here */
13625 if( !src_entry->is_sub_map ) {
13626 *cur_protection &= src_entry->protection;
13627 *max_protection &= src_entry->max_protection;
13628 }
13629 map_address += tmp_size;
13630 mapped_size += tmp_size;
13631 src_start += tmp_size;
13632
13633 } /* end while */
13634
13635 vm_map_unlock(map);
13636 if (result != KERN_SUCCESS) {
13637 /*
13638 * Free all allocated elements.
13639 */
13640 for (src_entry = map_header->links.next;
13641 src_entry != (struct vm_map_entry *)&map_header->links;
13642 src_entry = new_entry) {
13643 new_entry = src_entry->vme_next;
13644 _vm_map_store_entry_unlink(map_header, src_entry);
13645 if (src_entry->is_sub_map) {
13646 vm_map_deallocate(VME_SUBMAP(src_entry));
13647 } else {
13648 vm_object_deallocate(VME_OBJECT(src_entry));
13649 }
13650 _vm_map_entry_dispose(map_header, src_entry);
13651 }
13652 }
13653 return result;
13654 }
13655
13656 /*
13657 * Routine: vm_remap
13658 *
13659 * Map portion of a task's address space.
13660 * Mapped region must not overlap more than
13661 * one vm memory object. Protections and
13662 * inheritance attributes remain the same
13663 * as in the original task and are out parameters.
13664 * Source and target task can be identical.
13665 * Other attributes are the same as for vm_map().
13666 */
13667 kern_return_t
13668 vm_map_remap(
13669 vm_map_t target_map,
13670 vm_map_address_t *address,
13671 vm_map_size_t size,
13672 vm_map_offset_t mask,
13673 int flags,
13674 vm_map_t src_map,
13675 vm_map_offset_t memory_address,
13676 boolean_t copy,
13677 vm_prot_t *cur_protection,
13678 vm_prot_t *max_protection,
13679 vm_inherit_t inheritance)
13680 {
13681 kern_return_t result;
13682 vm_map_entry_t entry;
13683 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
13684 vm_map_entry_t new_entry;
13685 struct vm_map_header map_header;
13686 vm_map_offset_t offset_in_mapping;
13687
13688 if (target_map == VM_MAP_NULL)
13689 return KERN_INVALID_ARGUMENT;
13690
13691 switch (inheritance) {
13692 case VM_INHERIT_NONE:
13693 case VM_INHERIT_COPY:
13694 case VM_INHERIT_SHARE:
13695 if (size != 0 && src_map != VM_MAP_NULL)
13696 break;
13697 /*FALL THRU*/
13698 default:
13699 return KERN_INVALID_ARGUMENT;
13700 }
13701
13702 /*
13703 * If the user is requesting that we return the address of the
13704 * first byte of the data (rather than the base of the page),
13705 * then we use different rounding semantics: specifically,
13706 * we assume that (memory_address, size) describes a region
13707 * all of whose pages we must cover, rather than a base to be truncated
13708 * down and a size to be added to that base. So we figure out
13709 * the highest page that the requested region includes and make
13710 * sure that the size will cover it.
13711 *
13712 * The key example we're worried about is of the form:
13713 *
13714 * memory_address = 0x1ff0, size = 0x20
13715 *
13716 * With the old semantics, we round down the memory_address to 0x1000
13717 * and round up the size to 0x1000, resulting in our covering *only*
13718 * page 0x1000. With the new semantics, we'd realize that the region covers
13719 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
13720 * 0x1000 and page 0x2000 in the region we remap.
13721 */
13722 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13723 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
13724 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
13725 } else {
13726 size = vm_map_round_page(size, PAGE_MASK);
13727 }
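/*
 * Editorial trace of the example in the comment above, with 4K pages:
 * memory_address = 0x1ff0, size = 0x20 gives
 * offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0 and
 * size = round_page(0x1ff0 + 0x20 - 0x1000) = round_page(0x1010) = 0x2000,
 * so both pages are covered and the caller eventually gets back
 * *address + 0xff0 rather than the page base.
 */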
13728
13729 result = vm_map_remap_extract(src_map, memory_address,
13730 size, copy, &map_header,
13731 cur_protection,
13732 max_protection,
13733 inheritance,
13734 target_map->hdr.entries_pageable);
13735
13736 if (result != KERN_SUCCESS) {
13737 return result;
13738 }
13739
13740 /*
13741 * Allocate/check a range of free virtual address
13742 * space for the target
13743 */
13744 *address = vm_map_trunc_page(*address,
13745 VM_MAP_PAGE_MASK(target_map));
13746 vm_map_lock(target_map);
13747 result = vm_map_remap_range_allocate(target_map, address, size,
13748 mask, flags, &insp_entry);
13749
13750 for (entry = map_header.links.next;
13751 entry != (struct vm_map_entry *)&map_header.links;
13752 entry = new_entry) {
13753 new_entry = entry->vme_next;
13754 _vm_map_store_entry_unlink(&map_header, entry);
13755 if (result == KERN_SUCCESS) {
13756 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13757 /* no codesigning -> read-only access */
13758 assert(!entry->used_for_jit);
13759 entry->max_protection = VM_PROT_READ;
13760 entry->protection = VM_PROT_READ;
13761 entry->vme_resilient_codesign = TRUE;
13762 }
13763 entry->vme_start += *address;
13764 entry->vme_end += *address;
13765 assert(!entry->map_aligned);
13766 vm_map_store_entry_link(target_map, insp_entry, entry);
13767 insp_entry = entry;
13768 } else {
13769 if (!entry->is_sub_map) {
13770 vm_object_deallocate(VME_OBJECT(entry));
13771 } else {
13772 vm_map_deallocate(VME_SUBMAP(entry));
13773 }
13774 _vm_map_entry_dispose(&map_header, entry);
13775 }
13776 }
13777
13778 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13779 *cur_protection = VM_PROT_READ;
13780 *max_protection = VM_PROT_READ;
13781 }
13782
13783 if( target_map->disable_vmentry_reuse == TRUE) {
13784 if( target_map->highest_entry_end < insp_entry->vme_end ){
13785 target_map->highest_entry_end = insp_entry->vme_end;
13786 }
13787 }
13788
13789 if (result == KERN_SUCCESS) {
13790 target_map->size += size;
13791 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
13792 }
13793 vm_map_unlock(target_map);
13794
13795 if (result == KERN_SUCCESS && target_map->wiring_required)
13796 result = vm_map_wire(target_map, *address,
13797 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
13798 TRUE);
13799
13800 /*
13801 * If requested, return the address of the data pointed to by the
13802 * request, rather than the base of the resulting page.
13803 */
13804 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13805 *address += offset_in_mapping;
13806 }
13807
13808 return result;
13809 }
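/*
 * Illustrative sketch (editorial): a user-space caller reaches this routine
 * through the mach_vm_remap() MIG call.  Assuming "src_task", "src_addr"
 * and "size" are provided by the caller, sharing (copy == FALSE) a remote
 * range and asking for the data address back might look like:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t target = 0;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *	    VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
 *	    src_task, src_addr, FALSE,
 *	    &cur_prot, &max_prot, VM_INHERIT_NONE);
 *
 * On success "target" points at the byte corresponding to "src_addr"
 * (offset_in_mapping is added back at the end of vm_map_remap() above),
 * and cur_prot/max_prot report the protections inherited from the source
 * entries.
 */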
13810
13811 /*
13812 * Routine: vm_map_remap_range_allocate
13813 *
13814 * Description:
13815 * Allocate a range in the specified virtual address map.
13816 * Returns the address and the map entry just before the allocated
13817 * range.
13818 *
13819 * Map must be locked.
13820 */
13821
13822 static kern_return_t
13823 vm_map_remap_range_allocate(
13824 vm_map_t map,
13825 vm_map_address_t *address, /* IN/OUT */
13826 vm_map_size_t size,
13827 vm_map_offset_t mask,
13828 int flags,
13829 vm_map_entry_t *map_entry) /* OUT */
13830 {
13831 vm_map_entry_t entry;
13832 vm_map_offset_t start;
13833 vm_map_offset_t end;
13834 kern_return_t kr;
13835 vm_map_entry_t hole_entry;
13836
13837 StartAgain: ;
13838
13839 start = *address;
13840
13841 if (flags & VM_FLAGS_ANYWHERE)
13842 {
13843 /*
13844 * Calculate the first possible address.
13845 */
13846
13847 if (start < map->min_offset)
13848 start = map->min_offset;
13849 if (start > map->max_offset)
13850 return(KERN_NO_SPACE);
13851
13852 /*
13853 * Look for the first possible address;
13854 * if there's already something at this
13855 * address, we have to start after it.
13856 */
13857
13858 if( map->disable_vmentry_reuse == TRUE) {
13859 VM_MAP_HIGHEST_ENTRY(map, entry, start);
13860 } else {
13861
13862 if (map->holelistenabled) {
13863 hole_entry = (vm_map_entry_t)map->holes_list;
13864
13865 if (hole_entry == NULL) {
13866 /*
13867 * No more space in the map?
13868 */
13869 return(KERN_NO_SPACE);
13870 } else {
13871
13872 boolean_t found_hole = FALSE;
13873
13874 do {
13875 if (hole_entry->vme_start >= start) {
13876 start = hole_entry->vme_start;
13877 found_hole = TRUE;
13878 break;
13879 }
13880
13881 if (hole_entry->vme_end > start) {
13882 found_hole = TRUE;
13883 break;
13884 }
13885 hole_entry = hole_entry->vme_next;
13886
13887 } while (hole_entry != (vm_map_entry_t) map->holes_list);
13888
13889 if (found_hole == FALSE) {
13890 return (KERN_NO_SPACE);
13891 }
13892
13893 entry = hole_entry;
13894 }
13895 } else {
13896 assert(first_free_is_valid(map));
13897 if (start == map->min_offset) {
13898 if ((entry = map->first_free) != vm_map_to_entry(map))
13899 start = entry->vme_end;
13900 } else {
13901 vm_map_entry_t tmp_entry;
13902 if (vm_map_lookup_entry(map, start, &tmp_entry))
13903 start = tmp_entry->vme_end;
13904 entry = tmp_entry;
13905 }
13906 }
13907 start = vm_map_round_page(start,
13908 VM_MAP_PAGE_MASK(map));
13909 }
13910
13911 /*
13912 * In any case, the "entry" always precedes
13913 * the proposed new region throughout the
13914 * loop:
13915 */
13916
13917 while (TRUE) {
13918 register vm_map_entry_t next;
13919
13920 /*
13921 * Find the end of the proposed new region.
13922 * Be sure we didn't go beyond the end, or
13923 * wrap around the address.
13924 */
13925
13926 end = ((start + mask) & ~mask);
13927 end = vm_map_round_page(end,
13928 VM_MAP_PAGE_MASK(map));
13929 if (end < start)
13930 return(KERN_NO_SPACE);
13931 start = end;
13932 end += size;
13933
13934 if ((end > map->max_offset) || (end < start)) {
13935 if (map->wait_for_space) {
13936 if (size <= (map->max_offset -
13937 map->min_offset)) {
13938 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13939 vm_map_unlock(map);
13940 thread_block(THREAD_CONTINUE_NULL);
13941 vm_map_lock(map);
13942 goto StartAgain;
13943 }
13944 }
13945
13946 return(KERN_NO_SPACE);
13947 }
13948
13949 next = entry->vme_next;
13950
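/*
 * With the hole list enabled, "entry" denotes a hole rather
 * than an allocated entry, so the candidate region fits as
 * long as it ends within the current hole.
 */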
13951 if (map->holelistenabled) {
13952 if (entry->vme_end >= end)
13953 break;
13954 } else {
13955 /*
13956 * If there are no more entries, we must win.
13957 *
13958 * OR
13959 *
13960 * If there is another entry, it must be
13961 * after the end of the potential new region.
13962 */
13963
13964 if (next == vm_map_to_entry(map))
13965 break;
13966
13967 if (next->vme_start >= end)
13968 break;
13969 }
13970
13971 /*
13972 * Didn't fit -- move to the next entry.
13973 */
13974
13975 entry = next;
13976
13977 if (map->holelistenabled) {
13978 if (entry == (vm_map_entry_t) map->holes_list) {
13979 /*
13980 * Wrapped around
13981 */
13982 return(KERN_NO_SPACE);
13983 }
13984 start = entry->vme_start;
13985 } else {
13986 start = entry->vme_end;
13987 }
13988 }
13989
13990 if (map->holelistenabled) {
13991
13992 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
13993 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
13994 }
13995 }
13996
13997 *address = start;
13998
13999 } else {
14000 vm_map_entry_t temp_entry;
14001
14002 /*
14003 * Verify that:
14004 * the address doesn't itself violate
14005 * the mask requirement.
14006 */
14007
14008 if ((start & mask) != 0)
14009 return(KERN_NO_SPACE);
14010
14011
14012 /*
14013 * ... the address is within bounds
14014 */
14015
14016 end = start + size;
14017
14018 if ((start < map->min_offset) ||
14019 (end > map->max_offset) ||
14020 (start >= end)) {
14021 return(KERN_INVALID_ADDRESS);
14022 }
14023
14024 /*
14025 * If we're asked to overwrite whatever was mapped in that
14026 * range, first deallocate that range.
14027 */
14028 if (flags & VM_FLAGS_OVERWRITE) {
14029 vm_map_t zap_map;
14030
14031 /*
14032 * We use a "zap_map" to avoid having to unlock
14033 * the "map" in vm_map_delete(), which would compromise
14034 * the atomicity of the "deallocate" and then "remap"
14035 * combination.
14036 */
14037 zap_map = vm_map_create(PMAP_NULL,
14038 start,
14039 end,
14040 map->hdr.entries_pageable);
14041 if (zap_map == VM_MAP_NULL) {
14042 return KERN_RESOURCE_SHORTAGE;
14043 }
14044 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
14045 vm_map_disable_hole_optimization(zap_map);
14046
14047 kr = vm_map_delete(map, start, end,
14048 (VM_MAP_REMOVE_SAVE_ENTRIES |
14049 VM_MAP_REMOVE_NO_MAP_ALIGN),
14050 zap_map);
14051 if (kr == KERN_SUCCESS) {
14052 vm_map_destroy(zap_map,
14053 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14054 zap_map = VM_MAP_NULL;
14055 }
14056 }
14057
14058 /*
14059 * ... the starting address isn't allocated
14060 */
14061
14062 if (vm_map_lookup_entry(map, start, &temp_entry))
14063 return(KERN_NO_SPACE);
14064
14065 entry = temp_entry;
14066
14067 /*
14068 * ... the next region doesn't overlap the
14069 * end point.
14070 */
14071
14072 if ((entry->vme_next != vm_map_to_entry(map)) &&
14073 (entry->vme_next->vme_start < end))
14074 return(KERN_NO_SPACE);
14075 }
14076 *map_entry = entry;
14077 return(KERN_SUCCESS);
14078 }
14079
14080 /*
14081 * vm_map_switch:
14082 *
14083 * Set the address map for the current thread to the specified map
14084 */
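/*
 * Callers are expected to hold a reference on the new map for the
 * duration of the switch and to restore the returned previous map when
 * done, as vm_map_write_user() and vm_map_read_user() below do.
 */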
14085
14086 vm_map_t
14087 vm_map_switch(
14088 vm_map_t map)
14089 {
14090 int mycpu;
14091 thread_t thread = current_thread();
14092 vm_map_t oldmap = thread->map;
14093
14094 mp_disable_preemption();
14095 mycpu = cpu_number();
14096
14097 /*
14098 * Deactivate the current map and activate the requested map
14099 */
14100 PMAP_SWITCH_USER(thread, map, mycpu);
14101
14102 mp_enable_preemption();
14103 return(oldmap);
14104 }
14105
14106
14107 /*
14108 * Routine: vm_map_write_user
14109 *
14110 * Description:
14111 * Copy out data from kernel space into space in the
14112 * destination map. The space must already exist in the
14113 * destination map.
14114 * NOTE: This routine should only be called by threads
14115 * which can block on a page fault, i.e. kernel-mode
14116 * user threads.
14117 *
14118 */
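/*
 * Illustrative usage sketch ("task", "user_addr" and "result" are
 * hypothetical): copying a small kernel buffer out to a target task's
 * address space:
 *
 *	struct foo result;
 *	...
 *	kr = vm_map_write_user(task->map,
 *			       &result,
 *			       (vm_map_address_t)user_addr,
 *			       sizeof (result));
 *
 * The destination range must already be mapped and writable in the
 * target map, and the calling thread must be able to block on a fault.
 */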
14119 kern_return_t
14120 vm_map_write_user(
14121 vm_map_t map,
14122 void *src_p,
14123 vm_map_address_t dst_addr,
14124 vm_size_t size)
14125 {
14126 kern_return_t kr = KERN_SUCCESS;
14127
14128 if(current_map() == map) {
14129 if (copyout(src_p, dst_addr, size)) {
14130 kr = KERN_INVALID_ADDRESS;
14131 }
14132 } else {
14133 vm_map_t oldmap;
14134
14135 /* take on the identity of the target map while doing */
14136 /* the transfer */
14137
14138 vm_map_reference(map);
14139 oldmap = vm_map_switch(map);
14140 if (copyout(src_p, dst_addr, size)) {
14141 kr = KERN_INVALID_ADDRESS;
14142 }
14143 vm_map_switch(oldmap);
14144 vm_map_deallocate(map);
14145 }
14146 return kr;
14147 }
14148
14149 /*
14150 * Routine: vm_map_read_user
14151 *
14152 * Description:
14153 * Copy in data from a user space source map into the
14154 * kernel map. The space must already exist in the
14155 * kernel map.
14156 * NOTE: This routine should only be called by threads
14157 * which can block on a page fault, i.e. kernel-mode
14158 * user threads.
14159 *
14160 */
14161 kern_return_t
14162 vm_map_read_user(
14163 vm_map_t map,
14164 vm_map_address_t src_addr,
14165 void *dst_p,
14166 vm_size_t size)
14167 {
14168 kern_return_t kr = KERN_SUCCESS;
14169
14170 if(current_map() == map) {
14171 if (copyin(src_addr, dst_p, size)) {
14172 kr = KERN_INVALID_ADDRESS;
14173 }
14174 } else {
14175 vm_map_t oldmap;
14176
14177 /* take on the identity of the target map while doing */
14178 /* the transfer */
14179
14180 vm_map_reference(map);
14181 oldmap = vm_map_switch(map);
14182 if (copyin(src_addr, dst_p, size)) {
14183 kr = KERN_INVALID_ADDRESS;
14184 }
14185 vm_map_switch(oldmap);
14186 vm_map_deallocate(map);
14187 }
14188 return kr;
14189 }
14190
14191
14192 /*
14193 * vm_map_check_protection:
14194 *
14195 * Assert that the target map allows the specified
14196 * privilege on the entire address region given.
14197 * The entire region must be allocated.
14198 */
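/*
 * Illustrative usage sketch ("addr" and "len" are hypothetical): a caller
 * might verify that a range is readable before operating on it:
 *
 *	if (!vm_map_check_protection(map,
 *		vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
 *		vm_map_round_page(addr + len, VM_MAP_PAGE_MASK(map)),
 *		VM_PROT_READ))
 *		return KERN_PROTECTION_FAILURE;
 *
 * Note the answer is only valid at the time of the check: the map is
 * unlocked on return, so protections may change before the caller acts.
 */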
14199 boolean_t
14200 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14201 vm_map_offset_t end, vm_prot_t protection)
14202 {
14203 vm_map_entry_t entry;
14204 vm_map_entry_t tmp_entry;
14205
14206 vm_map_lock(map);
14207
14208 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
14209 {
14210 vm_map_unlock(map);
14211 return (FALSE);
14212 }
14213
14214 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14215 vm_map_unlock(map);
14216 return(FALSE);
14217 }
14218
14219 entry = tmp_entry;
14220
14221 while (start < end) {
14222 if (entry == vm_map_to_entry(map)) {
14223 vm_map_unlock(map);
14224 return(FALSE);
14225 }
14226
14227 /*
14228 * No holes allowed!
14229 */
14230
14231 if (start < entry->vme_start) {
14232 vm_map_unlock(map);
14233 return(FALSE);
14234 }
14235
14236 /*
14237 * Check protection associated with entry.
14238 */
14239
14240 if ((entry->protection & protection) != protection) {
14241 vm_map_unlock(map);
14242 return(FALSE);
14243 }
14244
14245 /* go to next entry */
14246
14247 start = entry->vme_end;
14248 entry = entry->vme_next;
14249 }
14250 vm_map_unlock(map);
14251 return(TRUE);
14252 }
14253
14254 kern_return_t
14255 vm_map_purgable_control(
14256 vm_map_t map,
14257 vm_map_offset_t address,
14258 vm_purgable_t control,
14259 int *state)
14260 {
14261 vm_map_entry_t entry;
14262 vm_object_t object;
14263 kern_return_t kr;
14264 boolean_t was_nonvolatile;
14265
14266 /*
14267 * Vet all the input parameters and current type and state of the
14268 * underlying object. Return with an error if anything is amiss.
14269 */
14270 if (map == VM_MAP_NULL)
14271 return(KERN_INVALID_ARGUMENT);
14272
14273 if (control != VM_PURGABLE_SET_STATE &&
14274 control != VM_PURGABLE_GET_STATE &&
14275 control != VM_PURGABLE_PURGE_ALL)
14276 return(KERN_INVALID_ARGUMENT);
14277
14278 if (control == VM_PURGABLE_PURGE_ALL) {
14279 vm_purgeable_object_purge_all();
14280 return KERN_SUCCESS;
14281 }
14282
14283 if (control == VM_PURGABLE_SET_STATE &&
14284 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
14285 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
14286 return(KERN_INVALID_ARGUMENT);
14287
14288 vm_map_lock_read(map);
14289
14290 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14291
14292 /*
14293 * Must pass a valid non-submap address.
14294 */
14295 vm_map_unlock_read(map);
14296 return(KERN_INVALID_ADDRESS);
14297 }
14298
14299 if ((entry->protection & VM_PROT_WRITE) == 0) {
14300 /*
14301 * Can't apply purgable controls to something you can't write.
14302 */
14303 vm_map_unlock_read(map);
14304 return(KERN_PROTECTION_FAILURE);
14305 }
14306
14307 object = VME_OBJECT(entry);
14308 if (object == VM_OBJECT_NULL ||
14309 object->purgable == VM_PURGABLE_DENY) {
14310 /*
14311 * Object must already be present and be purgeable.
14312 */
14313 vm_map_unlock_read(map);
14314 return KERN_INVALID_ARGUMENT;
14315 }
14316
14317 vm_object_lock(object);
14318
14319 #if 00
14320 if (VME_OFFSET(entry) != 0 ||
14321 entry->vme_end - entry->vme_start != object->vo_size) {
14322 /*
14323 * Can only apply purgable controls to the whole (existing)
14324 * object at once.
14325 */
14326 vm_map_unlock_read(map);
14327 vm_object_unlock(object);
14328 return KERN_INVALID_ARGUMENT;
14329 }
14330 #endif
14331
14332 assert(!entry->is_sub_map);
14333 assert(!entry->use_pmap); /* purgeable has its own accounting */
14334
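/*
 * The object lock taken above is sufficient to carry out the
 * purgeable-state change, so the map read lock can be dropped here.
 */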
14335 vm_map_unlock_read(map);
14336
14337 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14338
14339 kr = vm_object_purgable_control(object, control, state);
14340
14341 if (was_nonvolatile &&
14342 object->purgable != VM_PURGABLE_NONVOLATILE &&
14343 map->pmap == kernel_pmap) {
14344 #if DEBUG
14345 object->vo_purgeable_volatilizer = kernel_task;
14346 #endif /* DEBUG */
14347 }
14348
14349 vm_object_unlock(object);
14350
14351 return kr;
14352 }
14353
14354 kern_return_t
14355 vm_map_page_query_internal(
14356 vm_map_t target_map,
14357 vm_map_offset_t offset,
14358 int *disposition,
14359 int *ref_count)
14360 {
14361 kern_return_t kr;
14362 vm_page_info_basic_data_t info;
14363 mach_msg_type_number_t count;
14364
14365 count = VM_PAGE_INFO_BASIC_COUNT;
14366 kr = vm_map_page_info(target_map,
14367 offset,
14368 VM_PAGE_INFO_BASIC,
14369 (vm_page_info_t) &info,
14370 &count);
14371 if (kr == KERN_SUCCESS) {
14372 *disposition = info.disposition;
14373 *ref_count = info.ref_count;
14374 } else {
14375 *disposition = 0;
14376 *ref_count = 0;
14377 }
14378
14379 return kr;
14380 }
14381
14382 kern_return_t
14383 vm_map_page_info(
14384 vm_map_t map,
14385 vm_map_offset_t offset,
14386 vm_page_info_flavor_t flavor,
14387 vm_page_info_t info,
14388 mach_msg_type_number_t *count)
14389 {
14390 vm_map_entry_t map_entry;
14391 vm_object_t object;
14392 vm_page_t m;
14393 kern_return_t kr;
14394 kern_return_t retval = KERN_SUCCESS;
14395 boolean_t top_object;
14396 int disposition;
14397 int ref_count;
14398 vm_page_info_basic_t basic_info;
14399 int depth;
14400 vm_map_offset_t offset_in_page;
14401
14402 switch (flavor) {
14403 case VM_PAGE_INFO_BASIC:
14404 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
14405 /*
14406 * The "vm_page_info_basic_data" structure was not
14407 * properly padded, so allow the size to be off by
14408 * one to maintain backwards binary compatibility...
14409 */
14410 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14411 return KERN_INVALID_ARGUMENT;
14412 }
14413 break;
14414 default:
14415 return KERN_INVALID_ARGUMENT;
14416 }
14417
14418 disposition = 0;
14419 ref_count = 0;
14420 top_object = TRUE;
14421 depth = 0;
14422
14423 retval = KERN_SUCCESS;
14424 offset_in_page = offset & PAGE_MASK;
14425 offset = vm_map_trunc_page(offset, PAGE_MASK);
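/* Look up the containing page; the sub-page offset is added back into
 * the returned offset below. */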
14426
14427 vm_map_lock_read(map);
14428
14429 /*
14430 * First, find the map entry covering "offset", going down
14431 * submaps if necessary.
14432 */
14433 for (;;) {
14434 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14435 vm_map_unlock_read(map);
14436 return KERN_INVALID_ADDRESS;
14437 }
14438 /* compute offset from this map entry's start */
14439 offset -= map_entry->vme_start;
14440 /* compute offset into this map entry's object (or submap) */
14441 offset += VME_OFFSET(map_entry);
14442
14443 if (map_entry->is_sub_map) {
14444 vm_map_t sub_map;
14445
14446 sub_map = VME_SUBMAP(map_entry);
14447 vm_map_lock_read(sub_map);
14448 vm_map_unlock_read(map);
14449
14450 map = sub_map;
14451
14452 ref_count = MAX(ref_count, map->ref_count);
14453 continue;
14454 }
14455 break;
14456 }
14457
14458 object = VME_OBJECT(map_entry);
14459 if (object == VM_OBJECT_NULL) {
14460 /* no object -> no page */
14461 vm_map_unlock_read(map);
14462 goto done;
14463 }
14464
14465 vm_object_lock(object);
14466 vm_map_unlock_read(map);
14467
14468 /*
14469 * Go down the VM object shadow chain until we find the page
14470 * we're looking for.
14471 */
14472 for (;;) {
14473 ref_count = MAX(ref_count, object->ref_count);
14474
14475 m = vm_page_lookup(object, offset);
14476
14477 if (m != VM_PAGE_NULL) {
14478 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
14479 break;
14480 } else {
14481 #if MACH_PAGEMAP
14482 if (object->existence_map) {
14483 if (vm_external_state_get(object->existence_map,
14484 offset) ==
14485 VM_EXTERNAL_STATE_EXISTS) {
14486 /*
14487 * this page has been paged out
14488 */
14489 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14490 break;
14491 }
14492 } else
14493 #endif
14494 if (object->internal &&
14495 object->alive &&
14496 !object->terminating &&
14497 object->pager_ready) {
14498
14499 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14500 if (VM_COMPRESSOR_PAGER_STATE_GET(
14501 object,
14502 offset)
14503 == VM_EXTERNAL_STATE_EXISTS) {
14504 /* the pager has that page */
14505 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14506 break;
14507 }
14508 } else {
14509 memory_object_t pager;
14510
14511 vm_object_paging_begin(object);
14512 pager = object->pager;
14513 vm_object_unlock(object);
14514
14515 /*
14516 * Ask the default pager if
14517 * it has this page.
14518 */
14519 kr = memory_object_data_request(
14520 pager,
14521 offset + object->paging_offset,
14522 0, /* just poke the pager */
14523 VM_PROT_READ,
14524 NULL);
14525
14526 vm_object_lock(object);
14527 vm_object_paging_end(object);
14528
14529 if (kr == KERN_SUCCESS) {
14530 /* the default pager has it */
14531 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14532 break;
14533 }
14534 }
14535 }
14536
14537 if (object->shadow != VM_OBJECT_NULL) {
14538 vm_object_t shadow;
14539
14540 offset += object->vo_shadow_offset;
14541 shadow = object->shadow;
14542
14543 vm_object_lock(shadow);
14544 vm_object_unlock(object);
14545
14546 object = shadow;
14547 top_object = FALSE;
14548 depth++;
14549 } else {
14550 // if (!object->internal)
14551 // break;
14552 // retval = KERN_FAILURE;
14553 // goto done_with_object;
14554 break;
14555 }
14556 }
14557 }
14558 /* The ref_count is not strictly accurate: it measures the number */
14559 /* of entities holding a ref on the object, and they may not be */
14560 /* mapping the object or the section holding the target page. */
14561 /* It is still a ballpark number and, though an overcount, it */
14562 /* picks up the copy-on-write cases. */
14563
14564 /* We could also get a picture of page sharing from pmap_attributes, */
14565 /* but this would undercount since only faulted-in mappings would */
14566 /* show up. */
14567
14568 if (top_object == TRUE && object->shadow)
14569 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14570
14571 if (! object->internal)
14572 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
14573
14574 if (m == VM_PAGE_NULL)
14575 goto done_with_object;
14576
14577 if (m->fictitious) {
14578 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14579 goto done_with_object;
14580 }
14581 if (m->dirty || pmap_is_modified(m->phys_page))
14582 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
14583
14584 if (m->reference || pmap_is_referenced(m->phys_page))
14585 disposition |= VM_PAGE_QUERY_PAGE_REF;
14586
14587 if (m->speculative)
14588 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
14589
14590 if (m->cs_validated)
14591 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
14592 if (m->cs_tainted)
14593 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
14594 if (m->cs_nx)
14595 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
14596
14597 done_with_object:
14598 vm_object_unlock(object);
14599 done:
14600
14601 switch (flavor) {
14602 case VM_PAGE_INFO_BASIC:
14603 basic_info = (vm_page_info_basic_t) info;
14604 basic_info->disposition = disposition;
14605 basic_info->ref_count = ref_count;
14606 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14607 VM_KERNEL_ADDRPERM(object);
14608 basic_info->offset =
14609 (memory_object_offset_t) offset + offset_in_page;
14610 basic_info->depth = depth;
14611 break;
14612 }
14613
14614 return retval;
14615 }
14616
14617 /*
14618 * vm_map_msync
14619 *
14620 * Synchronizes the specified memory range with its backing store
14621 * image by either flushing or cleaning the contents to the appropriate
14622 * memory manager, engaging in a memory object synchronize dialog with
14623 * the manager. The client doesn't return until the manager issues an
14624 * m_o_s_completed message. MIG magically converts the user task
14625 * parameter to the task's address map.
14626 *
14627 * interpretation of sync_flags
14628 * VM_SYNC_INVALIDATE - discard pages, only return precious
14629 * pages to manager.
14630 *
14631 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14632 * - discard pages, write dirty or precious
14633 * pages back to memory manager.
14634 *
14635 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14636 * - write dirty or precious pages back to
14637 * the memory manager.
14638 *
14639 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14640 * is a hole in the region, and we would
14641 * have returned KERN_SUCCESS, return
14642 * KERN_INVALID_ADDRESS instead.
14643 *
14644 * NOTE
14645 * The memory object attributes have not yet been implemented; this
14646 * function will have to deal with the invalidate attribute.
14647 *
14648 * RETURNS
14649 * KERN_INVALID_TASK Bad task parameter
14650 * KERN_INVALID_ARGUMENT both sync and async were specified.
14651 * KERN_SUCCESS The usual.
14652 * KERN_INVALID_ADDRESS There was a hole in the region.
14653 */
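/*
 * Illustrative usage sketch ("addr" and "len" are hypothetical): a
 * synchronous, msync(2)-style flush of a range in the current map:
 *
 *	kr = vm_map_msync(current_map(),
 *			  (vm_map_address_t)addr,
 *			  (vm_map_size_t)len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *
 * With VM_SYNC_CONTIGUOUS set, a hole in the range turns an otherwise
 * successful return into KERN_INVALID_ADDRESS, as described above.
 */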
14654
14655 kern_return_t
14656 vm_map_msync(
14657 vm_map_t map,
14658 vm_map_address_t address,
14659 vm_map_size_t size,
14660 vm_sync_t sync_flags)
14661 {
14662 msync_req_t msr;
14663 msync_req_t new_msr;
14664 queue_chain_t req_q; /* queue of requests for this msync */
14665 vm_map_entry_t entry;
14666 vm_map_size_t amount_left;
14667 vm_object_offset_t offset;
14668 boolean_t do_sync_req;
14669 boolean_t had_hole = FALSE;
14670 memory_object_t pager;
14671 vm_map_offset_t pmap_offset;
14672
14673 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
14674 (sync_flags & VM_SYNC_SYNCHRONOUS))
14675 return(KERN_INVALID_ARGUMENT);
14676
14677 /*
14678 * align address and size on page boundaries
14679 */
14680 size = (vm_map_round_page(address + size,
14681 VM_MAP_PAGE_MASK(map)) -
14682 vm_map_trunc_page(address,
14683 VM_MAP_PAGE_MASK(map)));
14684 address = vm_map_trunc_page(address,
14685 VM_MAP_PAGE_MASK(map));
14686
14687 if (map == VM_MAP_NULL)
14688 return(KERN_INVALID_TASK);
14689
14690 if (size == 0)
14691 return(KERN_SUCCESS);
14692
14693 queue_init(&req_q);
14694 amount_left = size;
14695
14696 while (amount_left > 0) {
14697 vm_object_size_t flush_size;
14698 vm_object_t object;
14699
14700 vm_map_lock(map);
14701 if (!vm_map_lookup_entry(map,
14702 address,
14703 &entry)) {
14704
14705 vm_map_size_t skip;
14706
14707 /*
14708 * hole in the address map.
14709 */
14710 had_hole = TRUE;
14711
14712 /*
14713 * Check for empty map.
14714 */
14715 if (entry == vm_map_to_entry(map) &&
14716 entry->vme_next == entry) {
14717 vm_map_unlock(map);
14718 break;
14719 }
14720 /*
14721 * Check that we don't wrap and that
14722 * we have at least one real map entry.
14723 */
14724 if ((map->hdr.nentries == 0) ||
14725 (entry->vme_next->vme_start < address)) {
14726 vm_map_unlock(map);
14727 break;
14728 }
14729 /*
14730 * Move up to the next entry if needed
14731 */
14732 skip = (entry->vme_next->vme_start - address);
14733 if (skip >= amount_left)
14734 amount_left = 0;
14735 else
14736 amount_left -= skip;
14737 address = entry->vme_next->vme_start;
14738 vm_map_unlock(map);
14739 continue;
14740 }
14741
14742 offset = address - entry->vme_start;
14743 pmap_offset = address;
14744
14745 /*
14746 * Do we have more to flush than is contained in this
14747 * entry?
14748 */
14749 if (amount_left + entry->vme_start + offset > entry->vme_end) {
14750 flush_size = entry->vme_end -
14751 (entry->vme_start + offset);
14752 } else {
14753 flush_size = amount_left;
14754 }
14755 amount_left -= flush_size;
14756 address += flush_size;
14757
14758 if (entry->is_sub_map == TRUE) {
14759 vm_map_t local_map;
14760 vm_map_offset_t local_offset;
14761
14762 local_map = VME_SUBMAP(entry);
14763 local_offset = VME_OFFSET(entry);
14764 vm_map_unlock(map);
14765 if (vm_map_msync(
14766 local_map,
14767 local_offset,
14768 flush_size,
14769 sync_flags) == KERN_INVALID_ADDRESS) {
14770 had_hole = TRUE;
14771 }
14772 continue;
14773 }
14774 object = VME_OBJECT(entry);
14775
14776 /*
14777 * We can't sync this object if the object has not been
14778 * created yet
14779 */
14780 if (object == VM_OBJECT_NULL) {
14781 vm_map_unlock(map);
14782 continue;
14783 }
14784 offset += VME_OFFSET(entry);
14785
14786 vm_object_lock(object);
14787
14788 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
14789 int kill_pages = 0;
14790 boolean_t reusable_pages = FALSE;
14791
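/*
 * With VM_SYNC_KILLPAGES, pages may be discarded outright only
 * when this object is unshared (single reference, no shadow);
 * otherwise kill_pages is -1 and the request is skipped below.
 */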
14792 if (sync_flags & VM_SYNC_KILLPAGES) {
14793 if (object->ref_count == 1 && !object->shadow)
14794 kill_pages = 1;
14795 else
14796 kill_pages = -1;
14797 }
14798 if (kill_pages != -1)
14799 vm_object_deactivate_pages(
14800 object,
14801 offset,
14802 (vm_object_size_t) flush_size,
14803 kill_pages,
14804 reusable_pages,
14805 map->pmap,
14806 pmap_offset);
14807 vm_object_unlock(object);
14808 vm_map_unlock(map);
14809 continue;
14810 }
14811 /*
14812 * We can't sync this object if there isn't a pager.
14813 * Don't bother to sync internal objects, since there can't
14814 * be any "permanent" storage for these objects anyway.
14815 */
14816 if ((object->pager == MEMORY_OBJECT_NULL) ||
14817 (object->internal) || (object->private)) {
14818 vm_object_unlock(object);
14819 vm_map_unlock(map);
14820 continue;
14821 }
14822 /*
14823 * keep reference on the object until syncing is done
14824 */
14825 vm_object_reference_locked(object);
14826 vm_object_unlock(object);
14827
14828 vm_map_unlock(map);
14829
14830 do_sync_req = vm_object_sync(object,
14831 offset,
14832 flush_size,
14833 sync_flags & VM_SYNC_INVALIDATE,
14834 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
14835 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
14836 sync_flags & VM_SYNC_SYNCHRONOUS);
14837 /*
14838 * Only send an m_o_s if we returned pages or if the entry
14839 * is writable (i.e. dirty pages may have already been sent back).
14840 */
14841 if (!do_sync_req) {
14842 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
14843 /*
14844 * clear out the clustering and read-ahead hints
14845 */
14846 vm_object_lock(object);
14847
14848 object->pages_created = 0;
14849 object->pages_used = 0;
14850 object->sequential = 0;
14851 object->last_alloc = 0;
14852
14853 vm_object_unlock(object);
14854 }
14855 vm_object_deallocate(object);
14856 continue;
14857 }
14858 msync_req_alloc(new_msr);
14859
14860 vm_object_lock(object);
14861 offset += object->paging_offset;
14862
14863 new_msr->offset = offset;
14864 new_msr->length = flush_size;
14865 new_msr->object = object;
14866 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
14867 re_iterate:
14868
14869 /*
14870 * We can't sync this object if there isn't a pager. The
14871 * pager can disappear anytime we're not holding the object
14872 * lock. So this has to be checked anytime we goto re_iterate.
14873 */
14874
14875 pager = object->pager;
14876
14877 if (pager == MEMORY_OBJECT_NULL) {
14878 vm_object_unlock(object);
14879 vm_object_deallocate(object);
14880 msync_req_free(new_msr);
14881 new_msr = NULL;
14882 continue;
14883 }
14884
14885 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14886 /*
14887 * Need to check for an overlapping entry; if one is found, wait
14888 * for the overlapping msr to be done, then reiterate.
14889 */
14890 msr_lock(msr);
14891 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14892 ((offset >= msr->offset &&
14893 offset < (msr->offset + msr->length)) ||
14894 (msr->offset >= offset &&
14895 msr->offset < (offset + flush_size))))
14896 {
14897 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14898 msr_unlock(msr);
14899 vm_object_unlock(object);
14900 thread_block(THREAD_CONTINUE_NULL);
14901 vm_object_lock(object);
14902 goto re_iterate;
14903 }
14904 msr_unlock(msr);
14905 }/* queue_iterate */
14906
14907 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14908
14909 vm_object_paging_begin(object);
14910 vm_object_unlock(object);
14911
14912 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14913
14914 (void) memory_object_synchronize(
14915 pager,
14916 offset,
14917 flush_size,
14918 sync_flags & ~VM_SYNC_CONTIGUOUS);
14919
14920 vm_object_lock(object);
14921 vm_object_paging_end(object);
14922 vm_object_unlock(object);
14923 }/* while */
14924
14925 /*
14926 * wait for memory_object_synchronize_completed messages from pager(s)
14927 */
14928
14929 while (!queue_empty(&req_q)) {
14930 msr = (msync_req_t)queue_first(&req_q);
14931 msr_lock(msr);
14932 while(msr->flag != VM_MSYNC_DONE) {
14933 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14934 msr_unlock(msr);
14935 thread_block(THREAD_CONTINUE_NULL);
14936 msr_lock(msr);
14937 }/* while */
14938 queue_remove(&req_q, msr, msync_req_t, req_q);
14939 msr_unlock(msr);
14940 vm_object_deallocate(msr->object);
14941 msync_req_free(msr);
14942 }/* while */
14943
14944 /* for proper msync() behaviour */
14945 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14946 return(KERN_INVALID_ADDRESS);
14947
14948 return(KERN_SUCCESS);
14949 }/* vm_msync */
14950
14951 /*
14952 * Routine: convert_port_entry_to_map
14953 * Purpose:
14954 * Convert from a port specifying an entry or a task
14955 * to a map. Doesn't consume the port ref; produces a map ref,
14956 * which may be null. Unlike convert_port_to_map, the
14957 * port may be backed by a task or a named entry.
14958 * Conditions:
14959 * Nothing locked.
14960 */
14961
14962
14963 vm_map_t
14964 convert_port_entry_to_map(
14965 ipc_port_t port)
14966 {
14967 vm_map_t map;
14968 vm_named_entry_t named_entry;
14969 uint32_t try_failed_count = 0;
14970
14971 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14972 while(TRUE) {
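/*
 * If the named entry's lock can't be taken while the port is
 * locked, drop the port lock, back off with mutex_pause(),
 * and retry.
 */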
14973 ip_lock(port);
14974 if(ip_active(port) && (ip_kotype(port)
14975 == IKOT_NAMED_ENTRY)) {
14976 named_entry =
14977 (vm_named_entry_t)port->ip_kobject;
14978 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14979 ip_unlock(port);
14980
14981 try_failed_count++;
14982 mutex_pause(try_failed_count);
14983 continue;
14984 }
14985 named_entry->ref_count++;
14986 lck_mtx_unlock(&(named_entry)->Lock);
14987 ip_unlock(port);
14988 if ((named_entry->is_sub_map) &&
14989 (named_entry->protection
14990 & VM_PROT_WRITE)) {
14991 map = named_entry->backing.map;
14992 } else {
14993 mach_destroy_memory_entry(port);
14994 return VM_MAP_NULL;
14995 }
14996 vm_map_reference_swap(map);
14997 mach_destroy_memory_entry(port);
14998 break;
14999 }
15000 else
15001 return VM_MAP_NULL;
15002 }
15003 }
15004 else
15005 map = convert_port_to_map(port);
15006
15007 return map;
15008 }
15009
15010 /*
15011 * Routine: convert_port_entry_to_object
15012 * Purpose:
15013 * Convert from a port specifying a named entry to an
15014 * object. Doesn't consume the port ref; produces an object ref,
15015 * which may be null.
15016 * Conditions:
15017 * Nothing locked.
15018 */
15019
15020
15021 vm_object_t
15022 convert_port_entry_to_object(
15023 ipc_port_t port)
15024 {
15025 vm_object_t object = VM_OBJECT_NULL;
15026 vm_named_entry_t named_entry;
15027 uint32_t try_failed_count = 0;
15028
15029 if (IP_VALID(port) &&
15030 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15031 try_again:
15032 ip_lock(port);
15033 if (ip_active(port) &&
15034 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15035 named_entry = (vm_named_entry_t)port->ip_kobject;
15036 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
15037 ip_unlock(port);
15038 try_failed_count++;
15039 mutex_pause(try_failed_count);
15040 goto try_again;
15041 }
15042 named_entry->ref_count++;
15043 lck_mtx_unlock(&(named_entry)->Lock);
15044 ip_unlock(port);
15045 if (!(named_entry->is_sub_map) &&
15046 !(named_entry->is_pager) &&
15047 !(named_entry->is_copy) &&
15048 (named_entry->protection & VM_PROT_WRITE)) {
15049 object = named_entry->backing.object;
15050 vm_object_reference(object);
15051 }
15052 mach_destroy_memory_entry(port);
15053 }
15054 }
15055
15056 return object;
15057 }
15058
15059 /*
15060 * Export routines to other components for the things we access locally through
15061 * macros.
15062 */
15063 #undef current_map
15064 vm_map_t
15065 current_map(void)
15066 {
15067 return (current_map_fast());
15068 }
15069
15070 /*
15071 * vm_map_reference:
15072 *
15073 * Most code internal to the osfmk will go through a
15074 * macro defining this. This is always here for the
15075 * use of other kernel components.
15076 */
15077 #undef vm_map_reference
15078 void
15079 vm_map_reference(
15080 register vm_map_t map)
15081 {
15082 if (map == VM_MAP_NULL)
15083 return;
15084
15085 lck_mtx_lock(&map->s_lock);
15086 #if TASK_SWAPPER
15087 assert(map->res_count > 0);
15088 assert(map->ref_count >= map->res_count);
15089 map->res_count++;
15090 #endif
15091 map->ref_count++;
15092 lck_mtx_unlock(&map->s_lock);
15093 }
15094
15095 /*
15096 * vm_map_deallocate:
15097 *
15098 * Removes a reference from the specified map,
15099 * destroying it if no references remain.
15100 * The map should not be locked.
15101 */
15102 void
15103 vm_map_deallocate(
15104 register vm_map_t map)
15105 {
15106 unsigned int ref;
15107
15108 if (map == VM_MAP_NULL)
15109 return;
15110
15111 lck_mtx_lock(&map->s_lock);
15112 ref = --map->ref_count;
15113 if (ref > 0) {
15114 vm_map_res_deallocate(map);
15115 lck_mtx_unlock(&map->s_lock);
15116 return;
15117 }
15118 assert(map->ref_count == 0);
15119 lck_mtx_unlock(&map->s_lock);
15120
15121 #if TASK_SWAPPER
15122 /*
15123 * The map residence count isn't decremented here because
15124 * the vm_map_delete below will traverse the entire map,
15125 * deleting entries, and the residence counts on objects
15126 * and sharing maps will go away then.
15127 */
15128 #endif
15129
15130 vm_map_destroy(map, VM_MAP_NO_FLAGS);
15131 }
15132
15133
15134 void
15135 vm_map_disable_NX(vm_map_t map)
15136 {
15137 if (map == NULL)
15138 return;
15139 if (map->pmap == NULL)
15140 return;
15141
15142 pmap_disable_NX(map->pmap);
15143 }
15144
15145 void
15146 vm_map_disallow_data_exec(vm_map_t map)
15147 {
15148 if (map == NULL)
15149 return;
15150
15151 map->map_disallow_data_exec = TRUE;
15152 }
15153
15154 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15155 * more descriptive.
15156 */
15157 void
15158 vm_map_set_32bit(vm_map_t map)
15159 {
15160 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15161 }
15162
15163
15164 void
15165 vm_map_set_64bit(vm_map_t map)
15166 {
15167 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15168 }
15169
15170 vm_map_offset_t
15171 vm_compute_max_offset(boolean_t is64)
15172 {
15173 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15174 }
15175
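/* Maximum ASLR slide, in pages: 1 << 16 for 64-bit maps, 1 << 8 for 32-bit maps. */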
15176 uint64_t
15177 vm_map_get_max_aslr_slide_pages(vm_map_t map)
15178 {
15179 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15180 }
15181
15182 boolean_t
15183 vm_map_is_64bit(
15184 vm_map_t map)
15185 {
15186 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15187 }
15188
15189 boolean_t
15190 vm_map_has_hard_pagezero(
15191 vm_map_t map,
15192 vm_map_offset_t pagezero_size)
15193 {
15194 /*
15195 * XXX FBDP
15196 * We should lock the VM map (for read) here but we can get away
15197 * with it for now because there can't really be any race condition:
15198 * the VM map's min_offset is changed only when the VM map is created
15199 * and when the zero page is established (when the binary gets loaded),
15200 * and this routine gets called only when the task terminates and the
15201 * VM map is being torn down, and when a new map is created via
15202 * load_machfile()/execve().
15203 */
15204 return (map->min_offset >= pagezero_size);
15205 }
15206
15207 /*
15208 * Raise a VM map's maximum offset.
15209 */
15210 kern_return_t
15211 vm_map_raise_max_offset(
15212 vm_map_t map,
15213 vm_map_offset_t new_max_offset)
15214 {
15215 kern_return_t ret;
15216
15217 vm_map_lock(map);
15218 ret = KERN_INVALID_ADDRESS;
15219
15220 if (new_max_offset >= map->max_offset) {
15221 if (!vm_map_is_64bit(map)) {
15222 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15223 map->max_offset = new_max_offset;
15224 ret = KERN_SUCCESS;
15225 }
15226 } else {
15227 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15228 map->max_offset = new_max_offset;
15229 ret = KERN_SUCCESS;
15230 }
15231 }
15232 }
15233
15234 vm_map_unlock(map);
15235 return ret;
15236 }
15237
15238
15239 /*
15240 * Raise a VM map's minimum offset.
15241 * To strictly enforce "page zero" reservation.
15242 */
15243 kern_return_t
15244 vm_map_raise_min_offset(
15245 vm_map_t map,
15246 vm_map_offset_t new_min_offset)
15247 {
15248 vm_map_entry_t first_entry;
15249
15250 new_min_offset = vm_map_round_page(new_min_offset,
15251 VM_MAP_PAGE_MASK(map));
15252
15253 vm_map_lock(map);
15254
15255 if (new_min_offset < map->min_offset) {
15256 /*
15257 * Can't move min_offset backwards, as that would expose
15258 * a part of the address space that was previously, and for
15259 * possibly good reasons, inaccessible.
15260 */
15261 vm_map_unlock(map);
15262 return KERN_INVALID_ADDRESS;
15263 }
15264 if (new_min_offset >= map->max_offset) {
15265 /* can't go beyond the end of the address space */
15266 vm_map_unlock(map);
15267 return KERN_INVALID_ADDRESS;
15268 }
15269
15270 first_entry = vm_map_first_entry(map);
15271 if (first_entry != vm_map_to_entry(map) &&
15272 first_entry->vme_start < new_min_offset) {
15273 /*
15274 * Some memory was already allocated below the new
15275 * minimum offset. It's too late to change it now...
15276 */
15277 vm_map_unlock(map);
15278 return KERN_NO_SPACE;
15279 }
15280
15281 map->min_offset = new_min_offset;
15282
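/* Keep the hole list consistent: the leading hole now starts at the new minimum. */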
15283 assert(map->holes_list);
15284 map->holes_list->start = new_min_offset;
15285 assert(new_min_offset < map->holes_list->end);
15286
15287 vm_map_unlock(map);
15288
15289 return KERN_SUCCESS;
15290 }
15291
15292 /*
15293 * Set the limit on the maximum amount of user wired memory allowed for this map.
15294 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15295 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
15296 * to avoid having to reach over to the BSD data structures.
15297 */
15298
15299 void
15300 vm_map_set_user_wire_limit(vm_map_t map,
15301 vm_size_t limit)
15302 {
15303 map->user_wire_limit = limit;
15304 }
15305
15306
15307 void vm_map_switch_protect(vm_map_t map,
15308 boolean_t val)
15309 {
15310 vm_map_lock(map);
15311 map->switch_protect=val;
15312 vm_map_unlock(map);
15313 }
15314
15315 /*
15316 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15317 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15318 * bump both counters.
15319 */
15320 void
15321 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15322 {
15323 pmap_t pmap = vm_map_pmap(map);
15324
15325 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15326 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15327 }
15328
15329 void
15330 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15331 {
15332 pmap_t pmap = vm_map_pmap(map);
15333
15334 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15335 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15336 }
15337
15338 /* Add (generate) code signature for memory range */
15339 #if CONFIG_DYNAMIC_CODE_SIGNING
15340 kern_return_t vm_map_sign(vm_map_t map,
15341 vm_map_offset_t start,
15342 vm_map_offset_t end)
15343 {
15344 vm_map_entry_t entry;
15345 vm_page_t m;
15346 vm_object_t object;
15347
15348 /*
15349 * Vet all the input parameters and current type and state of the
15350 * underlying object. Return with an error if anything is amiss.
15351 */
15352 if (map == VM_MAP_NULL)
15353 return(KERN_INVALID_ARGUMENT);
15354
15355 vm_map_lock_read(map);
15356
15357 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15358 /*
15359 * Must pass a valid non-submap address.
15360 */
15361 vm_map_unlock_read(map);
15362 return(KERN_INVALID_ADDRESS);
15363 }
15364
15365 if((entry->vme_start > start) || (entry->vme_end < end)) {
15366 /*
15367 * Map entry doesn't cover the requested range. Not handling
15368 * this situation currently.
15369 */
15370 vm_map_unlock_read(map);
15371 return(KERN_INVALID_ARGUMENT);
15372 }
15373
15374 object = VME_OBJECT(entry);
15375 if (object == VM_OBJECT_NULL) {
15376 /*
15377 * Object must already be present or we can't sign.
15378 */
15379 vm_map_unlock_read(map);
15380 return KERN_INVALID_ARGUMENT;
15381 }
15382
15383 vm_object_lock(object);
15384 vm_map_unlock_read(map);
15385
15386 while(start < end) {
15387 uint32_t refmod;
15388
15389 m = vm_page_lookup(object,
15390 start - entry->vme_start + VME_OFFSET(entry));
15391 if (m==VM_PAGE_NULL) {
15392 /* Should we try to fault a page in here? We can probably
15393 * demand that it exists and is locked for this request. */
15394 vm_object_unlock(object);
15395 return KERN_FAILURE;
15396 }
15397 /* deal with special page status */
15398 if (m->busy ||
15399 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15400 vm_object_unlock(object);
15401 return KERN_FAILURE;
15402 }
15403
15404 /* Page is OK... now "validate" it */
15405 /* This is the place where we'll call out to create a code
15406 * directory, later */
15407 m->cs_validated = TRUE;
15408
15409 /* The page is now "clean" for codesigning purposes. That means
15410 * we don't consider it as modified (wpmapped) anymore. But
15411 * we'll disconnect the page so we note any future modification
15412 * attempts. */
15413 m->wpmapped = FALSE;
15414 refmod = pmap_disconnect(m->phys_page);
15415
15416 /* Pull the dirty status from the pmap, since we cleared the
15417 * wpmapped bit */
15418 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
15419 SET_PAGE_DIRTY(m, FALSE);
15420 }
15421
15422 /* On to the next page */
15423 start += PAGE_SIZE;
15424 }
15425 vm_object_unlock(object);
15426
15427 return KERN_SUCCESS;
15428 }
15429 #endif
15430
15431 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15432 {
15433 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15434 vm_map_entry_t next_entry;
15435 kern_return_t kr = KERN_SUCCESS;
15436 vm_map_t zap_map;
15437
15438 vm_map_lock(map);
15439
15440 /*
15441 * We use a "zap_map" to avoid having to unlock
15442 * the "map" in vm_map_delete().
15443 */
15444 zap_map = vm_map_create(PMAP_NULL,
15445 map->min_offset,
15446 map->max_offset,
15447 map->hdr.entries_pageable);
15448
15449 if (zap_map == VM_MAP_NULL) {
15450 return KERN_RESOURCE_SHORTAGE;
15451 }
15452
15453 vm_map_set_page_shift(zap_map,
15454 VM_MAP_PAGE_SHIFT(map));
15455 vm_map_disable_hole_optimization(zap_map);
15456
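/*
 * Reap only unshared anonymous memory: entries whose internal object
 * has a single reference are deleted into the zap map, and their
 * resident and compressed page counts are reported back to the caller.
 */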
15457 for (entry = vm_map_first_entry(map);
15458 entry != vm_map_to_entry(map);
15459 entry = next_entry) {
15460 next_entry = entry->vme_next;
15461
15462 if (VME_OBJECT(entry) &&
15463 !entry->is_sub_map &&
15464 (VME_OBJECT(entry)->internal == TRUE) &&
15465 (VME_OBJECT(entry)->ref_count == 1)) {
15466
15467 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15468 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
15469
15470 (void)vm_map_delete(map,
15471 entry->vme_start,
15472 entry->vme_end,
15473 VM_MAP_REMOVE_SAVE_ENTRIES,
15474 zap_map);
15475 }
15476 }
15477
15478 vm_map_unlock(map);
15479
15480 /*
15481 * Get rid of the "zap_maps" and all the map entries that
15482 * they may still contain.
15483 */
15484 if (zap_map != VM_MAP_NULL) {
15485 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15486 zap_map = VM_MAP_NULL;
15487 }
15488
15489 return kr;
15490 }
15491
15492 #if CONFIG_FREEZE
15493
15494 kern_return_t vm_map_freeze_walk(
15495 vm_map_t map,
15496 unsigned int *purgeable_count,
15497 unsigned int *wired_count,
15498 unsigned int *clean_count,
15499 unsigned int *dirty_count,
15500 unsigned int dirty_budget,
15501 boolean_t *has_shared)
15502 {
15503 vm_map_entry_t entry;
15504
15505 vm_map_lock_read(map);
15506
15507 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15508 *has_shared = FALSE;
15509
15510 for (entry = vm_map_first_entry(map);
15511 entry != vm_map_to_entry(map);
15512 entry = entry->vme_next) {
15513 unsigned int purgeable, clean, dirty, wired;
15514 boolean_t shared;
15515
15516 if ((VME_OBJECT(entry) == 0) ||
15517 (entry->is_sub_map) ||
15518 (VME_OBJECT(entry)->phys_contiguous)) {
15519 continue;
15520 }
15521
15522 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL);
15523
15524 *purgeable_count += purgeable;
15525 *wired_count += wired;
15526 *clean_count += clean;
15527 *dirty_count += dirty;
15528
15529 if (shared) {
15530 *has_shared = TRUE;
15531 }
15532
15533 /* Adjust pageout budget and finish up if reached */
15534 if (dirty_budget) {
15535 dirty_budget -= dirty;
15536 if (dirty_budget == 0) {
15537 break;
15538 }
15539 }
15540 }
15541
15542 vm_map_unlock_read(map);
15543
15544 return KERN_SUCCESS;
15545 }
15546
15547 int c_freezer_swapout_count;
15548 int c_freezer_compression_count = 0;
15549 AbsoluteTime c_freezer_last_yield_ts = 0;
15550
15551 kern_return_t vm_map_freeze(
15552 vm_map_t map,
15553 unsigned int *purgeable_count,
15554 unsigned int *wired_count,
15555 unsigned int *clean_count,
15556 unsigned int *dirty_count,
15557 unsigned int dirty_budget,
15558 boolean_t *has_shared)
15559 {
15560 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15561 kern_return_t kr = KERN_SUCCESS;
15562 boolean_t default_freezer_active = TRUE;
15563
15564 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15565 *has_shared = FALSE;
15566
15567 /*
15568 * We need the exclusive lock here so that we can
15569 * block any page faults or lookups while we are
15570 * in the middle of freezing this vm map.
15571 */
15572 vm_map_lock(map);
15573
15574 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15575 default_freezer_active = FALSE;
15576
15577 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15578 kr = KERN_NO_SPACE;
15579 goto done;
15580 }
15581 }
15582 assert(default_freezer_active == FALSE);
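/*
 * The assertion above documents that this configuration is expected to
 * run with the compressed pager active, in which case the loop below
 * pages objects out via vm_object_compressed_freezer_pageout() rather
 * than through a default-freezer handle.
 */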
15583
15584 if (default_freezer_active) {
15585 if (map->default_freezer_handle == NULL) {
15586 map->default_freezer_handle = default_freezer_handle_allocate();
15587 }
15588
15589 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
15590 /*
15591 * Can happen if the default_freezer_handle passed in is NULL,
15592 * or if a table has already been allocated and associated
15593 * with this handle, i.e. the map is already frozen.
15594 */
15595 goto done;
15596 }
15597 }
15598 c_freezer_compression_count = 0;
15599 clock_get_uptime(&c_freezer_last_yield_ts);
15600
15601 for (entry2 = vm_map_first_entry(map);
15602 entry2 != vm_map_to_entry(map);
15603 entry2 = entry2->vme_next) {
15604
15605 vm_object_t src_object = VME_OBJECT(entry2);
15606
15607 if (VME_OBJECT(entry2) &&
15608 !entry2->is_sub_map &&
15609 !VME_OBJECT(entry2)->phys_contiguous) {
15610 /* If eligible, scan the entry, moving eligible pages over to our parent object */
15611 if (default_freezer_active) {
15612 unsigned int purgeable, clean, dirty, wired;
15613 boolean_t shared;
15614
15615 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
15616 src_object, map->default_freezer_handle);
15617
15618 *purgeable_count += purgeable;
15619 *wired_count += wired;
15620 *clean_count += clean;
15621 *dirty_count += dirty;
15622
15623 /* Adjust pageout budget and finish up if reached */
15624 if (dirty_budget) {
15625 dirty_budget -= dirty;
15626 if (dirty_budget == 0) {
15627 break;
15628 }
15629 }
15630
15631 if (shared) {
15632 *has_shared = TRUE;
15633 }
15634 } else {
15635 if (VME_OBJECT(entry2)->internal == TRUE) {
15636
15637 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15638 /*
15639 * Pages belonging to this object could be swapped to disk.
15640 * Make sure it's not a shared object because we could end
15641 * up just bringing it back in again.
15642 */
15643 if (VME_OBJECT(entry2)->ref_count > 1) {
15644 continue;
15645 }
15646 }
15647 vm_object_compressed_freezer_pageout(VME_OBJECT(entry2));
15648 }
15649
15650 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15651 kr = KERN_NO_SPACE;
15652 break;
15653 }
15654 }
15655 }
15656 }
15657
15658 if (default_freezer_active) {
15659 /* Finally, throw out the pages to swap */
15660 default_freezer_pageout(map->default_freezer_handle);
15661 }
15662
15663 done:
15664 vm_map_unlock(map);
15665
15666 if (!default_freezer_active) {
15667 vm_object_compressed_freezer_done();
15668 }
15669 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15670 /*
15671 * reset the counter tracking the # of swapped c_segs
15672 * because we are now done with this freeze session and task.
15673 */
15674 c_freezer_swapout_count = 0;
15675 }
15676 return kr;
15677 }
15678
15679 kern_return_t
15680 vm_map_thaw(
15681 vm_map_t map)
15682 {
15683 kern_return_t kr = KERN_SUCCESS;
15684
15685 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15686 /*
15687 * We thaw on demand in the presence of the compressed pager.
15688 */
15689 return kr;
15690 }
15691
15692 vm_map_lock(map);
15693
15694 if (map->default_freezer_handle == NULL) {
15695 /*
15696 * This map is not in a frozen state.
15697 */
15698 kr = KERN_FAILURE;
15699 goto out;
15700 }
15701
15702 kr = default_freezer_unpack(map->default_freezer_handle);
15703 out:
15704 vm_map_unlock(map);
15705
15706 return kr;
15707 }
15708 #endif
15709
15710 /*
15711 * vm_map_entry_should_cow_for_true_share:
15712 *
15713 * Determines if the map entry should be clipped and setup for copy-on-write
15714 * to avoid applying "true_share" to a large VM object when only a subset is
15715 * targeted.
15716 *
15717 * For now, we target only the map entries created for the Objective C
15718 * Garbage Collector, which initially have the following properties:
15719 * - alias == VM_MEMORY_MALLOC
15720 * - wired_count == 0
15721 * - !needs_copy
15722 * and a VM object with:
15723 * - internal
15724 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
15725 * - !true_share
15726 * - vo_size == ANON_CHUNK_SIZE
15727 *
15728 * Only non-kernel map entries.
15729 */
15730 boolean_t
15731 vm_map_entry_should_cow_for_true_share(
15732 vm_map_entry_t entry)
15733 {
15734 vm_object_t object;
15735
15736 if (entry->is_sub_map) {
15737 /* entry does not point at a VM object */
15738 return FALSE;
15739 }
15740
15741 if (entry->needs_copy) {
15742 /* already set for copy_on_write: done! */
15743 return FALSE;
15744 }
15745
15746 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
15747 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
15748 /* not a malloc heap or Obj-C Garbage Collector heap */
15749 return FALSE;
15750 }
15751
15752 if (entry->wired_count) {
15753 /* wired: can't change the map entry... */
15754 vm_counters.should_cow_but_wired++;
15755 return FALSE;
15756 }
15757
15758 object = VME_OBJECT(entry);
15759
15760 if (object == VM_OBJECT_NULL) {
15761 /* no object yet... */
15762 return FALSE;
15763 }
15764
15765 if (!object->internal) {
15766 /* not an internal object */
15767 return FALSE;
15768 }
15769
15770 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
15771 /* not the default copy strategy */
15772 return FALSE;
15773 }
15774
15775 if (object->true_share) {
15776 /* already true_share: too late to avoid it */
15777 return FALSE;
15778 }
15779
15780 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
15781 object->vo_size != ANON_CHUNK_SIZE) {
15782 /* ... not an object created for the ObjC Garbage Collector */
15783 return FALSE;
15784 }
15785
15786 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
15787 object->vo_size != 2048 * 4096) {
15788 /* ... not a "MALLOC_SMALL" heap */
15789 return FALSE;
15790 }
15791
15792 /*
15793 * All the criteria match: we have a large object being targeted for "true_share".
15794 * To limit the adverse side-effects linked with "true_share", tell the caller to
15795 * try and avoid setting up the entire object for "true_share" by clipping the
15796 * targeted range and setting it up for copy-on-write.
15797 */
15798 return TRUE;
15799 }
15800
15801 vm_map_offset_t
15802 vm_map_round_page_mask(
15803 vm_map_offset_t offset,
15804 vm_map_offset_t mask)
15805 {
15806 return VM_MAP_ROUND_PAGE(offset, mask);
15807 }
15808
15809 vm_map_offset_t
15810 vm_map_trunc_page_mask(
15811 vm_map_offset_t offset,
15812 vm_map_offset_t mask)
15813 {
15814 return VM_MAP_TRUNC_PAGE(offset, mask);
15815 }
15816
15817 boolean_t
15818 vm_map_page_aligned(
15819 vm_map_offset_t offset,
15820 vm_map_offset_t mask)
15821 {
15822 return ((offset) & mask) == 0;
15823 }
15824
15825 int
15826 vm_map_page_shift(
15827 vm_map_t map)
15828 {
15829 return VM_MAP_PAGE_SHIFT(map);
15830 }
15831
15832 int
15833 vm_map_page_size(
15834 vm_map_t map)
15835 {
15836 return VM_MAP_PAGE_SIZE(map);
15837 }
15838
15839 vm_map_offset_t
15840 vm_map_page_mask(
15841 vm_map_t map)
15842 {
15843 return VM_MAP_PAGE_MASK(map);
15844 }
15845
15846 kern_return_t
15847 vm_map_set_page_shift(
15848 vm_map_t map,
15849 int pageshift)
15850 {
15851 if (map->hdr.nentries != 0) {
15852 /* too late to change page size */
15853 return KERN_FAILURE;
15854 }
15855
15856 map->hdr.page_shift = pageshift;
15857
15858 return KERN_SUCCESS;
15859 }
15860
15861 int
15862 vm_map_purge(
15863 vm_map_t map)
15864 {
15865 int num_object_purged;
15866 vm_map_entry_t entry;
15867 vm_map_offset_t next_address;
15868 vm_object_t object;
15869 int state;
15870 kern_return_t kr;
15871
15872 num_object_purged = 0;
15873
15874 vm_map_lock_read(map);
15875 entry = vm_map_first_entry(map);
15876 while (entry != vm_map_to_entry(map)) {
15877 if (entry->is_sub_map) {
15878 goto next;
15879 }
15880 if (! (entry->protection & VM_PROT_WRITE)) {
15881 goto next;
15882 }
15883 object = VME_OBJECT(entry);
15884 if (object == VM_OBJECT_NULL) {
15885 goto next;
15886 }
15887 if (object->purgable != VM_PURGABLE_VOLATILE) {
15888 goto next;
15889 }
15890
15891 vm_object_lock(object);
15892 #if 00
15893 if (VME_OFFSET(entry) != 0 ||
15894 (entry->vme_end - entry->vme_start) != object->vo_size) {
15895 vm_object_unlock(object);
15896 goto next;
15897 }
15898 #endif
15899 next_address = entry->vme_end;
15900 vm_map_unlock_read(map);
15901 state = VM_PURGABLE_EMPTY;
15902 kr = vm_object_purgable_control(object,
15903 VM_PURGABLE_SET_STATE,
15904 &state);
15905 if (kr == KERN_SUCCESS) {
15906 num_object_purged++;
15907 }
15908 vm_object_unlock(object);
15909
15910 vm_map_lock_read(map);
15911 if (vm_map_lookup_entry(map, next_address, &entry)) {
15912 continue;
15913 }
15914 next:
15915 entry = entry->vme_next;
15916 }
15917 vm_map_unlock_read(map);
15918
15919 return num_object_purged;
15920 }
15921
15922 kern_return_t
15923 vm_map_query_volatile(
15924 vm_map_t map,
15925 mach_vm_size_t *volatile_virtual_size_p,
15926 mach_vm_size_t *volatile_resident_size_p,
15927 mach_vm_size_t *volatile_compressed_size_p,
15928 mach_vm_size_t *volatile_pmap_size_p,
15929 mach_vm_size_t *volatile_compressed_pmap_size_p)
15930 {
15931 mach_vm_size_t volatile_virtual_size;
15932 mach_vm_size_t volatile_resident_count;
15933 mach_vm_size_t volatile_compressed_count;
15934 mach_vm_size_t volatile_pmap_count;
15935 mach_vm_size_t volatile_compressed_pmap_count;
15936 mach_vm_size_t resident_count;
15937 vm_map_entry_t entry;
15938 vm_object_t object;
15939
15940 /* map should be locked by caller */
15941
15942 volatile_virtual_size = 0;
15943 volatile_resident_count = 0;
15944 volatile_compressed_count = 0;
15945 volatile_pmap_count = 0;
15946 volatile_compressed_pmap_count = 0;
15947
15948 for (entry = vm_map_first_entry(map);
15949 entry != vm_map_to_entry(map);
15950 entry = entry->vme_next) {
15951 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
15952
15953 if (entry->is_sub_map) {
15954 continue;
15955 }
15956 if (! (entry->protection & VM_PROT_WRITE)) {
15957 continue;
15958 }
15959 object = VME_OBJECT(entry);
15960 if (object == VM_OBJECT_NULL) {
15961 continue;
15962 }
15963 if (object->purgable != VM_PURGABLE_VOLATILE &&
15964 object->purgable != VM_PURGABLE_EMPTY) {
15965 continue;
15966 }
15967 if (VME_OFFSET(entry)) {
15968 /*
15969 * If the map entry has been split and the object now
15970 * appears several times in the VM map, we don't want
15971 * to count the object's resident_page_count more than
15972 * once. We count it only for the first one, starting
15973 * at offset 0 and ignore the other VM map entries.
15974 */
15975 continue;
15976 }
15977 resident_count = object->resident_page_count;
15978 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
15979 resident_count = 0;
15980 } else {
15981 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
15982 }
15983
15984 volatile_virtual_size += entry->vme_end - entry->vme_start;
15985 volatile_resident_count += resident_count;
15986 if (object->pager) {
15987 volatile_compressed_count +=
15988 vm_compressor_pager_get_count(object->pager);
15989 }
15990 pmap_compressed_bytes = 0;
15991 pmap_resident_bytes =
15992 pmap_query_resident(map->pmap,
15993 entry->vme_start,
15994 entry->vme_end,
15995 &pmap_compressed_bytes);
15996 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
15997 volatile_compressed_pmap_count += (pmap_compressed_bytes
15998 / PAGE_SIZE);
15999 }
16000
16001 /* map is still locked on return */
16002
16003 *volatile_virtual_size_p = volatile_virtual_size;
16004 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
16005 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
16006 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
16007 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
16008
16009 return KERN_SUCCESS;
16010 }
16011
16012 void
16013 vm_map_sizes(vm_map_t map,
16014 vm_map_size_t * psize,
16015 vm_map_size_t * pfree,
16016 vm_map_size_t * plargest_free)
16017 {
16018 vm_map_entry_t entry;
16019 vm_map_offset_t prev;
16020 vm_map_size_t free, total_free, largest_free;
16021 boolean_t end;
16022
16023 total_free = largest_free = 0;
16024
16025 vm_map_lock_read(map);
16026 if (psize) *psize = map->max_offset - map->min_offset;
16027
16028 prev = map->min_offset;
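/*
 * Walk the entries in address order, measuring the gap before each
 * entry (and after the last one, up to max_offset) to accumulate the
 * total and largest free sizes.
 */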
16029 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16030 {
16031 end = (entry == vm_map_to_entry(map));
16032
16033 if (end) free = entry->vme_end - prev;
16034 else free = entry->vme_start - prev;
16035
16036 total_free += free;
16037 if (free > largest_free) largest_free = free;
16038
16039 if (end) break;
16040 prev = entry->vme_end;
16041 }
16042 vm_map_unlock_read(map);
16043 if (pfree) *pfree = total_free;
16044 if (plargest_free) *plargest_free = largest_free;
16045 }
16046
16047 #if VM_SCAN_FOR_SHADOW_CHAIN
16048 int vm_map_shadow_max(vm_map_t map);
16049 int vm_map_shadow_max(
16050 vm_map_t map)
16051 {
16052 int shadows, shadows_max;
16053 vm_map_entry_t entry;
16054 vm_object_t object, next_object;
16055
16056 if (map == NULL)
16057 return 0;
16058
16059 shadows_max = 0;
16060
16061 vm_map_lock_read(map);
16062
16063 for (entry = vm_map_first_entry(map);
16064 entry != vm_map_to_entry(map);
16065 entry = entry->vme_next) {
16066 if (entry->is_sub_map) {
16067 continue;
16068 }
16069 object = VME_OBJECT(entry);
16070 if (object == NULL) {
16071 continue;
16072 }
16073 vm_object_lock_shared(object);
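/* Hand-over-hand shared locking down the shadow chain, counting its depth. */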
16074 for (shadows = 0;
16075 object->shadow != NULL;
16076 shadows++, object = next_object) {
16077 next_object = object->shadow;
16078 vm_object_lock_shared(next_object);
16079 vm_object_unlock(object);
16080 }
16081 vm_object_unlock(object);
16082 if (shadows > shadows_max) {
16083 shadows_max = shadows;
16084 }
16085 }
16086
16087 vm_map_unlock_read(map);
16088
16089 return shadows_max;
16090 }
16091 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */