osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68 #include <libkern/OSAtomic.h>
  69
  70 #include <mach/kern_return.h>
  71 #include <mach/port.h>
  72 #include <mach/vm_attributes.h>
  73 #include <mach/vm_param.h>
  74 #include <mach/vm_behavior.h>
  75 #include <mach/vm_statistics.h>
  76 #include <mach/memory_object.h>
  77 #include <mach/mach_vm.h>
  78 #include <machine/cpu_capabilities.h>
  79 #include <mach/sdt.h>
  80
  81 #include <kern/assert.h>
  82 #include <kern/counters.h>
  83 #include <kern/kalloc.h>
  84 #include <kern/zalloc.h>
  85
  86 #include <vm/cpm.h>
  87 #include <vm/vm_compressor_pager.h>
  88 #include <vm/vm_init.h>
  89 #include <vm/vm_fault.h>
  90 #include <vm/vm_map.h>
  91 #include <vm/vm_object.h>
  92 #include <vm/vm_page.h>
  93 #include <vm/vm_pageout.h>
  94 #include <vm/vm_kern.h>
  95 #include <ipc/ipc_port.h>
  96 #include <kern/sched_prim.h>
  97 #include <kern/misc_protos.h>
  98 #include <kern/xpr.h>
  99
 100 #include <mach/vm_map_server.h>
 101 #include <mach/mach_host_server.h>
 102 #include <vm/vm_protos.h>
 103 #include <vm/vm_purgeable_internal.h>
 104
 105 #include <vm/vm_protos.h>
 106 #include <vm/vm_shared_region.h>
 107 #include <vm/vm_map_store.h>
 108
 109 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
 110 /* Internal prototypes
 111  */
 112
 113 static void vm_map_simplify_range(
 114         vm_map_t        map,
 115         vm_map_offset_t start,
 116         vm_map_offset_t end);   /* forward */
 117
 118 static boolean_t        vm_map_range_check(
 119         vm_map_t        map,
 120         vm_map_offset_t start,
 121         vm_map_offset_t end,
 122         vm_map_entry_t  *entry);
 123
 124 static vm_map_entry_t   _vm_map_entry_create(
 125         struct vm_map_header    *map_header, boolean_t map_locked);
 126
 127 static void             _vm_map_entry_dispose(
 128         struct vm_map_header    *map_header,
 129         vm_map_entry_t          entry);
 130
 131 static void             vm_map_pmap_enter(
 132         vm_map_t                map,
 133         vm_map_offset_t         addr,
 134         vm_map_offset_t         end_addr,
 135         vm_object_t             object,
 136         vm_object_offset_t      offset,
 137         vm_prot_t               protection);
 138
 139 static void             _vm_map_clip_end(
 140         struct vm_map_header    *map_header,
 141         vm_map_entry_t          entry,
 142         vm_map_offset_t         end);
 143
 144 static void             _vm_map_clip_start(
 145         struct vm_map_header    *map_header,
 146         vm_map_entry_t          entry,
 147         vm_map_offset_t         start);
 148
 149 static void             vm_map_entry_delete(
 150         vm_map_t        map,
 151         vm_map_entry_t  entry);
 152
 153 static kern_return_t    vm_map_delete(
 154         vm_map_t        map,
 155         vm_map_offset_t start,
 156         vm_map_offset_t end,
 157         int             flags,
 158         vm_map_t        zap_map);
 159
 160 static kern_return_t    vm_map_copy_overwrite_unaligned(
 161         vm_map_t        dst_map,
 162         vm_map_entry_t  entry,
 163         vm_map_copy_t   copy,
 164         vm_map_address_t start,
 165         boolean_t       discard_on_success);
 166
 167 static kern_return_t    vm_map_copy_overwrite_aligned(
 168         vm_map_t        dst_map,
 169         vm_map_entry_t  tmp_entry,
 170         vm_map_copy_t   copy,
 171         vm_map_offset_t start,
 172         pmap_t          pmap);
 173
 174 static kern_return_t    vm_map_copyin_kernel_buffer(
 175         vm_map_t        src_map,
 176         vm_map_address_t src_addr,
 177         vm_map_size_t   len,
 178         boolean_t       src_destroy,
 179         vm_map_copy_t   *copy_result);  /* OUT */
 180
 181 static kern_return_t    vm_map_copyout_kernel_buffer(
 182         vm_map_t        map,
 183         vm_map_address_t *addr, /* IN/OUT */
 184         vm_map_copy_t   copy,
 185         boolean_t       overwrite,
 186         boolean_t       consume_on_success);
 187
 188 static void             vm_map_fork_share(
 189         vm_map_t        old_map,
 190         vm_map_entry_t  old_entry,
 191         vm_map_t        new_map);
 192
 193 static boolean_t        vm_map_fork_copy(
 194         vm_map_t        old_map,
 195         vm_map_entry_t  *old_entry_p,
 196         vm_map_t        new_map);
 197
 198 void            vm_map_region_top_walk(
 199         vm_map_entry_t             entry,
 200         vm_region_top_info_t       top);
 201
 202 void            vm_map_region_walk(
 203         vm_map_t                   map,
 204         vm_map_offset_t            va,
 205         vm_map_entry_t             entry,
 206         vm_object_offset_t         offset,
 207         vm_object_size_t           range,
 208         vm_region_extended_info_t  extended,
 209         boolean_t                  look_for_pages,
 210         mach_msg_type_number_t count);
 211
 212 static kern_return_t    vm_map_wire_nested(
 213         vm_map_t                   map,
 214         vm_map_offset_t            start,
 215         vm_map_offset_t            end,
 216         vm_prot_t                  access_type,
 217         boolean_t                  user_wire,
 218         pmap_t                     map_pmap,
 219         vm_map_offset_t            pmap_addr);
 220
 221 static kern_return_t    vm_map_unwire_nested(
 222         vm_map_t                   map,
 223         vm_map_offset_t            start,
 224         vm_map_offset_t            end,
 225         boolean_t                  user_wire,
 226         pmap_t                     map_pmap,
 227         vm_map_offset_t            pmap_addr);
 228
 229 static kern_return_t    vm_map_overwrite_submap_recurse(
 230         vm_map_t                   dst_map,
 231         vm_map_offset_t            dst_addr,
 232         vm_map_size_t              dst_size);
 233
 234 static kern_return_t    vm_map_copy_overwrite_nested(
 235         vm_map_t                   dst_map,
 236         vm_map_offset_t            dst_addr,
 237         vm_map_copy_t              copy,
 238         boolean_t                  interruptible,
 239         pmap_t                     pmap,
 240         boolean_t                  discard_on_success);
 241
 242 static kern_return_t    vm_map_remap_extract(
 243         vm_map_t                map,
 244         vm_map_offset_t         addr,
 245         vm_map_size_t           size,
 246         boolean_t               copy,
 247         struct vm_map_header    *map_header,
 248         vm_prot_t               *cur_protection,
 249         vm_prot_t               *max_protection,
 250         vm_inherit_t            inheritance,
 251         boolean_t               pageable);
 252
 253 static kern_return_t    vm_map_remap_range_allocate(
 254         vm_map_t                map,
 255         vm_map_address_t        *address,
 256         vm_map_size_t           size,
 257         vm_map_offset_t         mask,
 258         int                     flags,
 259         vm_map_entry_t          *map_entry);
 260
 261 static void             vm_map_region_look_for_page(
 262         vm_map_t                   map,
 263         vm_map_offset_t            va,
 264         vm_object_t                object,
 265         vm_object_offset_t         offset,
 266         int                        max_refcnt,
 267         int                        depth,
 268         vm_region_extended_info_t  extended,
 269         mach_msg_type_number_t count);
 270
 271 static int              vm_map_region_count_obj_refs(
 272         vm_map_entry_t             entry,
 273         vm_object_t                object);
 274
 275
 276 static kern_return_t    vm_map_willneed(
 277         vm_map_t        map,
 278         vm_map_offset_t start,
 279         vm_map_offset_t end);
 280
 281 static kern_return_t    vm_map_reuse_pages(
 282         vm_map_t        map,
 283         vm_map_offset_t start,
 284         vm_map_offset_t end);
 285
 286 static kern_return_t    vm_map_reusable_pages(
 287         vm_map_t        map,
 288         vm_map_offset_t start,
 289         vm_map_offset_t end);
 290
 291 static kern_return_t    vm_map_can_reuse(
 292         vm_map_t        map,
 293         vm_map_offset_t start,
 294         vm_map_offset_t end);
 295
 296
 297 /*
 298  * Macros to copy a vm_map_entry. We must be careful to correctly
 299  * manage the wired page count. vm_map_entry_copy() creates a new
 300  * map entry to the same memory - the wired count in the new entry
 301  * must be set to zero. vm_map_entry_copy_full() creates a new
 302  * entry that is identical to the old entry.  This preserves the
 303  * wire count; it's used for map splitting and zone changing in
 304  * vm_map_copyout.
 305  */
 306
 307 #define vm_map_entry_copy(NEW,OLD)      \
 308 MACRO_BEGIN                             \
 309 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
 310         *(NEW) = *(OLD);                \
 311         (NEW)->is_shared = FALSE;       \
 312         (NEW)->needs_wakeup = FALSE;    \
 313         (NEW)->in_transition = FALSE;   \
 314         (NEW)->wired_count = 0;         \
 315         (NEW)->user_wired_count = 0;    \
 316         (NEW)->permanent = FALSE;       \
 317         (NEW)->used_for_jit = FALSE;    \
 318         (NEW)->from_reserved_zone = _vmec_reserved;                     \
 319 MACRO_END
 320
 321 #define vm_map_entry_copy_full(NEW,OLD)                 \
 322 MACRO_BEGIN                                             \
 323 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
 324 (*(NEW) = *(OLD));                                      \
 325 (NEW)->from_reserved_zone = _vmecf_reserved;                    \
 326 MACRO_END
 327
 328 /*
 329  *      Decide if we want to allow processes to execute from their data or stack areas.
 330  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 331  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 332  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 333  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 334  *      specific pmap files since the default behavior varies according to architecture.  The
 335  *      main reason it varies is because of the need to provide binary compatibility with old
 336  *      applications that were written before these restrictions came into being.  In the old
 337  *      days, an app could execute anything it could read, but this has slowly been tightened
 338  *      up over time.  The default behavior is:
 339  *
 340  *      32-bit PPC apps         may execute from both stack and data areas
 341  *      32-bit Intel apps       may exeucte from data areas but not stack
 342  *      64-bit PPC/Intel apps   may not execute from either data or stack
 343  *
 344  *      An application on any architecture may override these defaults by explicitly
 345  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 346  *      system call.  This code here just determines what happens when an app tries to
 347  *      execute from a page that lacks execute permission.
 348  *
 349  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 350  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 351  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 352  *      execution from data areas for a particular binary even if the arch normally permits it. As
 353  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 354  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 355  *      are not all NX-safe.
 356  */
 357
 358 extern int allow_data_exec, allow_stack_exec;
 359
 360 int
 361 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 362 {
 363         int current_abi;
 364
 365         /*
 366          * Determine if the app is running in 32 or 64 bit mode.
 367          */
 368
 369         if (vm_map_is_64bit(map))
 370                 current_abi = VM_ABI_64;
 371         else
 372                 current_abi = VM_ABI_32;
 373
 374         /*
 375          * Determine if we should allow the execution based on whether it's a
 376          * stack or data area and the current architecture.
 377          */
 378
 379         if (user_tag == VM_MEMORY_STACK)
 380                 return allow_stack_exec & current_abi;
 381
 382         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 383 }
 384
 385
 386 /*
 387  *      Virtual memory maps provide for the mapping, protection,
 388  *      and sharing of virtual memory objects.  In addition,
 389  *      this module provides for an efficient virtual copy of
 390  *      memory from one map to another.
 391  *
 392  *      Synchronization is required prior to most operations.
 393  *
 394  *      Maps consist of an ordered doubly-linked list of simple
 395  *      entries; a single hint is used to speed up lookups.
 396  *
 397  *      Sharing maps have been deleted from this version of Mach.
 398  *      All shared objects are now mapped directly into the respective
 399  *      maps.  This requires a change in the copy on write strategy;
 400  *      the asymmetric (delayed) strategy is used for shared temporary
 401  *      objects instead of the symmetric (shadow) strategy.  All maps
 402  *      are now "top level" maps (either task map, kernel map or submap
 403  *      of the kernel map).
 404  *
 405  *      Since portions of maps are specified by start/end addreses,
 406  *      which may not align with existing map entries, all
 407  *      routines merely "clip" entries to these start/end values.
 408  *      [That is, an entry is split into two, bordering at a
 409  *      start or end value.]  Note that these clippings may not
 410  *      always be necessary (as the two resulting entries are then
 411  *      not changed); however, the clipping is done for convenience.
 412  *      No attempt is currently made to "glue back together" two
 413  *      abutting entries.
 414  *
 415  *      The symmetric (shadow) copy strategy implements virtual copy
 416  *      by copying VM object references from one map to
 417  *      another, and then marking both regions as copy-on-write.
 418  *      It is important to note that only one writeable reference
 419  *      to a VM object region exists in any map when this strategy
 420  *      is used -- this means that shadow object creation can be
 421  *      delayed until a write operation occurs.  The symmetric (delayed)
 422  *      strategy allows multiple maps to have writeable references to
 423  *      the same region of a vm object, and hence cannot delay creating
 424  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 425  *      Copying of permanent objects is completely different; see
 426  *      vm_object_copy_strategically() in vm_object.c.
 427  */
 428
 429 static zone_t   vm_map_zone;            /* zone for vm_map structures */
 430 static zone_t   vm_map_entry_zone;      /* zone for vm_map_entry structures */
 431 static zone_t   vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking
 432                                          * allocations */
 433 static zone_t   vm_map_copy_zone;       /* zone for vm_map_copy structures */
 434
 435
 436 /*
 437  *      Placeholder object for submap operations.  This object is dropped
 438  *      into the range by a call to vm_map_find, and removed when
 439  *      vm_map_submap creates the submap.
 440  */
 441
 442 vm_object_t     vm_submap_object;
 443
 444 static void             *map_data;
 445 static vm_size_t        map_data_size;
 446 static void             *kentry_data;
 447 static vm_size_t        kentry_data_size;
 448
 449 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 450
 451 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 452 unsigned int not_in_kdp = 1;
 453
 454 unsigned int vm_map_set_cache_attr_count = 0;
 455
 456 kern_return_t
 457 vm_map_set_cache_attr(
 458         vm_map_t        map,
 459         vm_map_offset_t va)
 460 {
 461         vm_map_entry_t  map_entry;
 462         vm_object_t     object;
 463         kern_return_t   kr = KERN_SUCCESS;
 464
 465         vm_map_lock_read(map);
 466
 467         if (!vm_map_lookup_entry(map, va, &map_entry) ||
 468             map_entry->is_sub_map) {
 469                 /*
 470                  * that memory is not properly mapped
 471                  */
 472                 kr = KERN_INVALID_ARGUMENT;
 473                 goto done;
 474         }
 475         object = map_entry->object.vm_object;
 476
 477         if (object == VM_OBJECT_NULL) {
 478                 /*
 479                  * there should be a VM object here at this point
 480                  */
 481                 kr = KERN_INVALID_ARGUMENT;
 482                 goto done;
 483         }
 484         vm_object_lock(object);
 485         object->set_cache_attr = TRUE;
 486         vm_object_unlock(object);
 487
 488         vm_map_set_cache_attr_count++;
 489 done:
 490         vm_map_unlock_read(map);
 491
 492         return kr;
 493 }
 494
 495
 496 #if CONFIG_CODE_DECRYPTION
 497 /*
 498  * vm_map_apple_protected:
 499  * This remaps the requested part of the object with an object backed by
 500  * the decrypting pager.
 501  * crypt_info contains entry points and session data for the crypt module.
 502  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 503  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 504  */
 505 kern_return_t
 506 vm_map_apple_protected(
 507         vm_map_t        map,
 508         vm_map_offset_t start,
 509         vm_map_offset_t end,
 510         struct pager_crypt_info *crypt_info)
 511 {
 512         boolean_t       map_locked;
 513         kern_return_t   kr;
 514         vm_map_entry_t  map_entry;
 515         memory_object_t protected_mem_obj;
 516         vm_object_t     protected_object;
 517         vm_map_offset_t map_addr;
 518
 519         vm_map_lock_read(map);
 520         map_locked = TRUE;
 521
 522         /* lookup the protected VM object */
 523         if (!vm_map_lookup_entry(map,
 524                                  start,
 525                                  &map_entry) ||
 526             map_entry->vme_end < end ||
 527             map_entry->is_sub_map) {
 528                 /* that memory is not properly mapped */
 529                 kr = KERN_INVALID_ARGUMENT;
 530                 goto done;
 531         }
 532         protected_object = map_entry->object.vm_object;
 533         if (protected_object == VM_OBJECT_NULL) {
 534                 /* there should be a VM object here at this point */
 535                 kr = KERN_INVALID_ARGUMENT;
 536                 goto done;
 537         }
 538
 539         /* make sure protected object stays alive while map is unlocked */
 540         vm_object_reference(protected_object);
 541
 542         vm_map_unlock_read(map);
 543         map_locked = FALSE;
 544
 545         /*
 546          * Lookup (and create if necessary) the protected memory object
 547          * matching that VM object.
 548          * If successful, this also grabs a reference on the memory object,
 549          * to guarantee that it doesn't go away before we get a chance to map
 550          * it.
 551          */
 552         protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
 553
 554         /* release extra ref on protected object */
 555         vm_object_deallocate(protected_object);
 556
 557         if (protected_mem_obj == NULL) {
 558                 kr = KERN_FAILURE;
 559                 goto done;
 560         }
 561
 562         /* map this memory object in place of the current one */
 563         map_addr = start;
 564         kr = vm_map_enter_mem_object(map,
 565                                      &map_addr,
 566                                      end - start,
 567                                      (mach_vm_offset_t) 0,
 568                                      VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
 569                                      (ipc_port_t) protected_mem_obj,
 570                                      (map_entry->offset +
 571                                       (start - map_entry->vme_start)),
 572                                      TRUE,
 573                                      map_entry->protection,
 574                                      map_entry->max_protection,
 575                                      map_entry->inheritance);
 576         assert(map_addr == start);
 577         /*
 578          * Release the reference obtained by apple_protect_pager_setup().
 579          * The mapping (if it succeeded) is now holding a reference on the
 580          * memory object.
 581          */
 582         memory_object_deallocate(protected_mem_obj);
 583
 584 done:
 585         if (map_locked) {
 586                 vm_map_unlock_read(map);
 587         }
 588         return kr;
 589 }
 590 #endif  /* CONFIG_CODE_DECRYPTION */
 591
 592
 593 lck_grp_t               vm_map_lck_grp;
 594 lck_grp_attr_t  vm_map_lck_grp_attr;
 595 lck_attr_t              vm_map_lck_attr;
 596
 597
 598 /*
 599  *      vm_map_init:
 600  *
 601  *      Initialize the vm_map module.  Must be called before
 602  *      any other vm_map routines.
 603  *
 604  *      Map and entry structures are allocated from zones -- we must
 605  *      initialize those zones.
 606  *
 607  *      There are three zones of interest:
 608  *
 609  *      vm_map_zone:            used to allocate maps.
 610  *      vm_map_entry_zone:      used to allocate map entries.
 611  *      vm_map_entry_reserved_zone:     fallback zone for kernel map entries
 612  *
 613  *      The kernel allocates map entries from a special zone that is initially
 614  *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 615  *      the kernel to allocate more memory to a entry zone when it became
 616  *      empty since the very act of allocating memory implies the creation
 617  *      of a new entry.
 618  */
 619 void
 620 vm_map_init(
 621         void)
 622 {
 623         vm_size_t entry_zone_alloc_size;
 624         const char *mez_name = "VM map entries";
 625
 626         vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
 627                             PAGE_SIZE, "maps");
 628         zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 629 #if     defined(__LP64__)
 630         entry_zone_alloc_size = PAGE_SIZE * 5;
 631 #else
 632         entry_zone_alloc_size = PAGE_SIZE * 6;
 633 #endif
 634         vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 635                                   1024*1024, entry_zone_alloc_size,
 636                                   mez_name);
 637         zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
 638         zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
 639         zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
 640
 641         vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 642                                    kentry_data_size * 64, kentry_data_size,
 643                                    "Reserved VM map entries");
 644         zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
 645
 646         vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
 647                                  16*1024, PAGE_SIZE, "VM map copies");
 648         zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 649
 650         /*
 651          *      Cram the map and kentry zones with initial data.
 652          *      Set reserved_zone non-collectible to aid zone_gc().
 653          */
 654         zone_change(vm_map_zone, Z_COLLECT, FALSE);
 655
 656         zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
 657         zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
 658         zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
 659         zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
 660         zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 661         zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 662         zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 663
 664         zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
 665         zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
 666
 667         lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
 668         lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
 669         lck_attr_setdefault(&vm_map_lck_attr);
 670
 671 #if CONFIG_FREEZE
 672         default_freezer_init();
 673 #endif /* CONFIG_FREEZE */
 674 }
 675
 676 void
 677 vm_map_steal_memory(
 678         void)
 679 {
 680         uint32_t kentry_initial_pages;
 681
 682         map_data_size = round_page(10 * sizeof(struct _vm_map));
 683         map_data = pmap_steal_memory(map_data_size);
 684
 685         /*
 686          * kentry_initial_pages corresponds to the number of kernel map entries
 687          * required during bootstrap until the asynchronous replenishment
 688          * scheme is activated and/or entries are available from the general
 689          * map entry pool.
 690          */
 691 #if     defined(__LP64__)
 692         kentry_initial_pages = 10;
 693 #else
 694         kentry_initial_pages = 6;
 695 #endif
 696
 697 #if CONFIG_GZALLOC
 698         /* If using the guard allocator, reserve more memory for the kernel
 699          * reserved map entry pool.
 700         */
 701         if (gzalloc_enabled())
 702                 kentry_initial_pages *= 1024;
 703 #endif
 704
 705         kentry_data_size = kentry_initial_pages * PAGE_SIZE;
 706         kentry_data = pmap_steal_memory(kentry_data_size);
 707 }
 708
 709 void vm_kernel_reserved_entry_init(void) {
 710         zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
 711 }
 712
 713 /*
 714  *      vm_map_create:
 715  *
 716  *      Creates and returns a new empty VM map with
 717  *      the given physical map structure, and having
 718  *      the given lower and upper address bounds.
 719  */
 720 vm_map_t
 721 vm_map_create(
 722         pmap_t                  pmap,
 723         vm_map_offset_t min,
 724         vm_map_offset_t max,
 725         boolean_t               pageable)
 726 {
 727         static int              color_seed = 0;
 728         register vm_map_t       result;
 729
 730         result = (vm_map_t) zalloc(vm_map_zone);
 731         if (result == VM_MAP_NULL)
 732                 panic("vm_map_create");
 733
 734         vm_map_first_entry(result) = vm_map_to_entry(result);
 735         vm_map_last_entry(result)  = vm_map_to_entry(result);
 736         result->hdr.nentries = 0;
 737         result->hdr.entries_pageable = pageable;
 738
 739         vm_map_store_init( &(result->hdr) );
 740
 741         result->hdr.page_shift = PAGE_SHIFT;
 742
 743         result->size = 0;
 744         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
 745         result->user_wire_size  = 0;
 746         result->ref_count = 1;
 747 #if     TASK_SWAPPER
 748         result->res_count = 1;
 749         result->sw_state = MAP_SW_IN;
 750 #endif  /* TASK_SWAPPER */
 751         result->pmap = pmap;
 752         result->min_offset = min;
 753         result->max_offset = max;
 754         result->wiring_required = FALSE;
 755         result->no_zero_fill = FALSE;
 756         result->mapped_in_other_pmaps = FALSE;
 757         result->wait_for_space = FALSE;
 758         result->switch_protect = FALSE;
 759         result->disable_vmentry_reuse = FALSE;
 760         result->map_disallow_data_exec = FALSE;
 761         result->highest_entry_end = 0;
 762         result->first_free = vm_map_to_entry(result);
 763         result->hint = vm_map_to_entry(result);
 764         result->color_rr = (color_seed++) & vm_color_mask;
 765         result->jit_entry_exists = FALSE;
 766 #if CONFIG_FREEZE
 767         result->default_freezer_handle = NULL;
 768 #endif
 769         vm_map_lock_init(result);
 770         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
 771
 772         return(result);
 773 }
 774
 775 /*
 776  *      vm_map_entry_create:    [ internal use only ]
 777  *
 778  *      Allocates a VM map entry for insertion in the
 779  *      given map (or map copy).  No fields are filled.
 780  */
 781 #define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)
 782
 783 #define vm_map_copy_entry_create(copy, map_locked)                                      \
 784         _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
 785 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
 786
 787 static vm_map_entry_t
 788 _vm_map_entry_create(
 789         struct vm_map_header    *map_header, boolean_t __unused map_locked)
 790 {
 791         zone_t  zone;
 792         vm_map_entry_t  entry;
 793
 794         zone = vm_map_entry_zone;
 795
 796         assert(map_header->entries_pageable ? !map_locked : TRUE);
 797
 798         if (map_header->entries_pageable) {
 799                 entry = (vm_map_entry_t) zalloc(zone);
 800         }
 801         else {
 802                 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
 803
 804                 if (entry == VM_MAP_ENTRY_NULL) {
 805                         zone = vm_map_entry_reserved_zone;
 806                         entry = (vm_map_entry_t) zalloc(zone);
 807                         OSAddAtomic(1, &reserved_zalloc_count);
 808                 } else
 809                         OSAddAtomic(1, &nonreserved_zalloc_count);
 810         }
 811
 812         if (entry == VM_MAP_ENTRY_NULL)
 813                 panic("vm_map_entry_create");
 814         entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
 815
 816         vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
 817 #if     MAP_ENTRY_CREATION_DEBUG
 818         entry->vme_creation_maphdr = map_header;
 819         fastbacktrace(&entry->vme_creation_bt[0],
 820                       (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
 821 #endif
 822         return(entry);
 823 }
 824
 825 /*
 826  *      vm_map_entry_dispose:   [ internal use only ]
 827  *
 828  *      Inverse of vm_map_entry_create.
 829  *
 830  *      write map lock held so no need to
 831  *      do anything special to insure correctness
 832  *      of the stores
 833  */
 834 #define vm_map_entry_dispose(map, entry)                        \
 835         _vm_map_entry_dispose(&(map)->hdr, (entry))
 836
 837 #define vm_map_copy_entry_dispose(map, entry) \
 838         _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
 839
 840 static void
 841 _vm_map_entry_dispose(
 842         register struct vm_map_header   *map_header,
 843         register vm_map_entry_t         entry)
 844 {
 845         register zone_t         zone;
 846
 847         if (map_header->entries_pageable || !(entry->from_reserved_zone))
 848                 zone = vm_map_entry_zone;
 849         else
 850                 zone = vm_map_entry_reserved_zone;
 851
 852         if (!map_header->entries_pageable) {
 853                 if (zone == vm_map_entry_zone)
 854                         OSAddAtomic(-1, &nonreserved_zalloc_count);
 855                 else
 856                         OSAddAtomic(-1, &reserved_zalloc_count);
 857         }
 858
 859         zfree(zone, entry);
 860 }
 861
 862 #if MACH_ASSERT
 863 static boolean_t first_free_check = FALSE;
 864 boolean_t
 865 first_free_is_valid(
 866         vm_map_t        map)
 867 {
 868         if (!first_free_check)
 869                 return TRUE;
 870
 871         return( first_free_is_valid_store( map ));
 872 }
 873 #endif /* MACH_ASSERT */
 874
 875
 876 #define vm_map_copy_entry_link(copy, after_where, entry)                \
 877         _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
 878
 879 #define vm_map_copy_entry_unlink(copy, entry)                           \
 880         _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
 881
 882 #if     MACH_ASSERT && TASK_SWAPPER
 883 /*
 884  *      vm_map_res_reference:
 885  *
 886  *      Adds another valid residence count to the given map.
 887  *
 888  *      Map is locked so this function can be called from
 889  *      vm_map_swapin.
 890  *
 891  */
 892 void vm_map_res_reference(register vm_map_t map)
 893 {
 894         /* assert map is locked */
 895         assert(map->res_count >= 0);
 896         assert(map->ref_count >= map->res_count);
 897         if (map->res_count == 0) {
 898                 lck_mtx_unlock(&map->s_lock);
 899                 vm_map_lock(map);
 900                 vm_map_swapin(map);
 901                 lck_mtx_lock(&map->s_lock);
 902                 ++map->res_count;
 903                 vm_map_unlock(map);
 904         } else
 905                 ++map->res_count;
 906 }
 907
 908 /*
 909  *      vm_map_reference_swap:
 910  *
 911  *      Adds valid reference and residence counts to the given map.
 912  *
 913  *      The map may not be in memory (i.e. zero residence count).
 914  *
 915  */
 916 void vm_map_reference_swap(register vm_map_t map)
 917 {
 918         assert(map != VM_MAP_NULL);
 919         lck_mtx_lock(&map->s_lock);
 920         assert(map->res_count >= 0);
 921         assert(map->ref_count >= map->res_count);
 922         map->ref_count++;
 923         vm_map_res_reference(map);
 924         lck_mtx_unlock(&map->s_lock);
 925 }
 926
 927 /*
 928  *      vm_map_res_deallocate:
 929  *
 930  *      Decrement residence count on a map; possibly causing swapout.
 931  *
 932  *      The map must be in memory (i.e. non-zero residence count).
 933  *
 934  *      The map is locked, so this function is callable from vm_map_deallocate.
 935  *
 936  */
 937 void vm_map_res_deallocate(register vm_map_t map)
 938 {
 939         assert(map->res_count > 0);
 940         if (--map->res_count == 0) {
 941                 lck_mtx_unlock(&map->s_lock);
 942                 vm_map_lock(map);
 943                 vm_map_swapout(map);
 944                 vm_map_unlock(map);
 945                 lck_mtx_lock(&map->s_lock);
 946         }
 947         assert(map->ref_count >= map->res_count);
 948 }
 949 #endif  /* MACH_ASSERT && TASK_SWAPPER */
 950
 951 /*
 952  *      vm_map_destroy:
 953  *
 954  *      Actually destroy a map.
 955  */
 956 void
 957 vm_map_destroy(
 958         vm_map_t        map,
 959         int             flags)
 960 {
 961         vm_map_lock(map);
 962
 963         /* clean up regular map entries */
 964         (void) vm_map_delete(map, map->min_offset, map->max_offset,
 965                              flags, VM_MAP_NULL);
 966         /* clean up leftover special mappings (commpage, etc...) */
 967         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
 968                              flags, VM_MAP_NULL);
 969
 970 #if CONFIG_FREEZE
 971         if (map->default_freezer_handle) {
 972                 default_freezer_handle_deallocate(map->default_freezer_handle);
 973                 map->default_freezer_handle = NULL;
 974         }
 975 #endif
 976         vm_map_unlock(map);
 977
 978         assert(map->hdr.nentries == 0);
 979
 980         if(map->pmap)
 981                 pmap_destroy(map->pmap);
 982
 983         zfree(vm_map_zone, map);
 984 }
 985
 986 #if     TASK_SWAPPER
 987 /*
 988  * vm_map_swapin/vm_map_swapout
 989  *
 990  * Swap a map in and out, either referencing or releasing its resources.
 991  * These functions are internal use only; however, they must be exported
 992  * because they may be called from macros, which are exported.
 993  *
 994  * In the case of swapout, there could be races on the residence count,
 995  * so if the residence count is up, we return, assuming that a
 996  * vm_map_deallocate() call in the near future will bring us back.
 997  *
 998  * Locking:
 999  *      -- We use the map write lock for synchronization among races.
1000  *      -- The map write lock, and not the simple s_lock, protects the
1001  *         swap state of the map.
1002  *      -- If a map entry is a share map, then we hold both locks, in
1003  *         hierarchical order.
1004  *
1005  * Synchronization Notes:
1006  *      1) If a vm_map_swapin() call happens while swapout in progress, it
1007  *      will block on the map lock and proceed when swapout is through.
1008  *      2) A vm_map_reference() call at this time is illegal, and will
1009  *      cause a panic.  vm_map_reference() is only allowed on resident
1010  *      maps, since it refuses to block.
1011  *      3) A vm_map_swapin() call during a swapin will block, and
1012  *      proceeed when the first swapin is done, turning into a nop.
1013  *      This is the reason the res_count is not incremented until
1014  *      after the swapin is complete.
1015  *      4) There is a timing hole after the checks of the res_count, before
1016  *      the map lock is taken, during which a swapin may get the lock
1017  *      before a swapout about to happen.  If this happens, the swapin
1018  *      will detect the state and increment the reference count, causing
1019  *      the swapout to be a nop, thereby delaying it until a later
1020  *      vm_map_deallocate.  If the swapout gets the lock first, then
1021  *      the swapin will simply block until the swapout is done, and
1022  *      then proceed.
1023  *
1024  * Because vm_map_swapin() is potentially an expensive operation, it
1025  * should be used with caution.
1026  *
1027  * Invariants:
1028  *      1) A map with a residence count of zero is either swapped, or
1029  *         being swapped.
1030  *      2) A map with a non-zero residence count is either resident,
1031  *         or being swapped in.
1032  */
1033
1034 int vm_map_swap_enable = 1;
1035
1036 void vm_map_swapin (vm_map_t map)
1037 {
1038         register vm_map_entry_t entry;
1039
1040         if (!vm_map_swap_enable)        /* debug */
1041                 return;
1042
1043         /*
1044          * Map is locked
1045          * First deal with various races.
1046          */
1047         if (map->sw_state == MAP_SW_IN)
1048                 /*
1049                  * we raced with swapout and won.  Returning will incr.
1050                  * the res_count, turning the swapout into a nop.
1051                  */
1052                 return;
1053
1054         /*
1055          * The residence count must be zero.  If we raced with another
1056          * swapin, the state would have been IN; if we raced with a
1057          * swapout (after another competing swapin), we must have lost
1058          * the race to get here (see above comment), in which case
1059          * res_count is still 0.
1060          */
1061         assert(map->res_count == 0);
1062
1063         /*
1064          * There are no intermediate states of a map going out or
1065          * coming in, since the map is locked during the transition.
1066          */
1067         assert(map->sw_state == MAP_SW_OUT);
1068
1069         /*
1070          * We now operate upon each map entry.  If the entry is a sub-
1071          * or share-map, we call vm_map_res_reference upon it.
1072          * If the entry is an object, we call vm_object_res_reference
1073          * (this may iterate through the shadow chain).
1074          * Note that we hold the map locked the entire time,
1075          * even if we get back here via a recursive call in
1076          * vm_map_res_reference.
1077          */
1078         entry = vm_map_first_entry(map);
1079
1080         while (entry != vm_map_to_entry(map)) {
1081                 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082                         if (entry->is_sub_map) {
1083                                 vm_map_t lmap = entry->object.sub_map;
1084                                 lck_mtx_lock(&lmap->s_lock);
1085                                 vm_map_res_reference(lmap);
1086                                 lck_mtx_unlock(&lmap->s_lock);
1087                         } else {
1088                                 vm_object_t object = entry->object.vm_object;
1089                                 vm_object_lock(object);
1090                                 /*
1091                                  * This call may iterate through the
1092                                  * shadow chain.
1093                                  */
1094                                 vm_object_res_reference(object);
1095                                 vm_object_unlock(object);
1096                         }
1097                 }
1098                 entry = entry->vme_next;
1099         }
1100         assert(map->sw_state == MAP_SW_OUT);
1101         map->sw_state = MAP_SW_IN;
1102 }
1103
1104 void vm_map_swapout(vm_map_t map)
1105 {
1106         register vm_map_entry_t entry;
1107
1108         /*
1109          * Map is locked
1110          * First deal with various races.
1111          * If we raced with a swapin and lost, the residence count
1112          * will have been incremented to 1, and we simply return.
1113          */
1114         lck_mtx_lock(&map->s_lock);
1115         if (map->res_count != 0) {
1116                 lck_mtx_unlock(&map->s_lock);
1117                 return;
1118         }
1119         lck_mtx_unlock(&map->s_lock);
1120
1121         /*
1122          * There are no intermediate states of a map going out or
1123          * coming in, since the map is locked during the transition.
1124          */
1125         assert(map->sw_state == MAP_SW_IN);
1126
1127         if (!vm_map_swap_enable)
1128                 return;
1129
1130         /*
1131          * We now operate upon each map entry.  If the entry is a sub-
1132          * or share-map, we call vm_map_res_deallocate upon it.
1133          * If the entry is an object, we call vm_object_res_deallocate
1134          * (this may iterate through the shadow chain).
1135          * Note that we hold the map locked the entire time,
1136          * even if we get back here via a recursive call in
1137          * vm_map_res_deallocate.
1138          */
1139         entry = vm_map_first_entry(map);
1140
1141         while (entry != vm_map_to_entry(map)) {
1142                 if (entry->object.vm_object != VM_OBJECT_NULL) {
1143                         if (entry->is_sub_map) {
1144                                 vm_map_t lmap = entry->object.sub_map;
1145                                 lck_mtx_lock(&lmap->s_lock);
1146                                 vm_map_res_deallocate(lmap);
1147                                 lck_mtx_unlock(&lmap->s_lock);
1148                         } else {
1149                                 vm_object_t object = entry->object.vm_object;
1150                                 vm_object_lock(object);
1151                                 /*
1152                                  * This call may take a long time,
1153                                  * since it could actively push
1154                                  * out pages (if we implement it
1155                                  * that way).
1156                                  */
1157                                 vm_object_res_deallocate(object);
1158                                 vm_object_unlock(object);
1159                         }
1160                 }
1161                 entry = entry->vme_next;
1162         }
1163         assert(map->sw_state == MAP_SW_IN);
1164         map->sw_state = MAP_SW_OUT;
1165 }
1166
1167 #endif  /* TASK_SWAPPER */
1168
1169 /*
1170  *      vm_map_lookup_entry:    [ internal use only ]
1171  *
1172  *      Calls into the vm map store layer to find the map
1173  *      entry containing (or immediately preceding) the
1174  *      specified address in the given map; the entry is returned
1175  *      in the "entry" parameter.  The boolean
1176  *      result indicates whether the address is
1177  *      actually contained in the map.
1178  */
1179 boolean_t
1180 vm_map_lookup_entry(
1181         register vm_map_t               map,
1182         register vm_map_offset_t        address,
1183         vm_map_entry_t          *entry)         /* OUT */
1184 {
1185         return ( vm_map_store_lookup_entry( map, address, entry ));
1186 }
1187
1188 /*
1189  *      Routine:        vm_map_find_space
1190  *      Purpose:
1191  *              Allocate a range in the specified virtual address map,
1192  *              returning the entry allocated for that range.
1193  *              Used by kmem_alloc, etc.
1194  *
1195  *              The map must be NOT be locked. It will be returned locked
1196  *              on KERN_SUCCESS, unlocked on failure.
1197  *
1198  *              If an entry is allocated, the object/offset fields
1199  *              are initialized to zero.
1200  */
1201 kern_return_t
1202 vm_map_find_space(
1203         register vm_map_t       map,
1204         vm_map_offset_t         *address,       /* OUT */
1205         vm_map_size_t           size,
1206         vm_map_offset_t         mask,
1207         int                     flags,
1208         vm_map_entry_t          *o_entry)       /* OUT */
1209 {
1210         register vm_map_entry_t entry, new_entry;
1211         register vm_map_offset_t        start;
1212         register vm_map_offset_t        end;
1213
1214         if (size == 0) {
1215                 *address = 0;
1216                 return KERN_INVALID_ARGUMENT;
1217         }
1218
1219         if (flags & VM_FLAGS_GUARD_AFTER) {
1220                 /* account for the back guard page in the size */
1221                 size += VM_MAP_PAGE_SIZE(map);
1222         }
1223
1224         new_entry = vm_map_entry_create(map, FALSE);
1225
1226         /*
1227          *      Look for the first possible address; if there's already
1228          *      something at this address, we have to start after it.
1229          */
1230
1231         vm_map_lock(map);
1232
1233         if( map->disable_vmentry_reuse == TRUE) {
1234                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1235         } else {
1236                 assert(first_free_is_valid(map));
1237                 if ((entry = map->first_free) == vm_map_to_entry(map))
1238                         start = map->min_offset;
1239                 else
1240                         start = entry->vme_end;
1241         }
1242
1243         /*
1244          *      In any case, the "entry" always precedes
1245          *      the proposed new region throughout the loop:
1246          */
1247
1248         while (TRUE) {
1249                 register vm_map_entry_t next;
1250
1251                 /*
1252                  *      Find the end of the proposed new region.
1253                  *      Be sure we didn't go beyond the end, or
1254                  *      wrap around the address.
1255                  */
1256
1257                 if (flags & VM_FLAGS_GUARD_BEFORE) {
1258                         /* reserve space for the front guard page */
1259                         start += VM_MAP_PAGE_SIZE(map);
1260                 }
1261                 end = ((start + mask) & ~mask);
1262
1263                 if (end < start) {
1264                         vm_map_entry_dispose(map, new_entry);
1265                         vm_map_unlock(map);
1266                         return(KERN_NO_SPACE);
1267                 }
1268                 start = end;
1269                 end += size;
1270
1271                 if ((end > map->max_offset) || (end < start)) {
1272                         vm_map_entry_dispose(map, new_entry);
1273                         vm_map_unlock(map);
1274                         return(KERN_NO_SPACE);
1275                 }
1276
1277                 /*
1278                  *      If there are no more entries, we must win.
1279                  */
1280
1281                 next = entry->vme_next;
1282                 if (next == vm_map_to_entry(map))
1283                         break;
1284
1285                 /*
1286                  *      If there is another entry, it must be
1287                  *      after the end of the potential new region.
1288                  */
1289
1290                 if (next->vme_start >= end)
1291                         break;
1292
1293                 /*
1294                  *      Didn't fit -- move to the next entry.
1295                  */
1296
1297                 entry = next;
1298                 start = entry->vme_end;
1299         }
1300
1301         /*
1302          *      At this point,
1303          *              "start" and "end" should define the endpoints of the
1304          *                      available new range, and
1305          *              "entry" should refer to the region before the new
1306          *                      range, and
1307          *
1308          *              the map should be locked.
1309          */
1310
1311         if (flags & VM_FLAGS_GUARD_BEFORE) {
1312                 /* go back for the front guard page */
1313                 start -= VM_MAP_PAGE_SIZE(map);
1314         }
1315         *address = start;
1316
1317         assert(start < end);
1318         new_entry->vme_start = start;
1319         new_entry->vme_end = end;
1320         assert(page_aligned(new_entry->vme_start));
1321         assert(page_aligned(new_entry->vme_end));
1322         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1323                                    VM_MAP_PAGE_MASK(map)));
1324         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1325                                    VM_MAP_PAGE_MASK(map)));
1326
1327         new_entry->is_shared = FALSE;
1328         new_entry->is_sub_map = FALSE;
1329         new_entry->use_pmap = FALSE;
1330         new_entry->object.vm_object = VM_OBJECT_NULL;
1331         new_entry->offset = (vm_object_offset_t) 0;
1332
1333         new_entry->needs_copy = FALSE;
1334
1335         new_entry->inheritance = VM_INHERIT_DEFAULT;
1336         new_entry->protection = VM_PROT_DEFAULT;
1337         new_entry->max_protection = VM_PROT_ALL;
1338         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1339         new_entry->wired_count = 0;
1340         new_entry->user_wired_count = 0;
1341
1342         new_entry->in_transition = FALSE;
1343         new_entry->needs_wakeup = FALSE;
1344         new_entry->no_cache = FALSE;
1345         new_entry->permanent = FALSE;
1346         new_entry->superpage_size = FALSE;
1347         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1348                 new_entry->map_aligned = TRUE;
1349         } else {
1350                 new_entry->map_aligned = FALSE;
1351         }
1352
1353         new_entry->used_for_jit = 0;
1354
1355         new_entry->alias = 0;
1356         new_entry->zero_wired_pages = FALSE;
1357
1358         VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1359
1360         /*
1361          *      Insert the new entry into the list
1362          */
1363
1364         vm_map_store_entry_link(map, entry, new_entry);
1365
1366         map->size += size;
1367
1368         /*
1369          *      Update the lookup hint
1370          */
1371         SAVE_HINT_MAP_WRITE(map, new_entry);
1372
1373         *o_entry = new_entry;
1374         return(KERN_SUCCESS);
1375 }
1376
1377 int vm_map_pmap_enter_print = FALSE;
1378 int vm_map_pmap_enter_enable = FALSE;
1379
1380 /*
1381  *      Routine:        vm_map_pmap_enter [internal only]
1382  *
1383  *      Description:
1384  *              Force pages from the specified object to be entered into
1385  *              the pmap at the specified address if they are present.
1386  *              As soon as a page not found in the object the scan ends.
1387  *
1388  *      Returns:
1389  *              Nothing.
1390  *
1391  *      In/out conditions:
1392  *              The source map should not be locked on entry.
1393  */
1394 static void
1395 vm_map_pmap_enter(
1396         vm_map_t                map,
1397         register vm_map_offset_t        addr,
1398         register vm_map_offset_t        end_addr,
1399         register vm_object_t    object,
1400         vm_object_offset_t      offset,
1401         vm_prot_t               protection)
1402 {
1403         int                     type_of_fault;
1404         kern_return_t           kr;
1405
1406         if(map->pmap == 0)
1407                 return;
1408
1409         while (addr < end_addr) {
1410                 register vm_page_t      m;
1411
1412                 vm_object_lock(object);
1413
1414                 m = vm_page_lookup(object, offset);
1415                 /*
1416                  * ENCRYPTED SWAP:
1417                  * The user should never see encrypted data, so do not
1418                  * enter an encrypted page in the page table.
1419                  */
1420                 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1421                     m->fictitious ||
1422                     (m->unusual && ( m->error || m->restart || m->absent))) {
1423                         vm_object_unlock(object);
1424                         return;
1425                 }
1426
1427                 if (vm_map_pmap_enter_print) {
1428                         printf("vm_map_pmap_enter:");
1429                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1430                                map, (unsigned long long)addr, object, (unsigned long long)offset);
1431                 }
1432                 type_of_fault = DBG_CACHE_HIT_FAULT;
1433                 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1434                                     VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
1435                                     &type_of_fault);
1436
1437                 vm_object_unlock(object);
1438
1439                 offset += PAGE_SIZE_64;
1440                 addr += PAGE_SIZE;
1441         }
1442 }
1443
1444 boolean_t vm_map_pmap_is_empty(
1445         vm_map_t        map,
1446         vm_map_offset_t start,
1447         vm_map_offset_t end);
1448 boolean_t vm_map_pmap_is_empty(
1449         vm_map_t        map,
1450         vm_map_offset_t start,
1451         vm_map_offset_t end)
1452 {
1453 #ifdef MACHINE_PMAP_IS_EMPTY
1454         return pmap_is_empty(map->pmap, start, end);
1455 #else   /* MACHINE_PMAP_IS_EMPTY */
1456         vm_map_offset_t offset;
1457         ppnum_t         phys_page;
1458
1459         if (map->pmap == NULL) {
1460                 return TRUE;
1461         }
1462
1463         for (offset = start;
1464              offset < end;
1465              offset += PAGE_SIZE) {
1466                 phys_page = pmap_find_phys(map->pmap, offset);
1467                 if (phys_page) {
1468                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1469                                 "page %d at 0x%llx\n",
1470                                 map, (long long)start, (long long)end,
1471                                 phys_page, (long long)offset);
1472                         return FALSE;
1473                 }
1474         }
1475         return TRUE;
1476 #endif  /* MACHINE_PMAP_IS_EMPTY */
1477 }
1478
1479 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1480 kern_return_t
1481 vm_map_random_address_for_size(
1482         vm_map_t        map,
1483         vm_map_offset_t *address,
1484         vm_map_size_t   size)
1485 {
1486         kern_return_t   kr = KERN_SUCCESS;
1487         int             tries = 0;
1488         vm_map_offset_t random_addr = 0;
1489         vm_map_offset_t hole_end;
1490
1491         vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
1492         vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
1493         vm_map_size_t   vm_hole_size = 0;
1494         vm_map_size_t   addr_space_size;
1495
1496         addr_space_size = vm_map_max(map) - vm_map_min(map);
1497
1498         assert(page_aligned(size));
1499
1500         while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1501                 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1502                 random_addr = vm_map_trunc_page(
1503                         vm_map_min(map) +(random_addr % addr_space_size),
1504                         VM_MAP_PAGE_MASK(map));
1505
1506                 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1507                         if (prev_entry == vm_map_to_entry(map)) {
1508                                 next_entry = vm_map_first_entry(map);
1509                         } else {
1510                                 next_entry = prev_entry->vme_next;
1511                         }
1512                         if (next_entry == vm_map_to_entry(map)) {
1513                                 hole_end = vm_map_max(map);
1514                         } else {
1515                                 hole_end = next_entry->vme_start;
1516                         }
1517                         vm_hole_size = hole_end - random_addr;
1518                         if (vm_hole_size >= size) {
1519                                 *address = random_addr;
1520                                 break;
1521                         }
1522                 }
1523                 tries++;
1524         }
1525
1526         if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1527                 kr = KERN_NO_SPACE;
1528         }
1529         return kr;
1530 }
1531
1532 /*
1533  *      Routine:        vm_map_enter
1534  *
1535  *      Description:
1536  *              Allocate a range in the specified virtual address map.
1537  *              The resulting range will refer to memory defined by
1538  *              the given memory object and offset into that object.
1539  *
1540  *              Arguments are as defined in the vm_map call.
1541  */
1542 int _map_enter_debug = 0;
1543 static unsigned int vm_map_enter_restore_successes = 0;
1544 static unsigned int vm_map_enter_restore_failures = 0;
1545 kern_return_t
1546 vm_map_enter(
1547         vm_map_t                map,
1548         vm_map_offset_t         *address,       /* IN/OUT */
1549         vm_map_size_t           size,
1550         vm_map_offset_t         mask,
1551         int                     flags,
1552         vm_object_t             object,
1553         vm_object_offset_t      offset,
1554         boolean_t               needs_copy,
1555         vm_prot_t               cur_protection,
1556         vm_prot_t               max_protection,
1557         vm_inherit_t            inheritance)
1558 {
1559         vm_map_entry_t          entry, new_entry;
1560         vm_map_offset_t         start, tmp_start, tmp_offset;
1561         vm_map_offset_t         end, tmp_end;
1562         vm_map_offset_t         tmp2_start, tmp2_end;
1563         vm_map_offset_t         step;
1564         kern_return_t           result = KERN_SUCCESS;
1565         vm_map_t                zap_old_map = VM_MAP_NULL;
1566         vm_map_t                zap_new_map = VM_MAP_NULL;
1567         boolean_t               map_locked = FALSE;
1568         boolean_t               pmap_empty = TRUE;
1569         boolean_t               new_mapping_established = FALSE;
1570         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1571         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1572         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1573         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1574         boolean_t               is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1575         boolean_t               permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1576         boolean_t               entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1577         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1578         char                    alias;
1579         vm_map_offset_t         effective_min_offset, effective_max_offset;
1580         kern_return_t           kr;
1581         boolean_t               clear_map_aligned = FALSE;
1582
1583         if (superpage_size) {
1584                 switch (superpage_size) {
1585                         /*
1586                          * Note that the current implementation only supports
1587                          * a single size for superpages, SUPERPAGE_SIZE, per
1588                          * architecture. As soon as more sizes are supposed
1589                          * to be supported, SUPERPAGE_SIZE has to be replaced
1590                          * with a lookup of the size depending on superpage_size.
1591                          */
1592 #ifdef __x86_64__
1593                         case SUPERPAGE_SIZE_ANY:
1594                                 /* handle it like 2 MB and round up to page size */
1595                                 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1596                         case SUPERPAGE_SIZE_2MB:
1597                                 break;
1598 #endif
1599                         default:
1600                                 return KERN_INVALID_ARGUMENT;
1601                 }
1602                 mask = SUPERPAGE_SIZE-1;
1603                 if (size & (SUPERPAGE_SIZE-1))
1604                         return KERN_INVALID_ARGUMENT;
1605                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
1606         }
1607
1608
1609
1610         if (is_submap) {
1611                 if (purgable) {
1612                         /* submaps can not be purgeable */
1613                         return KERN_INVALID_ARGUMENT;
1614                 }
1615                 if (object == VM_OBJECT_NULL) {
1616                         /* submaps can not be created lazily */
1617                         return KERN_INVALID_ARGUMENT;
1618                 }
1619         }
1620         if (flags & VM_FLAGS_ALREADY) {
1621                 /*
1622                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
1623                  * is already present.  For it to be meaningul, the requested
1624                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1625                  * we shouldn't try and remove what was mapped there first
1626                  * (!VM_FLAGS_OVERWRITE).
1627                  */
1628                 if ((flags & VM_FLAGS_ANYWHERE) ||
1629                     (flags & VM_FLAGS_OVERWRITE)) {
1630                         return KERN_INVALID_ARGUMENT;
1631                 }
1632         }
1633
1634         effective_min_offset = map->min_offset;
1635
1636         if (flags & VM_FLAGS_BEYOND_MAX) {
1637                 /*
1638                  * Allow an insertion beyond the map's max offset.
1639                  */
1640                 if (vm_map_is_64bit(map))
1641                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1642                 else
1643                         effective_max_offset = 0x00000000FFFFF000ULL;
1644         } else {
1645                 effective_max_offset = map->max_offset;
1646         }
1647
1648         if (size == 0 ||
1649             (offset & PAGE_MASK_64) != 0) {
1650                 *address = 0;
1651                 return KERN_INVALID_ARGUMENT;
1652         }
1653
1654         VM_GET_FLAGS_ALIAS(flags, alias);
1655
1656 #define RETURN(value)   { result = value; goto BailOut; }
1657
1658         assert(page_aligned(*address));
1659         assert(page_aligned(size));
1660
1661         if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1662                 /*
1663                  * In most cases, the caller rounds the size up to the
1664                  * map's page size.
1665                  * If we get a size that is explicitly not map-aligned here,
1666                  * we'll have to respect the caller's wish and mark the
1667                  * mapping as "not map-aligned" to avoid tripping the
1668                  * map alignment checks later.
1669                  */
1670                 clear_map_aligned = TRUE;
1671         }
1672
1673         /*
1674          * Only zero-fill objects are allowed to be purgable.
1675          * LP64todo - limit purgable objects to 32-bits for now
1676          */
1677         if (purgable &&
1678             (offset != 0 ||
1679              (object != VM_OBJECT_NULL &&
1680               (object->vo_size != size ||
1681                object->purgable == VM_PURGABLE_DENY))
1682              || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1683                 return KERN_INVALID_ARGUMENT;
1684
1685         if (!anywhere && overwrite) {
1686                 /*
1687                  * Create a temporary VM map to hold the old mappings in the
1688                  * affected area while we create the new one.
1689                  * This avoids releasing the VM map lock in
1690                  * vm_map_entry_delete() and allows atomicity
1691                  * when we want to replace some mappings with a new one.
1692                  * It also allows us to restore the old VM mappings if the
1693                  * new mapping fails.
1694                  */
1695                 zap_old_map = vm_map_create(PMAP_NULL,
1696                                             *address,
1697                                             *address + size,
1698                                             map->hdr.entries_pageable);
1699                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1700         }
1701
1702 StartAgain: ;
1703
1704         start = *address;
1705
1706         if (anywhere) {
1707                 vm_map_lock(map);
1708                 map_locked = TRUE;
1709
1710                 if (entry_for_jit) {
1711                         if (map->jit_entry_exists) {
1712                                 result = KERN_INVALID_ARGUMENT;
1713                                 goto BailOut;
1714                         }
1715                         /*
1716                          * Get a random start address.
1717                          */
1718                         result = vm_map_random_address_for_size(map, address, size);
1719                         if (result != KERN_SUCCESS) {
1720                                 goto BailOut;
1721                         }
1722                         start = *address;
1723                 }
1724
1725
1726                 /*
1727                  *      Calculate the first possible address.
1728                  */
1729
1730                 if (start < effective_min_offset)
1731                         start = effective_min_offset;
1732                 if (start > effective_max_offset)
1733                         RETURN(KERN_NO_SPACE);
1734
1735                 /*
1736                  *      Look for the first possible address;
1737                  *      if there's already something at this
1738                  *      address, we have to start after it.
1739                  */
1740
1741                 if( map->disable_vmentry_reuse == TRUE) {
1742                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
1743                 } else {
1744                         assert(first_free_is_valid(map));
1745
1746                         entry = map->first_free;
1747
1748                         if (entry == vm_map_to_entry(map)) {
1749                                 entry = NULL;
1750                         } else {
1751                                if (entry->vme_next == vm_map_to_entry(map)){
1752                                        /*
1753                                         * Hole at the end of the map.
1754                                         */
1755                                         entry = NULL;
1756                                } else {
1757                                         if (start < (entry->vme_next)->vme_start ) {
1758                                                 start = entry->vme_end;
1759                                                 start = vm_map_round_page(start,
1760                                                                           VM_MAP_PAGE_MASK(map));
1761                                         } else {
1762                                                 /*
1763                                                  * Need to do a lookup.
1764                                                  */
1765                                                 entry = NULL;
1766                                         }
1767                                }
1768                         }
1769
1770                         if (entry == NULL) {
1771                                 vm_map_entry_t  tmp_entry;
1772                                 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1773                                         assert(!entry_for_jit);
1774                                         start = tmp_entry->vme_end;
1775                                         start = vm_map_round_page(start,
1776                                                                   VM_MAP_PAGE_MASK(map));
1777                                 }
1778                                 entry = tmp_entry;
1779                         }
1780                 }
1781
1782                 /*
1783                  *      In any case, the "entry" always precedes
1784                  *      the proposed new region throughout the
1785                  *      loop:
1786                  */
1787
1788                 while (TRUE) {
1789                         register vm_map_entry_t next;
1790
1791                         /*
1792                          *      Find the end of the proposed new region.
1793                          *      Be sure we didn't go beyond the end, or
1794                          *      wrap around the address.
1795                          */
1796
1797                         end = ((start + mask) & ~mask);
1798                         end = vm_map_round_page(end,
1799                                                 VM_MAP_PAGE_MASK(map));
1800                         if (end < start)
1801                                 RETURN(KERN_NO_SPACE);
1802                         start = end;
1803                         assert(VM_MAP_PAGE_ALIGNED(start,
1804                                                    VM_MAP_PAGE_MASK(map)));
1805                         end += size;
1806
1807                         if ((end > effective_max_offset) || (end < start)) {
1808                                 if (map->wait_for_space) {
1809                                         if (size <= (effective_max_offset -
1810                                                      effective_min_offset)) {
1811                                                 assert_wait((event_t)map,
1812                                                             THREAD_ABORTSAFE);
1813                                                 vm_map_unlock(map);
1814                                                 map_locked = FALSE;
1815                                                 thread_block(THREAD_CONTINUE_NULL);
1816                                                 goto StartAgain;
1817                                         }
1818                                 }
1819                                 RETURN(KERN_NO_SPACE);
1820                         }
1821
1822                         /*
1823                          *      If there are no more entries, we must win.
1824                          */
1825
1826                         next = entry->vme_next;
1827                         if (next == vm_map_to_entry(map))
1828                                 break;
1829
1830                         /*
1831                          *      If there is another entry, it must be
1832                          *      after the end of the potential new region.
1833                          */
1834
1835                         if (next->vme_start >= end)
1836                                 break;
1837
1838                         /*
1839                          *      Didn't fit -- move to the next entry.
1840                          */
1841
1842                         entry = next;
1843                         start = entry->vme_end;
1844                         start = vm_map_round_page(start,
1845                                                   VM_MAP_PAGE_MASK(map));
1846                 }
1847                 *address = start;
1848                 assert(VM_MAP_PAGE_ALIGNED(*address,
1849                                            VM_MAP_PAGE_MASK(map)));
1850         } else {
1851                 /*
1852                  *      Verify that:
1853                  *              the address doesn't itself violate
1854                  *              the mask requirement.
1855                  */
1856
1857                 vm_map_lock(map);
1858                 map_locked = TRUE;
1859                 if ((start & mask) != 0)
1860                         RETURN(KERN_NO_SPACE);
1861
1862                 /*
1863                  *      ...     the address is within bounds
1864                  */
1865
1866                 end = start + size;
1867
1868                 if ((start < effective_min_offset) ||
1869                     (end > effective_max_offset) ||
1870                     (start >= end)) {
1871                         RETURN(KERN_INVALID_ADDRESS);
1872                 }
1873
1874                 if (overwrite && zap_old_map != VM_MAP_NULL) {
1875                         /*
1876                          * Fixed mapping and "overwrite" flag: attempt to
1877                          * remove all existing mappings in the specified
1878                          * address range, saving them in our "zap_old_map".
1879                          */
1880                         (void) vm_map_delete(map, start, end,
1881                                              VM_MAP_REMOVE_SAVE_ENTRIES,
1882                                              zap_old_map);
1883                 }
1884
1885                 /*
1886                  *      ...     the starting address isn't allocated
1887                  */
1888
1889                 if (vm_map_lookup_entry(map, start, &entry)) {
1890                         if (! (flags & VM_FLAGS_ALREADY)) {
1891                                 RETURN(KERN_NO_SPACE);
1892                         }
1893                         /*
1894                          * Check if what's already there is what we want.
1895                          */
1896                         tmp_start = start;
1897                         tmp_offset = offset;
1898                         if (entry->vme_start < start) {
1899                                 tmp_start -= start - entry->vme_start;
1900                                 tmp_offset -= start - entry->vme_start;
1901
1902                         }
1903                         for (; entry->vme_start < end;
1904                              entry = entry->vme_next) {
1905                                 /*
1906                                  * Check if the mapping's attributes
1907                                  * match the existing map entry.
1908                                  */
1909                                 if (entry == vm_map_to_entry(map) ||
1910                                     entry->vme_start != tmp_start ||
1911                                     entry->is_sub_map != is_submap ||
1912                                     entry->offset != tmp_offset ||
1913                                     entry->needs_copy != needs_copy ||
1914                                     entry->protection != cur_protection ||
1915                                     entry->max_protection != max_protection ||
1916                                     entry->inheritance != inheritance ||
1917                                     entry->alias != alias) {
1918                                         /* not the same mapping ! */
1919                                         RETURN(KERN_NO_SPACE);
1920                                 }
1921                                 /*
1922                                  * Check if the same object is being mapped.
1923                                  */
1924                                 if (is_submap) {
1925                                         if (entry->object.sub_map !=
1926                                             (vm_map_t) object) {
1927                                                 /* not the same submap */
1928                                                 RETURN(KERN_NO_SPACE);
1929                                         }
1930                                 } else {
1931                                         if (entry->object.vm_object != object) {
1932                                                 /* not the same VM object... */
1933                                                 vm_object_t obj2;
1934
1935                                                 obj2 = entry->object.vm_object;
1936                                                 if ((obj2 == VM_OBJECT_NULL ||
1937                                                      obj2->internal) &&
1938                                                     (object == VM_OBJECT_NULL ||
1939                                                      object->internal)) {
1940                                                         /*
1941                                                          * ... but both are
1942                                                          * anonymous memory,
1943                                                          * so equivalent.
1944                                                          */
1945                                                 } else {
1946                                                         RETURN(KERN_NO_SPACE);
1947                                                 }
1948                                         }
1949                                 }
1950
1951                                 tmp_offset += entry->vme_end - entry->vme_start;
1952                                 tmp_start += entry->vme_end - entry->vme_start;
1953                                 if (entry->vme_end >= end) {
1954                                         /* reached the end of our mapping */
1955                                         break;
1956                                 }
1957                         }
1958                         /* it all matches:  let's use what's already there ! */
1959                         RETURN(KERN_MEMORY_PRESENT);
1960                 }
1961
1962                 /*
1963                  *      ...     the next region doesn't overlap the
1964                  *              end point.
1965                  */
1966
1967                 if ((entry->vme_next != vm_map_to_entry(map)) &&
1968                     (entry->vme_next->vme_start < end))
1969                         RETURN(KERN_NO_SPACE);
1970         }
1971
1972         /*
1973          *      At this point,
1974          *              "start" and "end" should define the endpoints of the
1975          *                      available new range, and
1976          *              "entry" should refer to the region before the new
1977          *                      range, and
1978          *
1979          *              the map should be locked.
1980          */
1981
1982         /*
1983          *      See whether we can avoid creating a new entry (and object) by
1984          *      extending one of our neighbors.  [So far, we only attempt to
1985          *      extend from below.]  Note that we can never extend/join
1986          *      purgable objects because they need to remain distinct
1987          *      entities in order to implement their "volatile object"
1988          *      semantics.
1989          */
1990
1991         if (purgable || entry_for_jit) {
1992                 if (object == VM_OBJECT_NULL) {
1993                         object = vm_object_allocate(size);
1994                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1995                         if (purgable) {
1996                                 object->purgable = VM_PURGABLE_NONVOLATILE;
1997                         }
1998                         offset = (vm_object_offset_t)0;
1999                 }
2000         } else if ((is_submap == FALSE) &&
2001                    (object == VM_OBJECT_NULL) &&
2002                    (entry != vm_map_to_entry(map)) &&
2003                    (entry->vme_end == start) &&
2004                    (!entry->is_shared) &&
2005                    (!entry->is_sub_map) &&
2006                    ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
2007                    (entry->inheritance == inheritance) &&
2008                    (entry->protection == cur_protection) &&
2009                    (entry->max_protection == max_protection) &&
2010                    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2011                    (entry->in_transition == 0) &&
2012                    (entry->no_cache == no_cache) &&
2013                    /*
2014                     * No coalescing if not map-aligned, to avoid propagating
2015                     * that condition any further than needed:
2016                     */
2017                    (!entry->map_aligned || !clear_map_aligned) &&
2018                    ((entry->vme_end - entry->vme_start) + size <=
2019                     (alias == VM_MEMORY_REALLOC ?
2020                      ANON_CHUNK_SIZE :
2021                      NO_COALESCE_LIMIT)) &&
2022                    (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2023                 if (vm_object_coalesce(entry->object.vm_object,
2024                                        VM_OBJECT_NULL,
2025                                        entry->offset,
2026                                        (vm_object_offset_t) 0,
2027                                        (vm_map_size_t)(entry->vme_end - entry->vme_start),
2028                                        (vm_map_size_t)(end - entry->vme_end))) {
2029
2030                         /*
2031                          *      Coalesced the two objects - can extend
2032                          *      the previous map entry to include the
2033                          *      new range.
2034                          */
2035                         map->size += (end - entry->vme_end);
2036                         assert(entry->vme_start < end);
2037                         assert(VM_MAP_PAGE_ALIGNED(end,
2038                                                    VM_MAP_PAGE_MASK(map)));
2039                         entry->vme_end = end;
2040                         vm_map_store_update_first_free(map, map->first_free);
2041                         RETURN(KERN_SUCCESS);
2042                 }
2043         }
2044
2045         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2046         new_entry = NULL;
2047
2048         for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2049                 tmp2_end = tmp2_start + step;
2050                 /*
2051                  *      Create a new entry
2052                  *      LP64todo - for now, we can only allocate 4GB internal objects
2053                  *      because the default pager can't page bigger ones.  Remove this
2054                  *      when it can.
2055                  *
2056                  * XXX FBDP
2057                  * The reserved "page zero" in each process's address space can
2058                  * be arbitrarily large.  Splitting it into separate 4GB objects and
2059                  * therefore different VM map entries serves no purpose and just
2060                  * slows down operations on the VM map, so let's not split the
2061                  * allocation into 4GB chunks if the max protection is NONE.  That
2062                  * memory should never be accessible, so it will never get to the
2063                  * default pager.
2064                  */
2065                 tmp_start = tmp2_start;
2066                 if (object == VM_OBJECT_NULL &&
2067                     size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2068                     max_protection != VM_PROT_NONE &&
2069                     superpage_size == 0)
2070                         tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2071                 else
2072                         tmp_end = tmp2_end;
2073                 do {
2074                         new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2075                                                         object, offset, needs_copy,
2076                                                         FALSE, FALSE,
2077                                                         cur_protection, max_protection,
2078                                                         VM_BEHAVIOR_DEFAULT,
2079                                                         (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2080                                                         0, no_cache,
2081                                                         permanent,
2082                                                         superpage_size,
2083                                                         clear_map_aligned);
2084                         new_entry->alias = alias;
2085                         if (entry_for_jit){
2086                                 if (!(map->jit_entry_exists)){
2087                                         new_entry->used_for_jit = TRUE;
2088                                         map->jit_entry_exists = TRUE;
2089                                 }
2090                         }
2091
2092                         if (is_submap) {
2093                                 vm_map_t        submap;
2094                                 boolean_t       submap_is_64bit;
2095                                 boolean_t       use_pmap;
2096
2097                                 new_entry->is_sub_map = TRUE;
2098                                 submap = (vm_map_t) object;
2099                                 submap_is_64bit = vm_map_is_64bit(submap);
2100                                 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2101         #ifndef NO_NESTED_PMAP
2102                                 if (use_pmap && submap->pmap == NULL) {
2103                                         ledger_t ledger = map->pmap->ledger;
2104                                         /* we need a sub pmap to nest... */
2105                                         submap->pmap = pmap_create(ledger, 0,
2106                                             submap_is_64bit);
2107                                         if (submap->pmap == NULL) {
2108                                                 /* let's proceed without nesting... */
2109                                         }
2110                                 }
2111                                 if (use_pmap && submap->pmap != NULL) {
2112                                         kr = pmap_nest(map->pmap,
2113                                                        submap->pmap,
2114                                                        tmp_start,
2115                                                        tmp_start,
2116                                                        tmp_end - tmp_start);
2117                                         if (kr != KERN_SUCCESS) {
2118                                                 printf("vm_map_enter: "
2119                                                        "pmap_nest(0x%llx,0x%llx) "
2120                                                        "error 0x%x\n",
2121                                                        (long long)tmp_start,
2122                                                        (long long)tmp_end,
2123                                                        kr);
2124                                         } else {
2125                                                 /* we're now nested ! */
2126                                                 new_entry->use_pmap = TRUE;
2127                                                 pmap_empty = FALSE;
2128                                         }
2129                                 }
2130         #endif /* NO_NESTED_PMAP */
2131                         }
2132                         entry = new_entry;
2133
2134                         if (superpage_size) {
2135                                 vm_page_t pages, m;
2136                                 vm_object_t sp_object;
2137
2138                                 entry->offset = 0;
2139
2140                                 /* allocate one superpage */
2141                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2142                                 if (kr != KERN_SUCCESS) {
2143                                         new_mapping_established = TRUE; /* will cause deallocation of whole range */
2144                                         RETURN(kr);
2145                                 }
2146
2147                                 /* create one vm_object per superpage */
2148                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2149                                 sp_object->phys_contiguous = TRUE;
2150                                 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2151                                 entry->object.vm_object = sp_object;
2152
2153                                 /* enter the base pages into the object */
2154                                 vm_object_lock(sp_object);
2155                                 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2156                                         m = pages;
2157                                         pmap_zero_page(m->phys_page);
2158                                         pages = NEXT_PAGE(m);
2159                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2160                                         vm_page_insert(m, sp_object, offset);
2161                                 }
2162                                 vm_object_unlock(sp_object);
2163                         }
2164                 } while (tmp_end != tmp2_end &&
2165                          (tmp_start = tmp_end) &&
2166                          (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2167                           tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2168         }
2169
2170         vm_map_unlock(map);
2171         map_locked = FALSE;
2172
2173         new_mapping_established = TRUE;
2174
2175         /*      Wire down the new entry if the user
2176          *      requested all new map entries be wired.
2177          */
2178         if ((map->wiring_required)||(superpage_size)) {
2179                 pmap_empty = FALSE; /* pmap won't be empty */
2180                 kr = vm_map_wire(map, start, end,
2181                                      new_entry->protection, TRUE);
2182                 RETURN(kr);
2183         }
2184
2185         if ((object != VM_OBJECT_NULL) &&
2186             (vm_map_pmap_enter_enable) &&
2187             (!anywhere)  &&
2188             (!needs_copy) &&
2189             (size < (128*1024))) {
2190                 pmap_empty = FALSE; /* pmap won't be empty */
2191
2192                 if (override_nx(map, alias) && cur_protection)
2193                         cur_protection |= VM_PROT_EXECUTE;
2194
2195                 vm_map_pmap_enter(map, start, end,
2196                                   object, offset, cur_protection);
2197         }
2198
2199 BailOut: ;
2200         if (result == KERN_SUCCESS) {
2201                 vm_prot_t pager_prot;
2202                 memory_object_t pager;
2203
2204                 if (pmap_empty &&
2205                     !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2206                         assert(vm_map_pmap_is_empty(map,
2207                                                     *address,
2208                                                     *address+size));
2209                 }
2210
2211                 /*
2212                  * For "named" VM objects, let the pager know that the
2213                  * memory object is being mapped.  Some pagers need to keep
2214                  * track of this, to know when they can reclaim the memory
2215                  * object, for example.
2216                  * VM calls memory_object_map() for each mapping (specifying
2217                  * the protection of each mapping) and calls
2218                  * memory_object_last_unmap() when all the mappings are gone.
2219                  */
2220                 pager_prot = max_protection;
2221                 if (needs_copy) {
2222                         /*
2223                          * Copy-On-Write mapping: won't modify
2224                          * the memory object.
2225                          */
2226                         pager_prot &= ~VM_PROT_WRITE;
2227                 }
2228                 if (!is_submap &&
2229                     object != VM_OBJECT_NULL &&
2230                     object->named &&
2231                     object->pager != MEMORY_OBJECT_NULL) {
2232                         vm_object_lock(object);
2233                         pager = object->pager;
2234                         if (object->named &&
2235                             pager != MEMORY_OBJECT_NULL) {
2236                                 assert(object->pager_ready);
2237                                 vm_object_mapping_wait(object, THREAD_UNINT);
2238                                 vm_object_mapping_begin(object);
2239                                 vm_object_unlock(object);
2240
2241                                 kr = memory_object_map(pager, pager_prot);
2242                                 assert(kr == KERN_SUCCESS);
2243
2244                                 vm_object_lock(object);
2245                                 vm_object_mapping_end(object);
2246                         }
2247                         vm_object_unlock(object);
2248                 }
2249         } else {
2250                 if (new_mapping_established) {
2251                         /*
2252                          * We have to get rid of the new mappings since we
2253                          * won't make them available to the user.
2254                          * Try and do that atomically, to minimize the risk
2255                          * that someone else create new mappings that range.
2256                          */
2257                         zap_new_map = vm_map_create(PMAP_NULL,
2258                                                     *address,
2259                                                     *address + size,
2260                                                     map->hdr.entries_pageable);
2261                         vm_map_set_page_shift(zap_new_map,
2262                                               VM_MAP_PAGE_SHIFT(map));
2263                         if (!map_locked) {
2264                                 vm_map_lock(map);
2265                                 map_locked = TRUE;
2266                         }
2267                         (void) vm_map_delete(map, *address, *address+size,
2268                                              VM_MAP_REMOVE_SAVE_ENTRIES,
2269                                              zap_new_map);
2270                 }
2271                 if (zap_old_map != VM_MAP_NULL &&
2272                     zap_old_map->hdr.nentries != 0) {
2273                         vm_map_entry_t  entry1, entry2;
2274
2275                         /*
2276                          * The new mapping failed.  Attempt to restore
2277                          * the old mappings, saved in the "zap_old_map".
2278                          */
2279                         if (!map_locked) {
2280                                 vm_map_lock(map);
2281                                 map_locked = TRUE;
2282                         }
2283
2284                         /* first check if the coast is still clear */
2285                         start = vm_map_first_entry(zap_old_map)->vme_start;
2286                         end = vm_map_last_entry(zap_old_map)->vme_end;
2287                         if (vm_map_lookup_entry(map, start, &entry1) ||
2288                             vm_map_lookup_entry(map, end, &entry2) ||
2289                             entry1 != entry2) {
2290                                 /*
2291                                  * Part of that range has already been
2292                                  * re-mapped:  we can't restore the old
2293                                  * mappings...
2294                                  */
2295                                 vm_map_enter_restore_failures++;
2296                         } else {
2297                                 /*
2298                                  * Transfer the saved map entries from
2299                                  * "zap_old_map" to the original "map",
2300                                  * inserting them all after "entry1".
2301                                  */
2302                                 for (entry2 = vm_map_first_entry(zap_old_map);
2303                                      entry2 != vm_map_to_entry(zap_old_map);
2304                                      entry2 = vm_map_first_entry(zap_old_map)) {
2305                                         vm_map_size_t entry_size;
2306
2307                                         entry_size = (entry2->vme_end -
2308                                                       entry2->vme_start);
2309                                         vm_map_store_entry_unlink(zap_old_map,
2310                                                             entry2);
2311                                         zap_old_map->size -= entry_size;
2312                                         vm_map_store_entry_link(map, entry1, entry2);
2313                                         map->size += entry_size;
2314                                         entry1 = entry2;
2315                                 }
2316                                 if (map->wiring_required) {
2317                                         /*
2318                                          * XXX TODO: we should rewire the
2319                                          * old pages here...
2320                                          */
2321                                 }
2322                                 vm_map_enter_restore_successes++;
2323                         }
2324                 }
2325         }
2326
2327         if (map_locked) {
2328                 vm_map_unlock(map);
2329         }
2330
2331         /*
2332          * Get rid of the "zap_maps" and all the map entries that
2333          * they may still contain.
2334          */
2335         if (zap_old_map != VM_MAP_NULL) {
2336                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2337                 zap_old_map = VM_MAP_NULL;
2338         }
2339         if (zap_new_map != VM_MAP_NULL) {
2340                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2341                 zap_new_map = VM_MAP_NULL;
2342         }
2343
2344         return result;
2345
2346 #undef  RETURN
2347 }
2348
2349 kern_return_t
2350 vm_map_enter_mem_object(
2351         vm_map_t                target_map,
2352         vm_map_offset_t         *address,
2353         vm_map_size_t           initial_size,
2354         vm_map_offset_t         mask,
2355         int                     flags,
2356         ipc_port_t              port,
2357         vm_object_offset_t      offset,
2358         boolean_t               copy,
2359         vm_prot_t               cur_protection,
2360         vm_prot_t               max_protection,
2361         vm_inherit_t            inheritance)
2362 {
2363         vm_map_address_t        map_addr;
2364         vm_map_size_t           map_size;
2365         vm_object_t             object;
2366         vm_object_size_t        size;
2367         kern_return_t           result;
2368         boolean_t               mask_cur_protection, mask_max_protection;
2369         vm_map_offset_t         offset_in_mapping;
2370
2371         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2372         mask_max_protection = max_protection & VM_PROT_IS_MASK;
2373         cur_protection &= ~VM_PROT_IS_MASK;
2374         max_protection &= ~VM_PROT_IS_MASK;
2375
2376         /*
2377          * Check arguments for validity
2378          */
2379         if ((target_map == VM_MAP_NULL) ||
2380             (cur_protection & ~VM_PROT_ALL) ||
2381             (max_protection & ~VM_PROT_ALL) ||
2382             (inheritance > VM_INHERIT_LAST_VALID) ||
2383             initial_size == 0)
2384                 return KERN_INVALID_ARGUMENT;
2385
2386         map_addr = vm_map_trunc_page(*address,
2387                                      VM_MAP_PAGE_MASK(target_map));
2388         map_size = vm_map_round_page(initial_size,
2389                                      VM_MAP_PAGE_MASK(target_map));
2390         size = vm_object_round_page(initial_size);
2391
2392         /*
2393          * Find the vm object (if any) corresponding to this port.
2394          */
2395         if (!IP_VALID(port)) {
2396                 object = VM_OBJECT_NULL;
2397                 offset = 0;
2398                 copy = FALSE;
2399         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2400                 vm_named_entry_t        named_entry;
2401
2402                 named_entry = (vm_named_entry_t) port->ip_kobject;
2403
2404                 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2405                         offset += named_entry->data_offset;
2406                 }
2407
2408                 /* a few checks to make sure user is obeying rules */
2409                 if (size == 0) {
2410                         if (offset >= named_entry->size)
2411                                 return KERN_INVALID_RIGHT;
2412                         size = named_entry->size - offset;
2413                 }
2414                 if (mask_max_protection) {
2415                         max_protection &= named_entry->protection;
2416                 }
2417                 if (mask_cur_protection) {
2418                         cur_protection &= named_entry->protection;
2419                 }
2420                 if ((named_entry->protection & max_protection) !=
2421                     max_protection)
2422                         return KERN_INVALID_RIGHT;
2423                 if ((named_entry->protection & cur_protection) !=
2424                     cur_protection)
2425                         return KERN_INVALID_RIGHT;
2426                 if (offset + size < offset) {
2427                         /* overflow */
2428                         return KERN_INVALID_ARGUMENT;
2429                 }
2430                 if (named_entry->size < (offset + size))
2431                         return KERN_INVALID_ARGUMENT;
2432
2433                 if (named_entry->is_copy) {
2434                         /* for a vm_map_copy, we can only map it whole */
2435                         if ((size != named_entry->size) &&
2436                             (vm_map_round_page(size,
2437                                                VM_MAP_PAGE_MASK(target_map)) ==
2438                              named_entry->size)) {
2439                                 /* XXX FBDP use the rounded size... */
2440                                 size = vm_map_round_page(
2441                                         size,
2442                                         VM_MAP_PAGE_MASK(target_map));
2443                         }
2444
2445                         if (offset != 0 ||
2446                             size != named_entry->size) {
2447                                 return KERN_INVALID_ARGUMENT;
2448                         }
2449                 }
2450
2451                 /* the callers parameter offset is defined to be the */
2452                 /* offset from beginning of named entry offset in object */
2453                 offset = offset + named_entry->offset;
2454
2455                 if (! VM_MAP_PAGE_ALIGNED(size,
2456                                           VM_MAP_PAGE_MASK(target_map))) {
2457                         /*
2458                          * Let's not map more than requested;
2459                          * vm_map_enter() will handle this "not map-aligned"
2460                          * case.
2461                          */
2462                         map_size = size;
2463                 }
2464
2465                 named_entry_lock(named_entry);
2466                 if (named_entry->is_sub_map) {
2467                         vm_map_t                submap;
2468
2469                         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2470                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2471                         }
2472
2473                         submap = named_entry->backing.map;
2474                         vm_map_lock(submap);
2475                         vm_map_reference(submap);
2476                         vm_map_unlock(submap);
2477                         named_entry_unlock(named_entry);
2478
2479                         result = vm_map_enter(target_map,
2480                                               &map_addr,
2481                                               map_size,
2482                                               mask,
2483                                               flags | VM_FLAGS_SUBMAP,
2484                                               (vm_object_t) submap,
2485                                               offset,
2486                                               copy,
2487                                               cur_protection,
2488                                               max_protection,
2489                                               inheritance);
2490                         if (result != KERN_SUCCESS) {
2491                                 vm_map_deallocate(submap);
2492                         } else {
2493                                 /*
2494                                  * No need to lock "submap" just to check its
2495                                  * "mapped" flag: that flag is never reset
2496                                  * once it's been set and if we race, we'll
2497                                  * just end up setting it twice, which is OK.
2498                                  */
2499                                 if (submap->mapped_in_other_pmaps == FALSE &&
2500                                     vm_map_pmap(submap) != PMAP_NULL &&
2501                                     vm_map_pmap(submap) !=
2502                                     vm_map_pmap(target_map)) {
2503                                         /*
2504                                          * This submap is being mapped in a map
2505                                          * that uses a different pmap.
2506                                          * Set its "mapped_in_other_pmaps" flag
2507                                          * to indicate that we now need to
2508                                          * remove mappings from all pmaps rather
2509                                          * than just the submap's pmap.
2510                                          */
2511                                         vm_map_lock(submap);
2512                                         submap->mapped_in_other_pmaps = TRUE;
2513                                         vm_map_unlock(submap);
2514                                 }
2515                                 *address = map_addr;
2516                         }
2517                         return result;
2518
2519                 } else if (named_entry->is_pager) {
2520                         unsigned int    access;
2521                         vm_prot_t       protections;
2522                         unsigned int    wimg_mode;
2523
2524                         protections = named_entry->protection & VM_PROT_ALL;
2525                         access = GET_MAP_MEM(named_entry->protection);
2526
2527                         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2528                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2529                         }
2530
2531                         object = vm_object_enter(named_entry->backing.pager,
2532                                                  named_entry->size,
2533                                                  named_entry->internal,
2534                                                  FALSE,
2535                                                  FALSE);
2536                         if (object == VM_OBJECT_NULL) {
2537                                 named_entry_unlock(named_entry);
2538                                 return KERN_INVALID_OBJECT;
2539                         }
2540
2541                         /* JMM - drop reference on pager here */
2542
2543                         /* create an extra ref for the named entry */
2544                         vm_object_lock(object);
2545                         vm_object_reference_locked(object);
2546                         named_entry->backing.object = object;
2547                         named_entry->is_pager = FALSE;
2548                         named_entry_unlock(named_entry);
2549
2550                         wimg_mode = object->wimg_bits;
2551
2552                         if (access == MAP_MEM_IO) {
2553                                 wimg_mode = VM_WIMG_IO;
2554                         } else if (access == MAP_MEM_COPYBACK) {
2555                                 wimg_mode = VM_WIMG_USE_DEFAULT;
2556                         } else if (access == MAP_MEM_INNERWBACK) {
2557                                 wimg_mode = VM_WIMG_INNERWBACK;
2558                         } else if (access == MAP_MEM_WTHRU) {
2559                                 wimg_mode = VM_WIMG_WTHRU;
2560                         } else if (access == MAP_MEM_WCOMB) {
2561                                 wimg_mode = VM_WIMG_WCOMB;
2562                         }
2563
2564                         /* wait for object (if any) to be ready */
2565                         if (!named_entry->internal) {
2566                                 while (!object->pager_ready) {
2567                                         vm_object_wait(
2568                                                 object,
2569                                                 VM_OBJECT_EVENT_PAGER_READY,
2570                                                 THREAD_UNINT);
2571                                         vm_object_lock(object);
2572                                 }
2573                         }
2574
2575                         if (object->wimg_bits != wimg_mode)
2576                                 vm_object_change_wimg_mode(object, wimg_mode);
2577
2578                         object->true_share = TRUE;
2579
2580                         if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2581                                 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2582                         vm_object_unlock(object);
2583
2584                 } else if (named_entry->is_copy) {
2585                         kern_return_t   kr;
2586                         vm_map_copy_t   copy_map;
2587                         vm_map_entry_t  copy_entry;
2588                         vm_map_offset_t copy_addr;
2589
2590                         if (flags & ~(VM_FLAGS_FIXED |
2591                                       VM_FLAGS_ANYWHERE |
2592                                       VM_FLAGS_OVERWRITE |
2593                                       VM_FLAGS_RETURN_DATA_ADDR)) {
2594                                 named_entry_unlock(named_entry);
2595                                 return KERN_INVALID_ARGUMENT;
2596                         }
2597
2598                         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2599                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
2600                                 offset = vm_object_trunc_page(offset);
2601                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2602                         }
2603
2604                         copy_map = named_entry->backing.copy;
2605                         assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
2606                         if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
2607                                 /* unsupported type; should not happen */
2608                                 printf("vm_map_enter_mem_object: "
2609                                        "memory_entry->backing.copy "
2610                                        "unsupported type 0x%x\n",
2611                                        copy_map->type);
2612                                 named_entry_unlock(named_entry);
2613                                 return KERN_INVALID_ARGUMENT;
2614                         }
2615
2616                         /* reserve a contiguous range */
2617                         kr = vm_map_enter(target_map,
2618                                           &map_addr,
2619                                           map_size,
2620                                           mask,
2621                                           flags & (VM_FLAGS_ANYWHERE |
2622                                                    VM_FLAGS_OVERWRITE |
2623                                                    VM_FLAGS_RETURN_DATA_ADDR),
2624                                           VM_OBJECT_NULL,
2625                                           0,
2626                                           FALSE, /* copy */
2627                                           cur_protection,
2628                                           max_protection,
2629                                           inheritance);
2630                         if (kr != KERN_SUCCESS) {
2631                                 named_entry_unlock(named_entry);
2632                                 return kr;
2633                         }
2634
2635                         copy_addr = map_addr;
2636
2637                         for (copy_entry = vm_map_copy_first_entry(copy_map);
2638                              copy_entry != vm_map_copy_to_entry(copy_map);
2639                              copy_entry = copy_entry->vme_next) {
2640                                 int                     remap_flags = 0;
2641                                 vm_map_t                copy_submap;
2642                                 vm_object_t             copy_object;
2643                                 vm_map_size_t           copy_size;
2644                                 vm_object_offset_t      copy_offset;
2645
2646                                 copy_offset = copy_entry->offset;
2647                                 copy_size = (copy_entry->vme_end -
2648                                              copy_entry->vme_start);
2649
2650                                 /* sanity check */
2651                                 if (copy_addr + copy_size >
2652                                     map_addr + map_size) {
2653                                         /* over-mapping too much !? */
2654                                         kr = KERN_INVALID_ARGUMENT;
2655                                         /* abort */
2656                                         break;
2657                                 }
2658
2659                                 /* take a reference on the object */
2660                                 if (copy_entry->is_sub_map) {
2661                                         remap_flags |= VM_FLAGS_SUBMAP;
2662                                         copy_submap =
2663                                                 copy_entry->object.sub_map;
2664                                         vm_map_lock(copy_submap);
2665                                         vm_map_reference(copy_submap);
2666                                         vm_map_unlock(copy_submap);
2667                                         copy_object = (vm_object_t) copy_submap;
2668                                 } else {
2669                                         copy_object =
2670                                                 copy_entry->object.vm_object;
2671                                         vm_object_reference(copy_object);
2672                                 }
2673
2674                                 /* over-map the object into destination */
2675                                 remap_flags |= flags;
2676                                 remap_flags |= VM_FLAGS_FIXED;
2677                                 remap_flags |= VM_FLAGS_OVERWRITE;
2678                                 remap_flags &= ~VM_FLAGS_ANYWHERE;
2679                                 kr = vm_map_enter(target_map,
2680                                                   &copy_addr,
2681                                                   copy_size,
2682                                                   (vm_map_offset_t) 0,
2683                                                   remap_flags,
2684                                                   copy_object,
2685                                                   copy_offset,
2686                                                   copy,
2687                                                   cur_protection,
2688                                                   max_protection,
2689                                                   inheritance);
2690                                 if (kr != KERN_SUCCESS) {
2691                                         if (copy_entry->is_sub_map) {
2692                                                 vm_map_deallocate(copy_submap);
2693                                         } else {
2694                                                 vm_object_deallocate(copy_object);
2695                                         }
2696                                         /* abort */
2697                                         break;
2698                                 }
2699
2700                                 /* next mapping */
2701                                 copy_addr += copy_size;
2702                         }
2703
2704                         if (kr == KERN_SUCCESS) {
2705                                 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2706                                         *address = map_addr + offset_in_mapping;
2707                                 } else {
2708                                         *address = map_addr;
2709                                 }
2710                         }
2711                         named_entry_unlock(named_entry);
2712
2713                         if (kr != KERN_SUCCESS) {
2714                                 if (! (flags & VM_FLAGS_OVERWRITE)) {
2715                                         /* deallocate the contiguous range */
2716                                         (void) vm_deallocate(target_map,
2717                                                              map_addr,
2718                                                              map_size);
2719                                 }
2720                         }
2721
2722                         return kr;
2723
2724                 } else {
2725                         /* This is the case where we are going to map */
2726                         /* an already mapped object.  If the object is */
2727                         /* not ready it is internal.  An external     */
2728                         /* object cannot be mapped until it is ready  */
2729                         /* we can therefore avoid the ready check     */
2730                         /* in this case.  */
2731                         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2732                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
2733                                 offset = vm_object_trunc_page(offset);
2734                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2735                         }
2736
2737                         object = named_entry->backing.object;
2738                         assert(object != VM_OBJECT_NULL);
2739                         named_entry_unlock(named_entry);
2740                         vm_object_reference(object);
2741                 }
2742         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2743                 /*
2744                  * JMM - This is temporary until we unify named entries
2745                  * and raw memory objects.
2746                  *
2747                  * Detected fake ip_kotype for a memory object.  In
2748                  * this case, the port isn't really a port at all, but
2749                  * instead is just a raw memory object.
2750                  */
2751                 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2752                         panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2753                 }
2754
2755                 object = vm_object_enter((memory_object_t)port,
2756                                          size, FALSE, FALSE, FALSE);
2757                 if (object == VM_OBJECT_NULL)
2758                         return KERN_INVALID_OBJECT;
2759
2760                 /* wait for object (if any) to be ready */
2761                 if (object != VM_OBJECT_NULL) {
2762                         if (object == kernel_object) {
2763                                 printf("Warning: Attempt to map kernel object"
2764                                         " by a non-private kernel entity\n");
2765                                 return KERN_INVALID_OBJECT;
2766                         }
2767                         if (!object->pager_ready) {
2768                                 vm_object_lock(object);
2769
2770                                 while (!object->pager_ready) {
2771                                         vm_object_wait(object,
2772                                                        VM_OBJECT_EVENT_PAGER_READY,
2773                                                        THREAD_UNINT);
2774                                         vm_object_lock(object);
2775                                 }
2776                                 vm_object_unlock(object);
2777                         }
2778                 }
2779         } else {
2780                 return KERN_INVALID_OBJECT;
2781         }
2782
2783         if (object != VM_OBJECT_NULL &&
2784             object->named &&
2785             object->pager != MEMORY_OBJECT_NULL &&
2786             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2787                 memory_object_t pager;
2788                 vm_prot_t       pager_prot;
2789                 kern_return_t   kr;
2790
2791                 /*
2792                  * For "named" VM objects, let the pager know that the
2793                  * memory object is being mapped.  Some pagers need to keep
2794                  * track of this, to know when they can reclaim the memory
2795                  * object, for example.
2796                  * VM calls memory_object_map() for each mapping (specifying
2797                  * the protection of each mapping) and calls
2798                  * memory_object_last_unmap() when all the mappings are gone.
2799                  */
2800                 pager_prot = max_protection;
2801                 if (copy) {
2802                         /*
2803                          * Copy-On-Write mapping: won't modify the
2804                          * memory object.
2805                          */
2806                         pager_prot &= ~VM_PROT_WRITE;
2807                 }
2808                 vm_object_lock(object);
2809                 pager = object->pager;
2810                 if (object->named &&
2811                     pager != MEMORY_OBJECT_NULL &&
2812                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2813                         assert(object->pager_ready);
2814                         vm_object_mapping_wait(object, THREAD_UNINT);
2815                         vm_object_mapping_begin(object);
2816                         vm_object_unlock(object);
2817
2818                         kr = memory_object_map(pager, pager_prot);
2819                         assert(kr == KERN_SUCCESS);
2820
2821                         vm_object_lock(object);
2822                         vm_object_mapping_end(object);
2823                 }
2824                 vm_object_unlock(object);
2825         }
2826
2827         /*
2828          *      Perform the copy if requested
2829          */
2830
2831         if (copy) {
2832                 vm_object_t             new_object;
2833                 vm_object_offset_t      new_offset;
2834
2835                 result = vm_object_copy_strategically(object, offset, size,
2836                                                       &new_object, &new_offset,
2837                                                       &copy);
2838
2839
2840                 if (result == KERN_MEMORY_RESTART_COPY) {
2841                         boolean_t success;
2842                         boolean_t src_needs_copy;
2843
2844                         /*
2845                          * XXX
2846                          * We currently ignore src_needs_copy.
2847                          * This really is the issue of how to make
2848                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2849                          * non-kernel users to use. Solution forthcoming.
2850                          * In the meantime, since we don't allow non-kernel
2851                          * memory managers to specify symmetric copy,
2852                          * we won't run into problems here.
2853                          */
2854                         new_object = object;
2855                         new_offset = offset;
2856                         success = vm_object_copy_quickly(&new_object,
2857                                                          new_offset, size,
2858                                                          &src_needs_copy,
2859                                                          &copy);
2860                         assert(success);
2861                         result = KERN_SUCCESS;
2862                 }
2863                 /*
2864                  *      Throw away the reference to the
2865                  *      original object, as it won't be mapped.
2866                  */
2867
2868                 vm_object_deallocate(object);
2869
2870                 if (result != KERN_SUCCESS)
2871                         return result;
2872
2873                 object = new_object;
2874                 offset = new_offset;
2875         }
2876
2877         result = vm_map_enter(target_map,
2878                               &map_addr, map_size,
2879                               (vm_map_offset_t)mask,
2880                               flags,
2881                               object, offset,
2882                               copy,
2883                               cur_protection, max_protection, inheritance);
2884         if (result != KERN_SUCCESS)
2885                 vm_object_deallocate(object);
2886
2887         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2888                 *address = map_addr + offset_in_mapping;
2889         } else {
2890                 *address = map_addr;
2891         }
2892         return result;
2893 }
2894
2895
2896
2897
2898 kern_return_t
2899 vm_map_enter_mem_object_control(
2900         vm_map_t                target_map,
2901         vm_map_offset_t         *address,
2902         vm_map_size_t           initial_size,
2903         vm_map_offset_t         mask,
2904         int                     flags,
2905         memory_object_control_t control,
2906         vm_object_offset_t      offset,
2907         boolean_t               copy,
2908         vm_prot_t               cur_protection,
2909         vm_prot_t               max_protection,
2910         vm_inherit_t            inheritance)
2911 {
2912         vm_map_address_t        map_addr;
2913         vm_map_size_t           map_size;
2914         vm_object_t             object;
2915         vm_object_size_t        size;
2916         kern_return_t           result;
2917         memory_object_t         pager;
2918         vm_prot_t               pager_prot;
2919         kern_return_t           kr;
2920
2921         /*
2922          * Check arguments for validity
2923          */
2924         if ((target_map == VM_MAP_NULL) ||
2925             (cur_protection & ~VM_PROT_ALL) ||
2926             (max_protection & ~VM_PROT_ALL) ||
2927             (inheritance > VM_INHERIT_LAST_VALID) ||
2928             initial_size == 0)
2929                 return KERN_INVALID_ARGUMENT;
2930
2931         map_addr = vm_map_trunc_page(*address,
2932                                      VM_MAP_PAGE_MASK(target_map));
2933         map_size = vm_map_round_page(initial_size,
2934                                      VM_MAP_PAGE_MASK(target_map));
2935         size = vm_object_round_page(initial_size);
2936
2937         object = memory_object_control_to_vm_object(control);
2938
2939         if (object == VM_OBJECT_NULL)
2940                 return KERN_INVALID_OBJECT;
2941
2942         if (object == kernel_object) {
2943                 printf("Warning: Attempt to map kernel object"
2944                        " by a non-private kernel entity\n");
2945                 return KERN_INVALID_OBJECT;
2946         }
2947
2948         vm_object_lock(object);
2949         object->ref_count++;
2950         vm_object_res_reference(object);
2951
2952         /*
2953          * For "named" VM objects, let the pager know that the
2954          * memory object is being mapped.  Some pagers need to keep
2955          * track of this, to know when they can reclaim the memory
2956          * object, for example.
2957          * VM calls memory_object_map() for each mapping (specifying
2958          * the protection of each mapping) and calls
2959          * memory_object_last_unmap() when all the mappings are gone.
2960          */
2961         pager_prot = max_protection;
2962         if (copy) {
2963                 pager_prot &= ~VM_PROT_WRITE;
2964         }
2965         pager = object->pager;
2966         if (object->named &&
2967             pager != MEMORY_OBJECT_NULL &&
2968             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2969                 assert(object->pager_ready);
2970                 vm_object_mapping_wait(object, THREAD_UNINT);
2971                 vm_object_mapping_begin(object);
2972                 vm_object_unlock(object);
2973
2974                 kr = memory_object_map(pager, pager_prot);
2975                 assert(kr == KERN_SUCCESS);
2976
2977                 vm_object_lock(object);
2978                 vm_object_mapping_end(object);
2979         }
2980         vm_object_unlock(object);
2981
2982         /*
2983          *      Perform the copy if requested
2984          */
2985
2986         if (copy) {
2987                 vm_object_t             new_object;
2988                 vm_object_offset_t      new_offset;
2989
2990                 result = vm_object_copy_strategically(object, offset, size,
2991                                                       &new_object, &new_offset,
2992                                                       &copy);
2993
2994
2995                 if (result == KERN_MEMORY_RESTART_COPY) {
2996                         boolean_t success;
2997                         boolean_t src_needs_copy;
2998
2999                         /*
3000                          * XXX
3001                          * We currently ignore src_needs_copy.
3002                          * This really is the issue of how to make
3003                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3004                          * non-kernel users to use. Solution forthcoming.
3005                          * In the meantime, since we don't allow non-kernel
3006                          * memory managers to specify symmetric copy,
3007                          * we won't run into problems here.
3008                          */
3009                         new_object = object;
3010                         new_offset = offset;
3011                         success = vm_object_copy_quickly(&new_object,
3012                                                          new_offset, size,
3013                                                          &src_needs_copy,
3014                                                          &copy);
3015                         assert(success);
3016                         result = KERN_SUCCESS;
3017                 }
3018                 /*
3019                  *      Throw away the reference to the
3020                  *      original object, as it won't be mapped.
3021                  */
3022
3023                 vm_object_deallocate(object);
3024
3025                 if (result != KERN_SUCCESS)
3026                         return result;
3027
3028                 object = new_object;
3029                 offset = new_offset;
3030         }
3031
3032         result = vm_map_enter(target_map,
3033                               &map_addr, map_size,
3034                               (vm_map_offset_t)mask,
3035                               flags,
3036                               object, offset,
3037                               copy,
3038                               cur_protection, max_protection, inheritance);
3039         if (result != KERN_SUCCESS)
3040                 vm_object_deallocate(object);
3041         *address = map_addr;
3042
3043         return result;
3044 }
3045
3046
3047 #if     VM_CPM
3048
3049 #ifdef MACH_ASSERT
3050 extern pmap_paddr_t     avail_start, avail_end;
3051 #endif
3052
3053 /*
3054  *      Allocate memory in the specified map, with the caveat that
3055  *      the memory is physically contiguous.  This call may fail
3056  *      if the system can't find sufficient contiguous memory.
3057  *      This call may cause or lead to heart-stopping amounts of
3058  *      paging activity.
3059  *
3060  *      Memory obtained from this call should be freed in the
3061  *      normal way, viz., via vm_deallocate.
3062  */
3063 kern_return_t
3064 vm_map_enter_cpm(
3065         vm_map_t                map,
3066         vm_map_offset_t *addr,
3067         vm_map_size_t           size,
3068         int                     flags)
3069 {
3070         vm_object_t             cpm_obj;
3071         pmap_t                  pmap;
3072         vm_page_t               m, pages;
3073         kern_return_t           kr;
3074         vm_map_offset_t         va, start, end, offset;
3075 #if     MACH_ASSERT
3076         vm_map_offset_t         prev_addr = 0;
3077 #endif  /* MACH_ASSERT */
3078
3079         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3080
3081         if (size == 0) {
3082                 *addr = 0;
3083                 return KERN_SUCCESS;
3084         }
3085         if (anywhere)
3086                 *addr = vm_map_min(map);
3087         else
3088                 *addr = vm_map_trunc_page(*addr,
3089                                           VM_MAP_PAGE_MASK(map));
3090         size = vm_map_round_page(size,
3091                                  VM_MAP_PAGE_MASK(map));
3092
3093         /*
3094          * LP64todo - cpm_allocate should probably allow
3095          * allocations of >4GB, but not with the current
3096          * algorithm, so just cast down the size for now.
3097          */
3098         if (size > VM_MAX_ADDRESS)
3099                 return KERN_RESOURCE_SHORTAGE;
3100         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3101                                &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3102                 return kr;
3103
3104         cpm_obj = vm_object_allocate((vm_object_size_t)size);
3105         assert(cpm_obj != VM_OBJECT_NULL);
3106         assert(cpm_obj->internal);
3107         assert(cpm_obj->vo_size == (vm_object_size_t)size);
3108         assert(cpm_obj->can_persist == FALSE);
3109         assert(cpm_obj->pager_created == FALSE);
3110         assert(cpm_obj->pageout == FALSE);
3111         assert(cpm_obj->shadow == VM_OBJECT_NULL);
3112
3113         /*
3114          *      Insert pages into object.
3115          */
3116
3117         vm_object_lock(cpm_obj);
3118         for (offset = 0; offset < size; offset += PAGE_SIZE) {
3119                 m = pages;
3120                 pages = NEXT_PAGE(m);
3121                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3122
3123                 assert(!m->gobbled);
3124                 assert(!m->wanted);
3125                 assert(!m->pageout);
3126                 assert(!m->tabled);
3127                 assert(VM_PAGE_WIRED(m));
3128                 /*
3129                  * ENCRYPTED SWAP:
3130                  * "m" is not supposed to be pageable, so it
3131                  * should not be encrypted.  It wouldn't be safe
3132                  * to enter it in a new VM object while encrypted.
3133                  */
3134                 ASSERT_PAGE_DECRYPTED(m);
3135                 assert(m->busy);
3136                 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3137
3138                 m->busy = FALSE;
3139                 vm_page_insert(m, cpm_obj, offset);
3140         }
3141         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3142         vm_object_unlock(cpm_obj);
3143
3144         /*
3145          *      Hang onto a reference on the object in case a
3146          *      multi-threaded application for some reason decides
3147          *      to deallocate the portion of the address space into
3148          *      which we will insert this object.
3149          *
3150          *      Unfortunately, we must insert the object now before
3151          *      we can talk to the pmap module about which addresses
3152          *      must be wired down.  Hence, the race with a multi-
3153          *      threaded app.
3154          */
3155         vm_object_reference(cpm_obj);
3156
3157         /*
3158          *      Insert object into map.
3159          */
3160
3161         kr = vm_map_enter(
3162                 map,
3163                 addr,
3164                 size,
3165                 (vm_map_offset_t)0,
3166                 flags,
3167                 cpm_obj,
3168                 (vm_object_offset_t)0,
3169                 FALSE,
3170                 VM_PROT_ALL,
3171                 VM_PROT_ALL,
3172                 VM_INHERIT_DEFAULT);
3173
3174         if (kr != KERN_SUCCESS) {
3175                 /*
3176                  *      A CPM object doesn't have can_persist set,
3177                  *      so all we have to do is deallocate it to
3178                  *      free up these pages.
3179                  */
3180                 assert(cpm_obj->pager_created == FALSE);
3181                 assert(cpm_obj->can_persist == FALSE);
3182                 assert(cpm_obj->pageout == FALSE);
3183                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3184                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3185                 vm_object_deallocate(cpm_obj); /* kill creation ref */
3186         }
3187
3188         /*
3189          *      Inform the physical mapping system that the
3190          *      range of addresses may not fault, so that
3191          *      page tables and such can be locked down as well.
3192          */
3193         start = *addr;
3194         end = start + size;
3195         pmap = vm_map_pmap(map);
3196         pmap_pageable(pmap, start, end, FALSE);
3197
3198         /*
3199          *      Enter each page into the pmap, to avoid faults.
3200          *      Note that this loop could be coded more efficiently,
3201          *      if the need arose, rather than looking up each page
3202          *      again.
3203          */
3204         for (offset = 0, va = start; offset < size;
3205              va += PAGE_SIZE, offset += PAGE_SIZE) {
3206                 int type_of_fault;
3207
3208                 vm_object_lock(cpm_obj);
3209                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3210                 assert(m != VM_PAGE_NULL);
3211
3212                 vm_page_zero_fill(m);
3213
3214                 type_of_fault = DBG_ZERO_FILL_FAULT;
3215
3216                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3217                                VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
3218                                &type_of_fault);
3219
3220                 vm_object_unlock(cpm_obj);
3221         }
3222
3223 #if     MACH_ASSERT
3224         /*
3225          *      Verify ordering in address space.
3226          */
3227         for (offset = 0; offset < size; offset += PAGE_SIZE) {
3228                 vm_object_lock(cpm_obj);
3229                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3230                 vm_object_unlock(cpm_obj);
3231                 if (m == VM_PAGE_NULL)
3232                         panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
3233                               cpm_obj, (uint64_t)offset);
3234                 assert(m->tabled);
3235                 assert(!m->busy);
3236                 assert(!m->wanted);
3237                 assert(!m->fictitious);
3238                 assert(!m->private);
3239                 assert(!m->absent);
3240                 assert(!m->error);
3241                 assert(!m->cleaning);
3242                 assert(!m->laundry);
3243                 assert(!m->precious);
3244                 assert(!m->clustered);
3245                 if (offset != 0) {
3246                         if (m->phys_page != prev_addr + 1) {
3247                                 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3248                                        (uint64_t)start, (uint64_t)end, (uint64_t)va);
3249                                 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3250                                 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3251                                 panic("vm_allocate_cpm:  pages not contig!");
3252                         }
3253                 }
3254                 prev_addr = m->phys_page;
3255         }
3256 #endif  /* MACH_ASSERT */
3257
3258         vm_object_deallocate(cpm_obj); /* kill extra ref */
3259
3260         return kr;
3261 }
3262
3263
3264 #else   /* VM_CPM */
3265
3266 /*
3267  *      Interface is defined in all cases, but unless the kernel
3268  *      is built explicitly for this option, the interface does
3269  *      nothing.
3270  */
3271
3272 kern_return_t
3273 vm_map_enter_cpm(
3274         __unused vm_map_t       map,
3275         __unused vm_map_offset_t        *addr,
3276         __unused vm_map_size_t  size,
3277         __unused int            flags)
3278 {
3279         return KERN_FAILURE;
3280 }
3281 #endif /* VM_CPM */
3282
3283 /* Not used without nested pmaps */
3284 #ifndef NO_NESTED_PMAP
3285 /*
3286  * Clip and unnest a portion of a nested submap mapping.
3287  */
3288
3289
3290 static void
3291 vm_map_clip_unnest(
3292         vm_map_t        map,
3293         vm_map_entry_t  entry,
3294         vm_map_offset_t start_unnest,
3295         vm_map_offset_t end_unnest)
3296 {
3297         vm_map_offset_t old_start_unnest = start_unnest;
3298         vm_map_offset_t old_end_unnest = end_unnest;
3299
3300         assert(entry->is_sub_map);
3301         assert(entry->object.sub_map != NULL);
3302
3303         /*
3304          * Query the platform for the optimal unnest range.
3305          * DRK: There's some duplication of effort here, since
3306          * callers may have adjusted the range to some extent. This
3307          * routine was introduced to support 1GiB subtree nesting
3308          * for x86 platforms, which can also nest on 2MiB boundaries
3309          * depending on size/alignment.
3310          */
3311         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3312                 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3313         }
3314
3315         if (entry->vme_start > start_unnest ||
3316             entry->vme_end < end_unnest) {
3317                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3318                       "bad nested entry: start=0x%llx end=0x%llx\n",
3319                       (long long)start_unnest, (long long)end_unnest,
3320                       (long long)entry->vme_start, (long long)entry->vme_end);
3321         }
3322
3323         if (start_unnest > entry->vme_start) {
3324                 _vm_map_clip_start(&map->hdr,
3325                                    entry,
3326                                    start_unnest);
3327                 vm_map_store_update_first_free(map, map->first_free);
3328         }
3329         if (entry->vme_end > end_unnest) {
3330                 _vm_map_clip_end(&map->hdr,
3331                                  entry,
3332                                  end_unnest);
3333                 vm_map_store_update_first_free(map, map->first_free);
3334         }
3335
3336         pmap_unnest(map->pmap,
3337                     entry->vme_start,
3338                     entry->vme_end - entry->vme_start);
3339         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3340                 /* clean up parent map/maps */
3341                 vm_map_submap_pmap_clean(
3342                         map, entry->vme_start,
3343                         entry->vme_end,
3344                         entry->object.sub_map,
3345                         entry->offset);
3346         }
3347         entry->use_pmap = FALSE;
3348         if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3349                 entry->alias = VM_MEMORY_UNSHARED_PMAP;
3350         }
3351 }
3352 #endif  /* NO_NESTED_PMAP */
3353
3354 /*
3355  *      vm_map_clip_start:      [ internal use only ]
3356  *
3357  *      Asserts that the given entry begins at or after
3358  *      the specified address; if necessary,
3359  *      it splits the entry into two.
3360  */
3361 void
3362 vm_map_clip_start(
3363         vm_map_t        map,
3364         vm_map_entry_t  entry,
3365         vm_map_offset_t startaddr)
3366 {
3367 #ifndef NO_NESTED_PMAP
3368         if (entry->use_pmap &&
3369             startaddr >= entry->vme_start) {
3370                 vm_map_offset_t start_unnest, end_unnest;
3371
3372                 /*
3373                  * Make sure "startaddr" is no longer in a nested range
3374                  * before we clip.  Unnest only the minimum range the platform
3375                  * can handle.
3376                  * vm_map_clip_unnest may perform additional adjustments to
3377                  * the unnest range.
3378                  */
3379                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3380                 end_unnest = start_unnest + pmap_nesting_size_min;
3381                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3382         }
3383 #endif /* NO_NESTED_PMAP */
3384         if (startaddr > entry->vme_start) {
3385                 if (entry->object.vm_object &&
3386                     !entry->is_sub_map &&
3387                     entry->object.vm_object->phys_contiguous) {
3388                         pmap_remove(map->pmap,
3389                                     (addr64_t)(entry->vme_start),
3390                                     (addr64_t)(entry->vme_end));
3391                 }
3392                 _vm_map_clip_start(&map->hdr, entry, startaddr);
3393                 vm_map_store_update_first_free(map, map->first_free);
3394         }
3395 }
3396
3397
3398 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3399         MACRO_BEGIN \
3400         if ((startaddr) > (entry)->vme_start) \
3401                 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3402         MACRO_END
3403
3404 /*
3405  *      This routine is called only when it is known that
3406  *      the entry must be split.
3407  */
3408 static void
3409 _vm_map_clip_start(
3410         register struct vm_map_header   *map_header,
3411         register vm_map_entry_t         entry,
3412         register vm_map_offset_t                start)
3413 {
3414         register vm_map_entry_t new_entry;
3415
3416         /*
3417          *      Split off the front portion --
3418          *      note that we must insert the new
3419          *      entry BEFORE this one, so that
3420          *      this entry has the specified starting
3421          *      address.
3422          */
3423
3424         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3425         vm_map_entry_copy_full(new_entry, entry);
3426
3427         assert(VM_MAP_PAGE_ALIGNED(start,
3428                                    VM_MAP_HDR_PAGE_MASK(map_header)));
3429         new_entry->vme_end = start;
3430         assert(new_entry->vme_start < new_entry->vme_end);
3431         entry->offset += (start - entry->vme_start);
3432         assert(start < entry->vme_end);
3433         assert(VM_MAP_PAGE_ALIGNED(start,
3434                                    VM_MAP_HDR_PAGE_MASK(map_header)));
3435         entry->vme_start = start;
3436
3437         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3438
3439         if (entry->is_sub_map)
3440                 vm_map_reference(new_entry->object.sub_map);
3441         else
3442                 vm_object_reference(new_entry->object.vm_object);
3443 }
3444
3445
3446 /*
3447  *      vm_map_clip_end:        [ internal use only ]
3448  *
3449  *      Asserts that the given entry ends at or before
3450  *      the specified address; if necessary,
3451  *      it splits the entry into two.
3452  */
3453 void
3454 vm_map_clip_end(
3455         vm_map_t        map,
3456         vm_map_entry_t  entry,
3457         vm_map_offset_t endaddr)
3458 {
3459         if (endaddr > entry->vme_end) {
3460                 /*
3461                  * Within the scope of this clipping, limit "endaddr" to
3462                  * the end of this map entry...
3463                  */
3464                 endaddr = entry->vme_end;
3465         }
3466 #ifndef NO_NESTED_PMAP
3467         if (entry->use_pmap) {
3468                 vm_map_offset_t start_unnest, end_unnest;
3469
3470                 /*
3471                  * Make sure the range between the start of this entry and
3472                  * the new "endaddr" is no longer nested before we clip.
3473                  * Unnest only the minimum range the platform can handle.
3474                  * vm_map_clip_unnest may perform additional adjustments to
3475                  * the unnest range.
3476                  */
3477                 start_unnest = entry->vme_start;
3478                 end_unnest =
3479                         (endaddr + pmap_nesting_size_min - 1) &
3480                         ~(pmap_nesting_size_min - 1);
3481                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3482         }
3483 #endif /* NO_NESTED_PMAP */
3484         if (endaddr < entry->vme_end) {
3485                 if (entry->object.vm_object &&
3486                     !entry->is_sub_map &&
3487                     entry->object.vm_object->phys_contiguous) {
3488                         pmap_remove(map->pmap,
3489                                     (addr64_t)(entry->vme_start),
3490                                     (addr64_t)(entry->vme_end));
3491                 }
3492                 _vm_map_clip_end(&map->hdr, entry, endaddr);
3493                 vm_map_store_update_first_free(map, map->first_free);
3494         }
3495 }
3496
3497
3498 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3499         MACRO_BEGIN \
3500         if ((endaddr) < (entry)->vme_end) \
3501                 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3502         MACRO_END
3503
3504 /*
3505  *      This routine is called only when it is known that
3506  *      the entry must be split.
3507  */
3508 static void
3509 _vm_map_clip_end(
3510         register struct vm_map_header   *map_header,
3511         register vm_map_entry_t         entry,
3512         register vm_map_offset_t        end)
3513 {
3514         register vm_map_entry_t new_entry;
3515
3516         /*
3517          *      Create a new entry and insert it
3518          *      AFTER the specified entry
3519          */
3520
3521         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3522         vm_map_entry_copy_full(new_entry, entry);
3523
3524         assert(entry->vme_start < end);
3525         assert(VM_MAP_PAGE_ALIGNED(end,
3526                                    VM_MAP_HDR_PAGE_MASK(map_header)));
3527         new_entry->vme_start = entry->vme_end = end;
3528         new_entry->offset += (end - entry->vme_start);
3529         assert(new_entry->vme_start < new_entry->vme_end);
3530
3531         _vm_map_store_entry_link(map_header, entry, new_entry);
3532
3533         if (entry->is_sub_map)
3534                 vm_map_reference(new_entry->object.sub_map);
3535         else
3536                 vm_object_reference(new_entry->object.vm_object);
3537 }
3538
3539
3540 /*
3541  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
3542  *
3543  *      Asserts that the starting and ending region
3544  *      addresses fall within the valid range of the map.
3545  */
3546 #define VM_MAP_RANGE_CHECK(map, start, end)     \
3547         MACRO_BEGIN                             \
3548         if (start < vm_map_min(map))            \
3549                 start = vm_map_min(map);        \
3550         if (end > vm_map_max(map))              \
3551                 end = vm_map_max(map);          \
3552         if (start > end)                        \
3553                 start = end;                    \
3554         MACRO_END
3555
3556 /*
3557  *      vm_map_range_check:     [ internal use only ]
3558  *
3559  *      Check that the region defined by the specified start and
3560  *      end addresses are wholly contained within a single map
3561  *      entry or set of adjacent map entries of the spacified map,
3562  *      i.e. the specified region contains no unmapped space.
3563  *      If any or all of the region is unmapped, FALSE is returned.
3564  *      Otherwise, TRUE is returned and if the output argument 'entry'
3565  *      is not NULL it points to the map entry containing the start
3566  *      of the region.
3567  *
3568  *      The map is locked for reading on entry and is left locked.
3569  */
3570 static boolean_t
3571 vm_map_range_check(
3572         register vm_map_t       map,
3573         register vm_map_offset_t        start,
3574         register vm_map_offset_t        end,
3575         vm_map_entry_t          *entry)
3576 {
3577         vm_map_entry_t          cur;
3578         register vm_map_offset_t        prev;
3579
3580         /*
3581          *      Basic sanity checks first
3582          */
3583         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3584                 return (FALSE);
3585
3586         /*
3587          *      Check first if the region starts within a valid
3588          *      mapping for the map.
3589          */
3590         if (!vm_map_lookup_entry(map, start, &cur))
3591                 return (FALSE);
3592
3593         /*
3594          *      Optimize for the case that the region is contained
3595          *      in a single map entry.
3596          */
3597         if (entry != (vm_map_entry_t *) NULL)
3598                 *entry = cur;
3599         if (end <= cur->vme_end)
3600                 return (TRUE);
3601
3602         /*
3603          *      If the region is not wholly contained within a
3604          *      single entry, walk the entries looking for holes.
3605          */
3606         prev = cur->vme_end;
3607         cur = cur->vme_next;
3608         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3609                 if (end <= cur->vme_end)
3610                         return (TRUE);
3611                 prev = cur->vme_end;
3612                 cur = cur->vme_next;
3613         }
3614         return (FALSE);
3615 }
3616
3617 /*
3618  *      vm_map_submap:          [ kernel use only ]
3619  *
3620  *      Mark the given range as handled by a subordinate map.
3621  *
3622  *      This range must have been created with vm_map_find using
3623  *      the vm_submap_object, and no other operations may have been
3624  *      performed on this range prior to calling vm_map_submap.
3625  *
3626  *      Only a limited number of operations can be performed
3627  *      within this rage after calling vm_map_submap:
3628  *              vm_fault
3629  *      [Don't try vm_map_copyin!]
3630  *
3631  *      To remove a submapping, one must first remove the
3632  *      range from the superior map, and then destroy the
3633  *      submap (if desired).  [Better yet, don't try it.]
3634  */
3635 kern_return_t
3636 vm_map_submap(
3637         vm_map_t                map,
3638         vm_map_offset_t start,
3639         vm_map_offset_t end,
3640         vm_map_t                submap,
3641         vm_map_offset_t offset,
3642 #ifdef NO_NESTED_PMAP
3643         __unused
3644 #endif  /* NO_NESTED_PMAP */
3645         boolean_t               use_pmap)
3646 {
3647         vm_map_entry_t          entry;
3648         register kern_return_t  result = KERN_INVALID_ARGUMENT;
3649         register vm_object_t    object;
3650
3651         vm_map_lock(map);
3652
3653         if (! vm_map_lookup_entry(map, start, &entry)) {
3654                 entry = entry->vme_next;
3655         }
3656
3657         if (entry == vm_map_to_entry(map) ||
3658             entry->is_sub_map) {
3659                 vm_map_unlock(map);
3660                 return KERN_INVALID_ARGUMENT;
3661         }
3662
3663         assert(!entry->use_pmap); /* we don't want to unnest anything here */
3664         vm_map_clip_start(map, entry, start);
3665         vm_map_clip_end(map, entry, end);
3666
3667         if ((entry->vme_start == start) && (entry->vme_end == end) &&
3668             (!entry->is_sub_map) &&
3669             ((object = entry->object.vm_object) == vm_submap_object) &&
3670             (object->resident_page_count == 0) &&
3671             (object->copy == VM_OBJECT_NULL) &&
3672             (object->shadow == VM_OBJECT_NULL) &&
3673             (!object->pager_created)) {
3674                 entry->offset = (vm_object_offset_t)offset;
3675                 entry->object.vm_object = VM_OBJECT_NULL;
3676                 vm_object_deallocate(object);
3677                 entry->is_sub_map = TRUE;
3678                 entry->object.sub_map = submap;
3679                 vm_map_reference(submap);
3680                 if (submap->mapped_in_other_pmaps == FALSE &&
3681                     vm_map_pmap(submap) != PMAP_NULL &&
3682                     vm_map_pmap(submap) != vm_map_pmap(map)) {
3683                         /*
3684                          * This submap is being mapped in a map
3685                          * that uses a different pmap.
3686                          * Set its "mapped_in_other_pmaps" flag
3687                          * to indicate that we now need to
3688                          * remove mappings from all pmaps rather
3689                          * than just the submap's pmap.
3690                          */
3691                         submap->mapped_in_other_pmaps = TRUE;
3692                 }
3693
3694 #ifndef NO_NESTED_PMAP
3695                 if (use_pmap) {
3696                         /* nest if platform code will allow */
3697                         if(submap->pmap == NULL) {
3698                                 ledger_t ledger = map->pmap->ledger;
3699                                 submap->pmap = pmap_create(ledger,
3700                                                 (vm_map_size_t) 0, FALSE);
3701                                 if(submap->pmap == PMAP_NULL) {
3702                                         vm_map_unlock(map);
3703                                         return(KERN_NO_SPACE);
3704                                 }
3705                         }
3706                         result = pmap_nest(map->pmap,
3707                                            (entry->object.sub_map)->pmap,
3708                                            (addr64_t)start,
3709                                            (addr64_t)start,
3710                                            (uint64_t)(end - start));
3711                         if(result)
3712                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3713                         entry->use_pmap = TRUE;
3714                 }
3715 #else   /* NO_NESTED_PMAP */
3716                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3717 #endif  /* NO_NESTED_PMAP */
3718                 result = KERN_SUCCESS;
3719         }
3720         vm_map_unlock(map);
3721
3722         return(result);
3723 }
3724
3725 /*
3726  *      vm_map_protect:
3727  *
3728  *      Sets the protection of the specified address
3729  *      region in the target map.  If "set_max" is
3730  *      specified, the maximum protection is to be set;
3731  *      otherwise, only the current protection is affected.
3732  */
3733 kern_return_t
3734 vm_map_protect(
3735         register vm_map_t       map,
3736         register vm_map_offset_t        start,
3737         register vm_map_offset_t        end,
3738         register vm_prot_t      new_prot,
3739         register boolean_t      set_max)
3740 {
3741         register vm_map_entry_t         current;
3742         register vm_map_offset_t        prev;
3743         vm_map_entry_t                  entry;
3744         vm_prot_t                       new_max;
3745
3746         XPR(XPR_VM_MAP,
3747             "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3748             map, start, end, new_prot, set_max);
3749
3750         vm_map_lock(map);
3751
3752         /* LP64todo - remove this check when vm_map_commpage64()
3753          * no longer has to stuff in a map_entry for the commpage
3754          * above the map's max_offset.
3755          */
3756         if (start >= map->max_offset) {
3757                 vm_map_unlock(map);
3758                 return(KERN_INVALID_ADDRESS);
3759         }
3760
3761         while(1) {
3762                 /*
3763                  *      Lookup the entry.  If it doesn't start in a valid
3764                  *      entry, return an error.
3765                  */
3766                 if (! vm_map_lookup_entry(map, start, &entry)) {
3767                         vm_map_unlock(map);
3768                         return(KERN_INVALID_ADDRESS);
3769                 }
3770
3771                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3772                         start = SUPERPAGE_ROUND_DOWN(start);
3773                         continue;
3774                 }
3775                 break;
3776         }
3777         if (entry->superpage_size)
3778                 end = SUPERPAGE_ROUND_UP(end);
3779
3780         /*
3781          *      Make a first pass to check for protection and address
3782          *      violations.
3783          */
3784
3785         current = entry;
3786         prev = current->vme_start;
3787         while ((current != vm_map_to_entry(map)) &&
3788                (current->vme_start < end)) {
3789
3790                 /*
3791                  * If there is a hole, return an error.
3792                  */
3793                 if (current->vme_start != prev) {
3794                         vm_map_unlock(map);
3795                         return(KERN_INVALID_ADDRESS);
3796                 }
3797
3798                 new_max = current->max_protection;
3799                 if(new_prot & VM_PROT_COPY) {
3800                         new_max |= VM_PROT_WRITE;
3801                         if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3802                                 vm_map_unlock(map);
3803                                 return(KERN_PROTECTION_FAILURE);
3804                         }
3805                 } else {
3806                         if ((new_prot & new_max) != new_prot) {
3807                                 vm_map_unlock(map);
3808                                 return(KERN_PROTECTION_FAILURE);
3809                         }
3810                 }
3811
3812
3813                 prev = current->vme_end;
3814                 current = current->vme_next;
3815         }
3816         if (end > prev) {
3817                 vm_map_unlock(map);
3818                 return(KERN_INVALID_ADDRESS);
3819         }
3820
3821         /*
3822          *      Go back and fix up protections.
3823          *      Clip to start here if the range starts within
3824          *      the entry.
3825          */
3826
3827         current = entry;
3828         if (current != vm_map_to_entry(map)) {
3829                 /* clip and unnest if necessary */
3830                 vm_map_clip_start(map, current, start);
3831         }
3832
3833         while ((current != vm_map_to_entry(map)) &&
3834                (current->vme_start < end)) {
3835
3836                 vm_prot_t       old_prot;
3837
3838                 vm_map_clip_end(map, current, end);
3839
3840                 assert(!current->use_pmap); /* clipping did unnest if needed */
3841
3842                 old_prot = current->protection;
3843
3844                 if(new_prot & VM_PROT_COPY) {
3845                         /* caller is asking specifically to copy the      */
3846                         /* mapped data, this implies that max protection  */
3847                         /* will include write.  Caller must be prepared   */
3848                         /* for loss of shared memory communication in the */
3849                         /* target area after taking this step */
3850
3851                         if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3852                                 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3853                                 current->offset = 0;
3854                         }
3855                         current->needs_copy = TRUE;
3856                         current->max_protection |= VM_PROT_WRITE;
3857                 }
3858
3859                 if (set_max)
3860                         current->protection =
3861                                 (current->max_protection =
3862                                  new_prot & ~VM_PROT_COPY) &
3863                                 old_prot;
3864                 else
3865                         current->protection = new_prot & ~VM_PROT_COPY;
3866
3867                 /*
3868                  *      Update physical map if necessary.
3869                  *      If the request is to turn off write protection,
3870                  *      we won't do it for real (in pmap). This is because
3871                  *      it would cause copy-on-write to fail.  We've already
3872                  *      set, the new protection in the map, so if a
3873                  *      write-protect fault occurred, it will be fixed up
3874                  *      properly, COW or not.
3875                  */
3876                 if (current->protection != old_prot) {
3877                         /* Look one level in we support nested pmaps */
3878                         /* from mapped submaps which are direct entries */
3879                         /* in our map */
3880
3881                         vm_prot_t prot;
3882
3883                         prot = current->protection & ~VM_PROT_WRITE;
3884
3885                         if (override_nx(map, current->alias) && prot)
3886                                 prot |= VM_PROT_EXECUTE;
3887
3888                         if (current->is_sub_map && current->use_pmap) {
3889                                 pmap_protect(current->object.sub_map->pmap,
3890                                              current->vme_start,
3891                                              current->vme_end,
3892                                              prot);
3893                         } else {
3894                                 pmap_protect(map->pmap,
3895                                              current->vme_start,
3896                                              current->vme_end,
3897                                              prot);
3898                         }
3899                 }
3900                 current = current->vme_next;
3901         }
3902
3903         current = entry;
3904         while ((current != vm_map_to_entry(map)) &&
3905                (current->vme_start <= end)) {
3906                 vm_map_simplify_entry(map, current);
3907                 current = current->vme_next;
3908         }
3909
3910         vm_map_unlock(map);
3911         return(KERN_SUCCESS);
3912 }
3913
3914 /*
3915  *      vm_map_inherit:
3916  *
3917  *      Sets the inheritance of the specified address
3918  *      range in the target map.  Inheritance
3919  *      affects how the map will be shared with
3920  *      child maps at the time of vm_map_fork.
3921  */
3922 kern_return_t
3923 vm_map_inherit(
3924         register vm_map_t       map,
3925         register vm_map_offset_t        start,
3926         register vm_map_offset_t        end,
3927         register vm_inherit_t   new_inheritance)
3928 {
3929         register vm_map_entry_t entry;
3930         vm_map_entry_t  temp_entry;
3931
3932         vm_map_lock(map);
3933
3934         VM_MAP_RANGE_CHECK(map, start, end);
3935
3936         if (vm_map_lookup_entry(map, start, &temp_entry)) {
3937                 entry = temp_entry;
3938         }
3939         else {
3940                 temp_entry = temp_entry->vme_next;
3941                 entry = temp_entry;
3942         }
3943
3944         /* first check entire range for submaps which can't support the */
3945         /* given inheritance. */
3946         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3947                 if(entry->is_sub_map) {
3948                         if(new_inheritance == VM_INHERIT_COPY) {
3949                                 vm_map_unlock(map);
3950                                 return(KERN_INVALID_ARGUMENT);
3951                         }
3952                 }
3953
3954                 entry = entry->vme_next;
3955         }
3956
3957         entry = temp_entry;
3958         if (entry != vm_map_to_entry(map)) {
3959                 /* clip and unnest if necessary */
3960                 vm_map_clip_start(map, entry, start);
3961         }
3962
3963         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3964                 vm_map_clip_end(map, entry, end);
3965                 assert(!entry->use_pmap); /* clip did unnest if needed */
3966
3967                 entry->inheritance = new_inheritance;
3968
3969                 entry = entry->vme_next;
3970         }
3971
3972         vm_map_unlock(map);
3973         return(KERN_SUCCESS);
3974 }
3975
3976 /*
3977  * Update the accounting for the amount of wired memory in this map.  If the user has
3978  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
3979  */
3980
3981 static kern_return_t
3982 add_wire_counts(
3983         vm_map_t        map,
3984         vm_map_entry_t  entry,
3985         boolean_t       user_wire)
3986 {
3987         vm_map_size_t   size;
3988
3989         if (user_wire) {
3990                 unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
3991
3992                 /*
3993                  * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
3994                  * this map entry.
3995                  */
3996
3997                 if (entry->user_wired_count == 0) {
3998                         size = entry->vme_end - entry->vme_start;
3999
4000                         /*
4001                          * Since this is the first time the user is wiring this map entry, check to see if we're
4002                          * exceeding the user wire limits.  There is a per map limit which is the smaller of either
4003                          * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
4004                          * a system-wide limit on the amount of memory all users can wire.  If the user is over either
4005                          * limit, then we fail.
4006                          */
4007
4008                         if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4009                            size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4010                            size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4011                                 return KERN_RESOURCE_SHORTAGE;
4012
4013                         /*
4014                          * The first time the user wires an entry, we also increment the wired_count and add this to
4015                          * the total that has been wired in the map.
4016                          */
4017
4018                         if (entry->wired_count >= MAX_WIRE_COUNT)
4019                                 return KERN_FAILURE;
4020
4021                         entry->wired_count++;
4022                         map->user_wire_size += size;
4023                 }
4024
4025                 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4026                         return KERN_FAILURE;
4027
4028                 entry->user_wired_count++;
4029
4030         } else {
4031
4032                 /*
4033                  * The kernel's wiring the memory.  Just bump the count and continue.
4034                  */
4035
4036                 if (entry->wired_count >= MAX_WIRE_COUNT)
4037                         panic("vm_map_wire: too many wirings");
4038
4039                 entry->wired_count++;
4040         }
4041
4042         return KERN_SUCCESS;
4043 }
4044
4045 /*
4046  * Update the memory wiring accounting now that the given map entry is being unwired.
4047  */
4048
4049 static void
4050 subtract_wire_counts(
4051         vm_map_t        map,
4052         vm_map_entry_t  entry,
4053         boolean_t       user_wire)
4054 {
4055
4056         if (user_wire) {
4057
4058                 /*
4059                  * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
4060                  */
4061
4062                 if (entry->user_wired_count == 1) {
4063
4064                         /*
4065                          * We're removing the last user wire reference.  Decrement the wired_count and the total
4066                          * user wired memory for this map.
4067                          */
4068
4069                         assert(entry->wired_count >= 1);
4070                         entry->wired_count--;
4071                         map->user_wire_size -= entry->vme_end - entry->vme_start;
4072                 }
4073
4074                 assert(entry->user_wired_count >= 1);
4075                 entry->user_wired_count--;
4076
4077         } else {
4078
4079                 /*
4080                  * The kernel is unwiring the memory.   Just update the count.
4081                  */
4082
4083                 assert(entry->wired_count >= 1);
4084                 entry->wired_count--;
4085         }
4086 }
4087
4088 /*
4089  *      vm_map_wire:
4090  *
4091  *      Sets the pageability of the specified address range in the
4092  *      target map as wired.  Regions specified as not pageable require
4093  *      locked-down physical memory and physical page maps.  The
4094  *      access_type variable indicates types of accesses that must not
4095  *      generate page faults.  This is checked against protection of
4096  *      memory being locked-down.
4097  *
4098  *      The map must not be locked, but a reference must remain to the
4099  *      map throughout the call.
4100  */
4101 static kern_return_t
4102 vm_map_wire_nested(
4103         register vm_map_t       map,
4104         register vm_map_offset_t        start,
4105         register vm_map_offset_t        end,
4106         register vm_prot_t      access_type,
4107         boolean_t               user_wire,
4108         pmap_t                  map_pmap,
4109         vm_map_offset_t         pmap_addr)
4110 {
4111         register vm_map_entry_t entry;
4112         struct vm_map_entry     *first_entry, tmp_entry;
4113         vm_map_t                real_map;
4114         register vm_map_offset_t        s,e;
4115         kern_return_t           rc;
4116         boolean_t               need_wakeup;
4117         boolean_t               main_map = FALSE;
4118         wait_interrupt_t        interruptible_state;
4119         thread_t                cur_thread;
4120         unsigned int            last_timestamp;
4121         vm_map_size_t           size;
4122
4123         vm_map_lock(map);
4124         if(map_pmap == NULL)
4125                 main_map = TRUE;
4126         last_timestamp = map->timestamp;
4127
4128         VM_MAP_RANGE_CHECK(map, start, end);
4129         assert(page_aligned(start));
4130         assert(page_aligned(end));
4131         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4132         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4133         if (start == end) {
4134                 /* We wired what the caller asked for, zero pages */
4135                 vm_map_unlock(map);
4136                 return KERN_SUCCESS;
4137         }
4138
4139         need_wakeup = FALSE;
4140         cur_thread = current_thread();
4141
4142         s = start;
4143         rc = KERN_SUCCESS;
4144
4145         if (vm_map_lookup_entry(map, s, &first_entry)) {
4146                 entry = first_entry;
4147                 /*
4148                  * vm_map_clip_start will be done later.
4149                  * We don't want to unnest any nested submaps here !
4150                  */
4151         } else {
4152                 /* Start address is not in map */
4153                 rc = KERN_INVALID_ADDRESS;
4154                 goto done;
4155         }
4156
4157         while ((entry != vm_map_to_entry(map)) && (s < end)) {
4158                 /*
4159                  * At this point, we have wired from "start" to "s".
4160                  * We still need to wire from "s" to "end".
4161                  *
4162                  * "entry" hasn't been clipped, so it could start before "s"
4163                  * and/or end after "end".
4164                  */
4165
4166                 /* "e" is how far we want to wire in this entry */
4167                 e = entry->vme_end;
4168                 if (e > end)
4169                         e = end;
4170
4171                 /*
4172                  * If another thread is wiring/unwiring this entry then
4173                  * block after informing other thread to wake us up.
4174                  */
4175                 if (entry->in_transition) {
4176                         wait_result_t wait_result;
4177
4178                         /*
4179                          * We have not clipped the entry.  Make sure that
4180                          * the start address is in range so that the lookup
4181                          * below will succeed.
4182                          * "s" is the current starting point: we've already
4183                          * wired from "start" to "s" and we still have
4184                          * to wire from "s" to "end".
4185                          */
4186
4187                         entry->needs_wakeup = TRUE;
4188
4189                         /*
4190                          * wake up anybody waiting on entries that we have
4191                          * already wired.
4192                          */
4193                         if (need_wakeup) {
4194                                 vm_map_entry_wakeup(map);
4195                                 need_wakeup = FALSE;
4196                         }
4197                         /*
4198                          * User wiring is interruptible
4199                          */
4200                         wait_result = vm_map_entry_wait(map,
4201                                                         (user_wire) ? THREAD_ABORTSAFE :
4202                                                         THREAD_UNINT);
4203                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
4204                                 /*
4205                                  * undo the wirings we have done so far
4206                                  * We do not clear the needs_wakeup flag,
4207                                  * because we cannot tell if we were the
4208                                  * only one waiting.
4209                                  */
4210                                 rc = KERN_FAILURE;
4211                                 goto done;
4212                         }
4213
4214                         /*
4215                          * Cannot avoid a lookup here. reset timestamp.
4216                          */
4217                         last_timestamp = map->timestamp;
4218
4219                         /*
4220                          * The entry could have been clipped, look it up again.
4221                          * Worse that can happen is, it may not exist anymore.
4222                          */
4223                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
4224                                 /*
4225                                  * User: undo everything upto the previous
4226                                  * entry.  let vm_map_unwire worry about
4227                                  * checking the validity of the range.
4228                                  */
4229                                 rc = KERN_FAILURE;
4230                                 goto done;
4231                         }
4232                         entry = first_entry;
4233                         continue;
4234                 }
4235
4236                 if (entry->is_sub_map) {
4237                         vm_map_offset_t sub_start;
4238                         vm_map_offset_t sub_end;
4239                         vm_map_offset_t local_start;
4240                         vm_map_offset_t local_end;
4241                         pmap_t          pmap;
4242
4243                         vm_map_clip_start(map, entry, s);
4244                         vm_map_clip_end(map, entry, end);
4245
4246                         sub_start = entry->offset;
4247                         sub_end = entry->vme_end;
4248                         sub_end += entry->offset - entry->vme_start;
4249
4250                         local_end = entry->vme_end;
4251                         if(map_pmap == NULL) {
4252                                 vm_object_t             object;
4253                                 vm_object_offset_t      offset;
4254                                 vm_prot_t               prot;
4255                                 boolean_t               wired;
4256                                 vm_map_entry_t          local_entry;
4257                                 vm_map_version_t         version;
4258                                 vm_map_t                lookup_map;
4259
4260                                 if(entry->use_pmap) {
4261                                         pmap = entry->object.sub_map->pmap;
4262                                         /* ppc implementation requires that */
4263                                         /* submaps pmap address ranges line */
4264                                         /* up with parent map */
4265 #ifdef notdef
4266                                         pmap_addr = sub_start;
4267 #endif
4268                                         pmap_addr = s;
4269                                 } else {
4270                                         pmap = map->pmap;
4271                                         pmap_addr = s;
4272                                 }
4273
4274                                 if (entry->wired_count) {
4275                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4276                                                 goto done;
4277
4278                                         /*
4279                                          * The map was not unlocked:
4280                                          * no need to goto re-lookup.
4281                                          * Just go directly to next entry.
4282                                          */
4283                                         entry = entry->vme_next;
4284                                         s = entry->vme_start;
4285                                         continue;
4286
4287                                 }
4288
4289                                 /* call vm_map_lookup_locked to */
4290                                 /* cause any needs copy to be   */
4291                                 /* evaluated */
4292                                 local_start = entry->vme_start;
4293                                 lookup_map = map;
4294                                 vm_map_lock_write_to_read(map);
4295                                 if(vm_map_lookup_locked(
4296                                            &lookup_map, local_start,
4297                                            access_type,
4298                                            OBJECT_LOCK_EXCLUSIVE,
4299                                            &version, &object,
4300                                            &offset, &prot, &wired,
4301                                            NULL,
4302                                            &real_map)) {
4303
4304                                         vm_map_unlock_read(lookup_map);
4305                                         vm_map_unwire(map, start,
4306                                                       s, user_wire);
4307                                         return(KERN_FAILURE);
4308                                 }
4309                                 vm_object_unlock(object);
4310                                 if(real_map != lookup_map)
4311                                         vm_map_unlock(real_map);
4312                                 vm_map_unlock_read(lookup_map);
4313                                 vm_map_lock(map);
4314
4315                                 /* we unlocked, so must re-lookup */
4316                                 if (!vm_map_lookup_entry(map,
4317                                                          local_start,
4318                                                          &local_entry)) {
4319                                         rc = KERN_FAILURE;
4320                                         goto done;
4321                                 }
4322
4323                                 /*
4324                                  * entry could have been "simplified",
4325                                  * so re-clip
4326                                  */
4327                                 entry = local_entry;
4328                                 assert(s == local_start);
4329                                 vm_map_clip_start(map, entry, s);
4330                                 vm_map_clip_end(map, entry, end);
4331                                 /* re-compute "e" */
4332                                 e = entry->vme_end;
4333                                 if (e > end)
4334                                         e = end;
4335
4336                                 /* did we have a change of type? */
4337                                 if (!entry->is_sub_map) {
4338                                         last_timestamp = map->timestamp;
4339                                         continue;
4340                                 }
4341                         } else {
4342                                 local_start = entry->vme_start;
4343                                 pmap = map_pmap;
4344                         }
4345
4346                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4347                                 goto done;
4348
4349                         entry->in_transition = TRUE;
4350
4351                         vm_map_unlock(map);
4352                         rc = vm_map_wire_nested(entry->object.sub_map,
4353                                                 sub_start, sub_end,
4354                                                 access_type,
4355                                                 user_wire, pmap, pmap_addr);
4356                         vm_map_lock(map);
4357
4358                         /*
4359                          * Find the entry again.  It could have been clipped
4360                          * after we unlocked the map.
4361                          */
4362                         if (!vm_map_lookup_entry(map, local_start,
4363                                                  &first_entry))
4364                                 panic("vm_map_wire: re-lookup failed");
4365                         entry = first_entry;
4366
4367                         assert(local_start == s);
4368                         /* re-compute "e" */
4369                         e = entry->vme_end;
4370                         if (e > end)
4371                                 e = end;
4372
4373                         last_timestamp = map->timestamp;
4374                         while ((entry != vm_map_to_entry(map)) &&
4375                                (entry->vme_start < e)) {
4376                                 assert(entry->in_transition);
4377                                 entry->in_transition = FALSE;
4378                                 if (entry->needs_wakeup) {
4379                                         entry->needs_wakeup = FALSE;
4380                                         need_wakeup = TRUE;
4381                                 }
4382                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4383                                         subtract_wire_counts(map, entry, user_wire);
4384                                 }
4385                                 entry = entry->vme_next;
4386                         }
4387                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
4388                                 goto done;
4389                         }
4390
4391                         /* no need to relookup again */
4392                         s = entry->vme_start;
4393                         continue;
4394                 }
4395
4396                 /*
4397                  * If this entry is already wired then increment
4398                  * the appropriate wire reference count.
4399                  */
4400                 if (entry->wired_count) {
4401                         /*
4402                          * entry is already wired down, get our reference
4403                          * after clipping to our range.
4404                          */
4405                         vm_map_clip_start(map, entry, s);
4406                         vm_map_clip_end(map, entry, end);
4407
4408                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4409                                 goto done;
4410
4411                         /* map was not unlocked: no need to relookup */
4412                         entry = entry->vme_next;
4413                         s = entry->vme_start;
4414                         continue;
4415                 }
4416
4417                 /*
4418                  * Unwired entry or wire request transmitted via submap
4419                  */
4420
4421
4422                 /*
4423                  * Perform actions of vm_map_lookup that need the write
4424                  * lock on the map: create a shadow object for a
4425                  * copy-on-write region, or an object for a zero-fill
4426                  * region.
4427                  */
4428                 size = entry->vme_end - entry->vme_start;
4429                 /*
4430                  * If wiring a copy-on-write page, we need to copy it now
4431                  * even if we're only (currently) requesting read access.
4432                  * This is aggressive, but once it's wired we can't move it.
4433                  */
4434                 if (entry->needs_copy) {
4435                         vm_object_shadow(&entry->object.vm_object,
4436                                          &entry->offset, size);
4437                         entry->needs_copy = FALSE;
4438                 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4439                         entry->object.vm_object = vm_object_allocate(size);
4440                         entry->offset = (vm_object_offset_t)0;
4441                 }
4442
4443                 vm_map_clip_start(map, entry, s);
4444                 vm_map_clip_end(map, entry, end);
4445
4446                 /* re-compute "e" */
4447                 e = entry->vme_end;
4448                 if (e > end)
4449                         e = end;
4450
4451                 /*
4452                  * Check for holes and protection mismatch.
4453                  * Holes: Next entry should be contiguous unless this
4454                  *        is the end of the region.
4455                  * Protection: Access requested must be allowed, unless
4456                  *      wiring is by protection class
4457                  */
4458                 if ((entry->vme_end < end) &&
4459                     ((entry->vme_next == vm_map_to_entry(map)) ||
4460                      (entry->vme_next->vme_start > entry->vme_end))) {
4461                         /* found a hole */
4462                         rc = KERN_INVALID_ADDRESS;
4463                         goto done;
4464                 }
4465                 if ((entry->protection & access_type) != access_type) {
4466                         /* found a protection problem */
4467                         rc = KERN_PROTECTION_FAILURE;
4468                         goto done;
4469                 }
4470
4471                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4472
4473                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4474                         goto done;
4475
4476                 entry->in_transition = TRUE;
4477
4478                 /*
4479                  * This entry might get split once we unlock the map.
4480                  * In vm_fault_wire(), we need the current range as
4481                  * defined by this entry.  In order for this to work
4482                  * along with a simultaneous clip operation, we make a
4483                  * temporary copy of this entry and use that for the
4484                  * wiring.  Note that the underlying objects do not
4485                  * change during a clip.
4486                  */
4487                 tmp_entry = *entry;
4488
4489                 /*
4490                  * The in_transition state guarentees that the entry
4491                  * (or entries for this range, if split occured) will be
4492                  * there when the map lock is acquired for the second time.
4493                  */
4494                 vm_map_unlock(map);
4495
4496                 if (!user_wire && cur_thread != THREAD_NULL)
4497                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
4498                 else
4499                         interruptible_state = THREAD_UNINT;
4500
4501                 if(map_pmap)
4502                         rc = vm_fault_wire(map,
4503                                            &tmp_entry, map_pmap, pmap_addr);
4504                 else
4505                         rc = vm_fault_wire(map,
4506                                            &tmp_entry, map->pmap,
4507                                            tmp_entry.vme_start);
4508
4509                 if (!user_wire && cur_thread != THREAD_NULL)
4510                         thread_interrupt_level(interruptible_state);
4511
4512                 vm_map_lock(map);
4513
4514                 if (last_timestamp+1 != map->timestamp) {
4515                         /*
4516                          * Find the entry again.  It could have been clipped
4517                          * after we unlocked the map.
4518                          */
4519                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4520                                                  &first_entry))
4521                                 panic("vm_map_wire: re-lookup failed");
4522
4523                         entry = first_entry;
4524                 }
4525
4526                 last_timestamp = map->timestamp;
4527
4528                 while ((entry != vm_map_to_entry(map)) &&
4529                        (entry->vme_start < tmp_entry.vme_end)) {
4530                         assert(entry->in_transition);
4531                         entry->in_transition = FALSE;
4532                         if (entry->needs_wakeup) {
4533                                 entry->needs_wakeup = FALSE;
4534                                 need_wakeup = TRUE;
4535                         }
4536                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
4537                                 subtract_wire_counts(map, entry, user_wire);
4538                         }
4539                         entry = entry->vme_next;
4540                 }
4541
4542                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
4543                         goto done;
4544                 }
4545
4546                 s = entry->vme_start;
4547         } /* end while loop through map entries */
4548
4549 done:
4550         if (rc == KERN_SUCCESS) {
4551                 /* repair any damage we may have made to the VM map */
4552                 vm_map_simplify_range(map, start, end);
4553         }
4554
4555         vm_map_unlock(map);
4556
4557         /*
4558          * wake up anybody waiting on entries we wired.
4559          */
4560         if (need_wakeup)
4561                 vm_map_entry_wakeup(map);
4562
4563         if (rc != KERN_SUCCESS) {
4564                 /* undo what has been wired so far */
4565                 vm_map_unwire(map, start, s, user_wire);
4566         }
4567
4568         return rc;
4569
4570 }
4571
4572 kern_return_t
4573 vm_map_wire(
4574         register vm_map_t       map,
4575         register vm_map_offset_t        start,
4576         register vm_map_offset_t        end,
4577         register vm_prot_t      access_type,
4578         boolean_t               user_wire)
4579 {
4580
4581         kern_return_t   kret;
4582
4583         kret = vm_map_wire_nested(map, start, end, access_type,
4584                                   user_wire, (pmap_t)NULL, 0);
4585         return kret;
4586 }
4587
4588 /*
4589  *      vm_map_unwire:
4590  *
4591  *      Sets the pageability of the specified address range in the target
4592  *      as pageable.  Regions specified must have been wired previously.
4593  *
4594  *      The map must not be locked, but a reference must remain to the map
4595  *      throughout the call.
4596  *
4597  *      Kernel will panic on failures.  User unwire ignores holes and
4598  *      unwired and intransition entries to avoid losing memory by leaving
4599  *      it unwired.
4600  */
4601 static kern_return_t
4602 vm_map_unwire_nested(
4603         register vm_map_t       map,
4604         register vm_map_offset_t        start,
4605         register vm_map_offset_t        end,
4606         boolean_t               user_wire,
4607         pmap_t                  map_pmap,
4608         vm_map_offset_t         pmap_addr)
4609 {
4610         register vm_map_entry_t entry;
4611         struct vm_map_entry     *first_entry, tmp_entry;
4612         boolean_t               need_wakeup;
4613         boolean_t               main_map = FALSE;
4614         unsigned int            last_timestamp;
4615
4616         vm_map_lock(map);
4617         if(map_pmap == NULL)
4618                 main_map = TRUE;
4619         last_timestamp = map->timestamp;
4620
4621         VM_MAP_RANGE_CHECK(map, start, end);
4622         assert(page_aligned(start));
4623         assert(page_aligned(end));
4624         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4625         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4626
4627         if (start == end) {
4628                 /* We unwired what the caller asked for: zero pages */
4629                 vm_map_unlock(map);
4630                 return KERN_SUCCESS;
4631         }
4632
4633         if (vm_map_lookup_entry(map, start, &first_entry)) {
4634                 entry = first_entry;
4635                 /*
4636                  * vm_map_clip_start will be done later.
4637                  * We don't want to unnest any nested sub maps here !
4638                  */
4639         }
4640         else {
4641                 if (!user_wire) {
4642                         panic("vm_map_unwire: start not found");
4643                 }
4644                 /*      Start address is not in map. */
4645                 vm_map_unlock(map);
4646                 return(KERN_INVALID_ADDRESS);
4647         }
4648
4649         if (entry->superpage_size) {
4650                 /* superpages are always wired */
4651                 vm_map_unlock(map);
4652                 return KERN_INVALID_ADDRESS;
4653         }
4654
4655         need_wakeup = FALSE;
4656         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4657                 if (entry->in_transition) {
4658                         /*
4659                          * 1)
4660                          * Another thread is wiring down this entry. Note
4661                          * that if it is not for the other thread we would
4662                          * be unwiring an unwired entry.  This is not
4663                          * permitted.  If we wait, we will be unwiring memory
4664                          * we did not wire.
4665                          *
4666                          * 2)
4667                          * Another thread is unwiring this entry.  We did not
4668                          * have a reference to it, because if we did, this
4669                          * entry will not be getting unwired now.
4670                          */
4671                         if (!user_wire) {
4672                                 /*
4673                                  * XXX FBDP
4674                                  * This could happen:  there could be some
4675                                  * overlapping vslock/vsunlock operations
4676                                  * going on.
4677                                  * We should probably just wait and retry,
4678                                  * but then we have to be careful that this
4679                                  * entry could get "simplified" after
4680                                  * "in_transition" gets unset and before
4681                                  * we re-lookup the entry, so we would
4682                                  * have to re-clip the entry to avoid
4683                                  * re-unwiring what we have already unwired...
4684                                  * See vm_map_wire_nested().
4685                                  *
4686                                  * Or we could just ignore "in_transition"
4687                                  * here and proceed to decement the wired
4688                                  * count(s) on this entry.  That should be fine
4689                                  * as long as "wired_count" doesn't drop all
4690                                  * the way to 0 (and we should panic if THAT
4691                                  * happens).
4692                                  */
4693                                 panic("vm_map_unwire: in_transition entry");
4694                         }
4695
4696                         entry = entry->vme_next;
4697                         continue;
4698                 }
4699
4700                 if (entry->is_sub_map) {
4701                         vm_map_offset_t sub_start;
4702                         vm_map_offset_t sub_end;
4703                         vm_map_offset_t local_end;
4704                         pmap_t          pmap;
4705
4706                         vm_map_clip_start(map, entry, start);
4707                         vm_map_clip_end(map, entry, end);
4708
4709                         sub_start = entry->offset;
4710                         sub_end = entry->vme_end - entry->vme_start;
4711                         sub_end += entry->offset;
4712                         local_end = entry->vme_end;
4713                         if(map_pmap == NULL) {
4714                                 if(entry->use_pmap) {
4715                                         pmap = entry->object.sub_map->pmap;
4716                                         pmap_addr = sub_start;
4717                                 } else {
4718                                         pmap = map->pmap;
4719                                         pmap_addr = start;
4720                                 }
4721                                 if (entry->wired_count == 0 ||
4722                                     (user_wire && entry->user_wired_count == 0)) {
4723                                         if (!user_wire)
4724                                                 panic("vm_map_unwire: entry is unwired");
4725                                         entry = entry->vme_next;
4726                                         continue;
4727                                 }
4728
4729                                 /*
4730                                  * Check for holes
4731                                  * Holes: Next entry should be contiguous unless
4732                                  * this is the end of the region.
4733                                  */
4734                                 if (((entry->vme_end < end) &&
4735                                      ((entry->vme_next == vm_map_to_entry(map)) ||
4736                                       (entry->vme_next->vme_start
4737                                        > entry->vme_end)))) {
4738                                         if (!user_wire)
4739                                                 panic("vm_map_unwire: non-contiguous region");
4740 /*
4741                                         entry = entry->vme_next;
4742                                         continue;
4743 */
4744                                 }
4745
4746                                 subtract_wire_counts(map, entry, user_wire);
4747
4748                                 if (entry->wired_count != 0) {
4749                                         entry = entry->vme_next;
4750                                         continue;
4751                                 }
4752
4753                                 entry->in_transition = TRUE;
4754                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
4755
4756                                 /*
4757                                  * We can unlock the map now. The in_transition state
4758                                  * guarantees existance of the entry.
4759                                  */
4760                                 vm_map_unlock(map);
4761                                 vm_map_unwire_nested(entry->object.sub_map,
4762                                                      sub_start, sub_end, user_wire, pmap, pmap_addr);
4763                                 vm_map_lock(map);
4764
4765                                 if (last_timestamp+1 != map->timestamp) {
4766                                         /*
4767                                          * Find the entry again.  It could have been
4768                                          * clipped or deleted after we unlocked the map.
4769                                          */
4770                                         if (!vm_map_lookup_entry(map,
4771                                                                  tmp_entry.vme_start,
4772                                                                  &first_entry)) {
4773                                                 if (!user_wire)
4774                                                         panic("vm_map_unwire: re-lookup failed");
4775                                                 entry = first_entry->vme_next;
4776                                         } else
4777                                                 entry = first_entry;
4778                                 }
4779                                 last_timestamp = map->timestamp;
4780
4781                                 /*
4782                                  * clear transition bit for all constituent entries
4783                                  * that were in the original entry (saved in
4784                                  * tmp_entry).  Also check for waiters.
4785                                  */
4786                                 while ((entry != vm_map_to_entry(map)) &&
4787                                        (entry->vme_start < tmp_entry.vme_end)) {
4788                                         assert(entry->in_transition);
4789                                         entry->in_transition = FALSE;
4790                                         if (entry->needs_wakeup) {
4791                                                 entry->needs_wakeup = FALSE;
4792                                                 need_wakeup = TRUE;
4793                                         }
4794                                         entry = entry->vme_next;
4795                                 }
4796                                 continue;
4797                         } else {
4798                                 vm_map_unlock(map);
4799                                 vm_map_unwire_nested(entry->object.sub_map,
4800                                                      sub_start, sub_end, user_wire, map_pmap,
4801                                                      pmap_addr);
4802                                 vm_map_lock(map);
4803
4804                                 if (last_timestamp+1 != map->timestamp) {
4805                                         /*
4806                                          * Find the entry again.  It could have been
4807                                          * clipped or deleted after we unlocked the map.
4808                                          */
4809                                         if (!vm_map_lookup_entry(map,
4810                                                                  tmp_entry.vme_start,
4811                                                                  &first_entry)) {
4812                                                 if (!user_wire)
4813                                                         panic("vm_map_unwire: re-lookup failed");
4814                                                 entry = first_entry->vme_next;
4815                                         } else
4816                                                 entry = first_entry;
4817                                 }
4818                                 last_timestamp = map->timestamp;
4819                         }
4820                 }
4821
4822
4823                 if ((entry->wired_count == 0) ||
4824                     (user_wire && entry->user_wired_count == 0)) {
4825                         if (!user_wire)
4826                                 panic("vm_map_unwire: entry is unwired");
4827
4828                         entry = entry->vme_next;
4829                         continue;
4830                 }
4831
4832                 assert(entry->wired_count > 0 &&
4833                        (!user_wire || entry->user_wired_count > 0));
4834
4835                 vm_map_clip_start(map, entry, start);
4836                 vm_map_clip_end(map, entry, end);
4837
4838                 /*
4839                  * Check for holes
4840                  * Holes: Next entry should be contiguous unless
4841                  *        this is the end of the region.
4842                  */
4843                 if (((entry->vme_end < end) &&
4844                      ((entry->vme_next == vm_map_to_entry(map)) ||
4845                       (entry->vme_next->vme_start > entry->vme_end)))) {
4846
4847                         if (!user_wire)
4848                                 panic("vm_map_unwire: non-contiguous region");
4849                         entry = entry->vme_next;
4850                         continue;
4851                 }
4852
4853                 subtract_wire_counts(map, entry, user_wire);
4854
4855                 if (entry->wired_count != 0) {
4856                         entry = entry->vme_next;
4857                         continue;
4858                 }
4859
4860                 if(entry->zero_wired_pages) {
4861                         entry->zero_wired_pages = FALSE;
4862                 }
4863
4864                 entry->in_transition = TRUE;
4865                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
4866
4867                 /*
4868                  * We can unlock the map now. The in_transition state
4869                  * guarantees existance of the entry.
4870                  */
4871                 vm_map_unlock(map);
4872                 if(map_pmap) {
4873                         vm_fault_unwire(map,
4874                                         &tmp_entry, FALSE, map_pmap, pmap_addr);
4875                 } else {
4876                         vm_fault_unwire(map,
4877                                         &tmp_entry, FALSE, map->pmap,
4878                                         tmp_entry.vme_start);
4879                 }
4880                 vm_map_lock(map);
4881
4882                 if (last_timestamp+1 != map->timestamp) {
4883                         /*
4884                          * Find the entry again.  It could have been clipped
4885                          * or deleted after we unlocked the map.
4886                          */
4887                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4888                                                  &first_entry)) {
4889                                 if (!user_wire)
4890                                         panic("vm_map_unwire: re-lookup failed");
4891                                 entry = first_entry->vme_next;
4892                         } else
4893                                 entry = first_entry;
4894                 }
4895                 last_timestamp = map->timestamp;
4896
4897                 /*
4898                  * clear transition bit for all constituent entries that
4899                  * were in the original entry (saved in tmp_entry).  Also
4900                  * check for waiters.
4901                  */
4902                 while ((entry != vm_map_to_entry(map)) &&
4903                        (entry->vme_start < tmp_entry.vme_end)) {
4904                         assert(entry->in_transition);
4905                         entry->in_transition = FALSE;
4906                         if (entry->needs_wakeup) {
4907                                 entry->needs_wakeup = FALSE;
4908                                 need_wakeup = TRUE;
4909                         }
4910                         entry = entry->vme_next;
4911                 }
4912         }
4913
4914         /*
4915          * We might have fragmented the address space when we wired this
4916          * range of addresses.  Attempt to re-coalesce these VM map entries
4917          * with their neighbors now that they're no longer wired.
4918          * Under some circumstances, address space fragmentation can
4919          * prevent VM object shadow chain collapsing, which can cause
4920          * swap space leaks.
4921          */
4922         vm_map_simplify_range(map, start, end);
4923
4924         vm_map_unlock(map);
4925         /*
4926          * wake up anybody waiting on entries that we have unwired.
4927          */
4928         if (need_wakeup)
4929                 vm_map_entry_wakeup(map);
4930         return(KERN_SUCCESS);
4931
4932 }
4933
4934 kern_return_t
4935 vm_map_unwire(
4936         register vm_map_t       map,
4937         register vm_map_offset_t        start,
4938         register vm_map_offset_t        end,
4939         boolean_t               user_wire)
4940 {
4941         return vm_map_unwire_nested(map, start, end,
4942                                     user_wire, (pmap_t)NULL, 0);
4943 }
4944
4945
4946 /*
4947  *      vm_map_entry_delete:    [ internal use only ]
4948  *
4949  *      Deallocate the given entry from the target map.
4950  */
4951 static void
4952 vm_map_entry_delete(
4953         register vm_map_t       map,
4954         register vm_map_entry_t entry)
4955 {
4956         register vm_map_offset_t        s, e;
4957         register vm_object_t    object;
4958         register vm_map_t       submap;
4959
4960         s = entry->vme_start;
4961         e = entry->vme_end;
4962         assert(page_aligned(s));
4963         assert(page_aligned(e));
4964         if (entry->map_aligned == TRUE) {
4965                 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
4966                 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
4967         }
4968         assert(entry->wired_count == 0);
4969         assert(entry->user_wired_count == 0);
4970         assert(!entry->permanent);
4971
4972         if (entry->is_sub_map) {
4973                 object = NULL;
4974                 submap = entry->object.sub_map;
4975         } else {
4976                 submap = NULL;
4977                 object = entry->object.vm_object;
4978         }
4979
4980         vm_map_store_entry_unlink(map, entry);
4981         map->size -= e - s;
4982
4983         vm_map_entry_dispose(map, entry);
4984
4985         vm_map_unlock(map);
4986         /*
4987          *      Deallocate the object only after removing all
4988          *      pmap entries pointing to its pages.
4989          */
4990         if (submap)
4991                 vm_map_deallocate(submap);
4992         else
4993                 vm_object_deallocate(object);
4994
4995 }
4996
4997 void
4998 vm_map_submap_pmap_clean(
4999         vm_map_t        map,
5000         vm_map_offset_t start,
5001         vm_map_offset_t end,
5002         vm_map_t        sub_map,
5003         vm_map_offset_t offset)
5004 {
5005         vm_map_offset_t submap_start;
5006         vm_map_offset_t submap_end;
5007         vm_map_size_t   remove_size;
5008         vm_map_entry_t  entry;
5009
5010         submap_end = offset + (end - start);
5011         submap_start = offset;
5012
5013         vm_map_lock_read(sub_map);
5014         if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5015
5016                 remove_size = (entry->vme_end - entry->vme_start);
5017                 if(offset > entry->vme_start)
5018                         remove_size -= offset - entry->vme_start;
5019
5020
5021                 if(submap_end < entry->vme_end) {
5022                         remove_size -=
5023                                 entry->vme_end - submap_end;
5024                 }
5025                 if(entry->is_sub_map) {
5026                         vm_map_submap_pmap_clean(
5027                                 sub_map,
5028                                 start,
5029                                 start + remove_size,
5030                                 entry->object.sub_map,
5031                                 entry->offset);
5032                 } else {
5033
5034                         if((map->mapped_in_other_pmaps) && (map->ref_count)
5035                            && (entry->object.vm_object != NULL)) {
5036                                 vm_object_pmap_protect(
5037                                         entry->object.vm_object,
5038                                         entry->offset+(offset-entry->vme_start),
5039                                         remove_size,
5040                                         PMAP_NULL,
5041                                         entry->vme_start,
5042                                         VM_PROT_NONE);
5043                         } else {
5044                                 pmap_remove(map->pmap,
5045                                             (addr64_t)start,
5046                                             (addr64_t)(start + remove_size));
5047                         }
5048                 }
5049         }
5050
5051         entry = entry->vme_next;
5052
5053         while((entry != vm_map_to_entry(sub_map))
5054               && (entry->vme_start < submap_end)) {
5055                 remove_size = (entry->vme_end - entry->vme_start);
5056                 if(submap_end < entry->vme_end) {
5057                         remove_size -= entry->vme_end - submap_end;
5058                 }
5059                 if(entry->is_sub_map) {
5060                         vm_map_submap_pmap_clean(
5061                                 sub_map,
5062                                 (start + entry->vme_start) - offset,
5063                                 ((start + entry->vme_start) - offset) + remove_size,
5064                                 entry->object.sub_map,
5065                                 entry->offset);
5066                 } else {
5067                         if((map->mapped_in_other_pmaps) && (map->ref_count)
5068                            && (entry->object.vm_object != NULL)) {
5069                                 vm_object_pmap_protect(
5070                                         entry->object.vm_object,
5071                                         entry->offset,
5072                                         remove_size,
5073                                         PMAP_NULL,
5074                                         entry->vme_start,
5075                                         VM_PROT_NONE);
5076                         } else {
5077                                 pmap_remove(map->pmap,
5078                                             (addr64_t)((start + entry->vme_start)
5079                                                        - offset),
5080                                             (addr64_t)(((start + entry->vme_start)
5081                                                         - offset) + remove_size));
5082                         }
5083                 }
5084                 entry = entry->vme_next;
5085         }
5086         vm_map_unlock_read(sub_map);
5087         return;
5088 }
5089
5090 /*
5091  *      vm_map_delete:  [ internal use only ]
5092  *
5093  *      Deallocates the given address range from the target map.
5094  *      Removes all user wirings. Unwires one kernel wiring if
5095  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
5096  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
5097  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5098  *
5099  *      This routine is called with map locked and leaves map locked.
5100  */
5101 static kern_return_t
5102 vm_map_delete(
5103         vm_map_t                map,
5104         vm_map_offset_t         start,
5105         vm_map_offset_t         end,
5106         int                     flags,
5107         vm_map_t                zap_map)
5108 {
5109         vm_map_entry_t          entry, next;
5110         struct   vm_map_entry   *first_entry, tmp_entry;
5111         register vm_map_offset_t s;
5112         register vm_object_t    object;
5113         boolean_t               need_wakeup;
5114         unsigned int            last_timestamp = ~0; /* unlikely value */
5115         int                     interruptible;
5116
5117         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5118                 THREAD_ABORTSAFE : THREAD_UNINT;
5119
5120         /*
5121          * All our DMA I/O operations in IOKit are currently done by
5122          * wiring through the map entries of the task requesting the I/O.
5123          * Because of this, we must always wait for kernel wirings
5124          * to go away on the entries before deleting them.
5125          *
5126          * Any caller who wants to actually remove a kernel wiring
5127          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5128          * properly remove one wiring instead of blasting through
5129          * them all.
5130          */
5131         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5132
5133         while(1) {
5134                 /*
5135                  *      Find the start of the region, and clip it
5136                  */
5137                 if (vm_map_lookup_entry(map, start, &first_entry)) {
5138                         entry = first_entry;
5139                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */                           start = SUPERPAGE_ROUND_DOWN(start);
5140                                 start = SUPERPAGE_ROUND_DOWN(start);
5141                                 continue;
5142                         }
5143                         if (start == entry->vme_start) {
5144                                 /*
5145                                  * No need to clip.  We don't want to cause
5146                                  * any unnecessary unnesting in this case...
5147                                  */
5148                         } else {
5149                                 vm_map_clip_start(map, entry, start);
5150                         }
5151
5152                         /*
5153                          *      Fix the lookup hint now, rather than each
5154                          *      time through the loop.
5155                          */
5156                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5157                 } else {
5158                         entry = first_entry->vme_next;
5159                 }
5160                 break;
5161         }
5162         if (entry->superpage_size)
5163                 end = SUPERPAGE_ROUND_UP(end);
5164
5165         need_wakeup = FALSE;
5166         /*
5167          *      Step through all entries in this region
5168          */
5169         s = entry->vme_start;
5170         while ((entry != vm_map_to_entry(map)) && (s < end)) {
5171                 /*
5172                  * At this point, we have deleted all the memory entries
5173                  * between "start" and "s".  We still need to delete
5174                  * all memory entries between "s" and "end".
5175                  * While we were blocked and the map was unlocked, some
5176                  * new memory entries could have been re-allocated between
5177                  * "start" and "s" and we don't want to mess with those.
5178                  * Some of those entries could even have been re-assembled
5179                  * with an entry after "s" (in vm_map_simplify_entry()), so
5180                  * we may have to vm_map_clip_start() again.
5181                  */
5182
5183                 if (entry->vme_start >= s) {
5184                         /*
5185                          * This entry starts on or after "s"
5186                          * so no need to clip its start.
5187                          */
5188                 } else {
5189                         /*
5190                          * This entry has been re-assembled by a
5191                          * vm_map_simplify_entry().  We need to
5192                          * re-clip its start.
5193                          */
5194                         vm_map_clip_start(map, entry, s);
5195                 }
5196                 if (entry->vme_end <= end) {
5197                         /*
5198                          * This entry is going away completely, so no need
5199                          * to clip and possibly cause an unnecessary unnesting.
5200                          */
5201                 } else {
5202                         vm_map_clip_end(map, entry, end);
5203                 }
5204
5205                 if (entry->permanent) {
5206                         panic("attempt to remove permanent VM map entry "
5207                               "%p [0x%llx:0x%llx]\n",
5208                               entry, (uint64_t) s, (uint64_t) end);
5209                 }
5210
5211
5212                 if (entry->in_transition) {
5213                         wait_result_t wait_result;
5214
5215                         /*
5216                          * Another thread is wiring/unwiring this entry.
5217                          * Let the other thread know we are waiting.
5218                          */
5219                         assert(s == entry->vme_start);
5220                         entry->needs_wakeup = TRUE;
5221
5222                         /*
5223                          * wake up anybody waiting on entries that we have
5224                          * already unwired/deleted.
5225                          */
5226                         if (need_wakeup) {
5227                                 vm_map_entry_wakeup(map);
5228                                 need_wakeup = FALSE;
5229                         }
5230
5231                         wait_result = vm_map_entry_wait(map, interruptible);
5232
5233                         if (interruptible &&
5234                             wait_result == THREAD_INTERRUPTED) {
5235                                 /*
5236                                  * We do not clear the needs_wakeup flag,
5237                                  * since we cannot tell if we were the only one.
5238                                  */
5239                                 vm_map_unlock(map);
5240                                 return KERN_ABORTED;
5241                         }
5242
5243                         /*
5244                          * The entry could have been clipped or it
5245                          * may not exist anymore.  Look it up again.
5246                          */
5247                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
5248                                 assert((map != kernel_map) &&
5249                                        (!entry->is_sub_map));
5250                                 /*
5251                                  * User: use the next entry
5252                                  */
5253                                 entry = first_entry->vme_next;
5254                                 s = entry->vme_start;
5255                         } else {
5256                                 entry = first_entry;
5257                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5258                         }
5259                         last_timestamp = map->timestamp;
5260                         continue;
5261                 } /* end in_transition */
5262
5263                 if (entry->wired_count) {
5264                         boolean_t       user_wire;
5265
5266                         user_wire = entry->user_wired_count > 0;
5267
5268                         /*
5269                          *      Remove a kernel wiring if requested
5270                          */
5271                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
5272                                 entry->wired_count--;
5273                         }
5274
5275                         /*
5276                          *      Remove all user wirings for proper accounting
5277                          */
5278                         if (entry->user_wired_count > 0) {
5279                                 while (entry->user_wired_count)
5280                                         subtract_wire_counts(map, entry, user_wire);
5281                         }
5282
5283                         if (entry->wired_count != 0) {
5284                                 assert(map != kernel_map);
5285                                 /*
5286                                  * Cannot continue.  Typical case is when
5287                                  * a user thread has physical io pending on
5288                                  * on this page.  Either wait for the
5289                                  * kernel wiring to go away or return an
5290                                  * error.
5291                                  */
5292                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5293                                         wait_result_t wait_result;
5294
5295                                         assert(s == entry->vme_start);
5296                                         entry->needs_wakeup = TRUE;
5297                                         wait_result = vm_map_entry_wait(map,
5298                                                                         interruptible);
5299
5300                                         if (interruptible &&
5301                                             wait_result == THREAD_INTERRUPTED) {
5302                                                 /*
5303                                                  * We do not clear the
5304                                                  * needs_wakeup flag, since we
5305                                                  * cannot tell if we were the
5306                                                  * only one.
5307                                                  */
5308                                                 vm_map_unlock(map);
5309                                                 return KERN_ABORTED;
5310                                         }
5311
5312                                         /*
5313                                          * The entry could have been clipped or
5314                                          * it may not exist anymore.  Look it
5315                                          * up again.
5316                                          */
5317                                         if (!vm_map_lookup_entry(map, s,
5318                                                                  &first_entry)) {
5319                                                 assert(map != kernel_map);
5320                                                 /*
5321                                                  * User: use the next entry
5322                                                  */
5323                                                 entry = first_entry->vme_next;
5324                                                 s = entry->vme_start;
5325                                         } else {
5326                                                 entry = first_entry;
5327                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5328                                         }
5329                                         last_timestamp = map->timestamp;
5330                                         continue;
5331                                 }
5332                                 else {
5333                                         return KERN_FAILURE;
5334                                 }
5335                         }
5336
5337                         entry->in_transition = TRUE;
5338                         /*
5339                          * copy current entry.  see comment in vm_map_wire()
5340                          */
5341                         tmp_entry = *entry;
5342                         assert(s == entry->vme_start);
5343
5344                         /*
5345                          * We can unlock the map now. The in_transition
5346                          * state guarentees existance of the entry.
5347                          */
5348                         vm_map_unlock(map);
5349
5350                         if (tmp_entry.is_sub_map) {
5351                                 vm_map_t sub_map;
5352                                 vm_map_offset_t sub_start, sub_end;
5353                                 pmap_t pmap;
5354                                 vm_map_offset_t pmap_addr;
5355
5356
5357                                 sub_map = tmp_entry.object.sub_map;
5358                                 sub_start = tmp_entry.offset;
5359                                 sub_end = sub_start + (tmp_entry.vme_end -
5360                                                        tmp_entry.vme_start);
5361                                 if (tmp_entry.use_pmap) {
5362                                         pmap = sub_map->pmap;
5363                                         pmap_addr = tmp_entry.vme_start;
5364                                 } else {
5365                                         pmap = map->pmap;
5366                                         pmap_addr = tmp_entry.vme_start;
5367                                 }
5368                                 (void) vm_map_unwire_nested(sub_map,
5369                                                             sub_start, sub_end,
5370                                                             user_wire,
5371                                                             pmap, pmap_addr);
5372                         } else {
5373
5374                                 if (tmp_entry.object.vm_object == kernel_object) {
5375                                         pmap_protect_options(
5376                                                 map->pmap,
5377                                                 tmp_entry.vme_start,
5378                                                 tmp_entry.vme_end,
5379                                                 VM_PROT_NONE,
5380                                                 PMAP_OPTIONS_REMOVE,
5381                                                 NULL);
5382                                 }
5383                                 vm_fault_unwire(map, &tmp_entry,
5384                                                 tmp_entry.object.vm_object == kernel_object,
5385                                                 map->pmap, tmp_entry.vme_start);
5386                         }
5387
5388                         vm_map_lock(map);
5389
5390                         if (last_timestamp+1 != map->timestamp) {
5391                                 /*
5392                                  * Find the entry again.  It could have
5393                                  * been clipped after we unlocked the map.
5394                                  */
5395                                 if (!vm_map_lookup_entry(map, s, &first_entry)){
5396                                         assert((map != kernel_map) &&
5397                                                (!entry->is_sub_map));
5398                                         first_entry = first_entry->vme_next;
5399                                         s = first_entry->vme_start;
5400                                 } else {
5401                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5402                                 }
5403                         } else {
5404                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5405                                 first_entry = entry;
5406                         }
5407
5408                         last_timestamp = map->timestamp;
5409
5410                         entry = first_entry;
5411                         while ((entry != vm_map_to_entry(map)) &&
5412                                (entry->vme_start < tmp_entry.vme_end)) {
5413                                 assert(entry->in_transition);
5414                                 entry->in_transition = FALSE;
5415                                 if (entry->needs_wakeup) {
5416                                         entry->needs_wakeup = FALSE;
5417                                         need_wakeup = TRUE;
5418                                 }
5419                                 entry = entry->vme_next;
5420                         }
5421                         /*
5422                          * We have unwired the entry(s).  Go back and
5423                          * delete them.
5424                          */
5425                         entry = first_entry;
5426                         continue;
5427                 }
5428
5429                 /* entry is unwired */
5430                 assert(entry->wired_count == 0);
5431                 assert(entry->user_wired_count == 0);
5432
5433                 assert(s == entry->vme_start);
5434
5435                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5436                         /*
5437                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5438                          * vm_map_delete(), some map entries might have been
5439                          * transferred to a "zap_map", which doesn't have a
5440                          * pmap.  The original pmap has already been flushed
5441                          * in the vm_map_delete() call targeting the original
5442                          * map, but when we get to destroying the "zap_map",
5443                          * we don't have any pmap to flush, so let's just skip
5444                          * all this.
5445                          */
5446                 } else if (entry->is_sub_map) {
5447                         if (entry->use_pmap) {
5448 #ifndef NO_NESTED_PMAP
5449                                 pmap_unnest(map->pmap,
5450                                             (addr64_t)entry->vme_start,
5451                                             entry->vme_end - entry->vme_start);
5452 #endif  /* NO_NESTED_PMAP */
5453                                 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5454                                         /* clean up parent map/maps */
5455                                         vm_map_submap_pmap_clean(
5456                                                 map, entry->vme_start,
5457                                                 entry->vme_end,
5458                                                 entry->object.sub_map,
5459                                                 entry->offset);
5460                                 }
5461                         } else {
5462                                 vm_map_submap_pmap_clean(
5463                                         map, entry->vme_start, entry->vme_end,
5464                                         entry->object.sub_map,
5465                                         entry->offset);
5466                         }
5467                 } else if (entry->object.vm_object != kernel_object &&
5468                            entry->object.vm_object != compressor_object) {
5469                         object = entry->object.vm_object;
5470                         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5471                                 vm_object_pmap_protect_options(
5472                                         object, entry->offset,
5473                                         entry->vme_end - entry->vme_start,
5474                                         PMAP_NULL,
5475                                         entry->vme_start,
5476                                         VM_PROT_NONE,
5477                                         PMAP_OPTIONS_REMOVE);
5478                         } else if ((entry->object.vm_object !=
5479                                     VM_OBJECT_NULL) ||
5480                                    (map->pmap == kernel_pmap)) {
5481                                 /* Remove translations associated
5482                                  * with this range unless the entry
5483                                  * does not have an object, or
5484                                  * it's the kernel map or a descendant
5485                                  * since the platform could potentially
5486                                  * create "backdoor" mappings invisible
5487                                  * to the VM. It is expected that
5488                                  * objectless, non-kernel ranges
5489                                  * do not have such VM invisible
5490                                  * translations.
5491                                  */
5492                                 pmap_remove_options(map->pmap,
5493                                                     (addr64_t)entry->vme_start,
5494                                                     (addr64_t)entry->vme_end,
5495                                                     PMAP_OPTIONS_REMOVE);
5496                         }
5497                 }
5498
5499                 /*
5500                  * All pmap mappings for this map entry must have been
5501                  * cleared by now.
5502                  */
5503                 assert(vm_map_pmap_is_empty(map,
5504                                             entry->vme_start,
5505                                             entry->vme_end));
5506
5507                 next = entry->vme_next;
5508                 s = next->vme_start;
5509                 last_timestamp = map->timestamp;
5510
5511                 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5512                     zap_map != VM_MAP_NULL) {
5513                         vm_map_size_t entry_size;
5514                         /*
5515                          * The caller wants to save the affected VM map entries
5516                          * into the "zap_map".  The caller will take care of
5517                          * these entries.
5518                          */
5519                         /* unlink the entry from "map" ... */
5520                         vm_map_store_entry_unlink(map, entry);
5521                         /* ... and add it to the end of the "zap_map" */
5522                         vm_map_store_entry_link(zap_map,
5523                                           vm_map_last_entry(zap_map),
5524                                           entry);
5525                         entry_size = entry->vme_end - entry->vme_start;
5526                         map->size -= entry_size;
5527                         zap_map->size += entry_size;
5528                         /* we didn't unlock the map, so no timestamp increase */
5529                         last_timestamp--;
5530                 } else {
5531                         vm_map_entry_delete(map, entry);
5532                         /* vm_map_entry_delete unlocks the map */
5533                         vm_map_lock(map);
5534                 }
5535
5536                 entry = next;
5537
5538                 if(entry == vm_map_to_entry(map)) {
5539                         break;
5540                 }
5541                 if (last_timestamp+1 != map->timestamp) {
5542                         /*
5543                          * we are responsible for deleting everything
5544                          * from the give space, if someone has interfered
5545                          * we pick up where we left off, back fills should
5546                          * be all right for anyone except map_delete and
5547                          * we have to assume that the task has been fully
5548                          * disabled before we get here
5549                          */
5550                         if (!vm_map_lookup_entry(map, s, &entry)){
5551                                 entry = entry->vme_next;
5552                                 s = entry->vme_start;
5553                         } else {
5554                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5555                         }
5556                         /*
5557                          * others can not only allocate behind us, we can
5558                          * also see coalesce while we don't have the map lock
5559                          */
5560                         if(entry == vm_map_to_entry(map)) {
5561                                 break;
5562                         }
5563                 }
5564                 last_timestamp = map->timestamp;
5565         }
5566
5567         if (map->wait_for_space)
5568                 thread_wakeup((event_t) map);
5569         /*
5570          * wake up anybody waiting on entries that we have already deleted.
5571          */
5572         if (need_wakeup)
5573                 vm_map_entry_wakeup(map);
5574
5575         return KERN_SUCCESS;
5576 }
5577
5578 /*
5579  *      vm_map_remove:
5580  *
5581  *      Remove the given address range from the target map.
5582  *      This is the exported form of vm_map_delete.
5583  */
5584 kern_return_t
5585 vm_map_remove(
5586         register vm_map_t       map,
5587         register vm_map_offset_t        start,
5588         register vm_map_offset_t        end,
5589         register boolean_t      flags)
5590 {
5591         register kern_return_t  result;
5592
5593         vm_map_lock(map);
5594         VM_MAP_RANGE_CHECK(map, start, end);
5595         /*
5596          * For the zone_map, the kernel controls the allocation/freeing of memory.
5597          * Any free to the zone_map should be within the bounds of the map and
5598          * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
5599          * free to the zone_map into a no-op, there is a problem and we should
5600          * panic.
5601          */
5602         if ((map == zone_map) && (start == end))
5603                 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
5604         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5605         vm_map_unlock(map);
5606
5607         return(result);
5608 }
5609
5610
5611 /*
5612  *      Routine:        vm_map_copy_discard
5613  *
5614  *      Description:
5615  *              Dispose of a map copy object (returned by
5616  *              vm_map_copyin).
5617  */
5618 void
5619 vm_map_copy_discard(
5620         vm_map_copy_t   copy)
5621 {
5622         if (copy == VM_MAP_COPY_NULL)
5623                 return;
5624
5625         switch (copy->type) {
5626         case VM_MAP_COPY_ENTRY_LIST:
5627                 while (vm_map_copy_first_entry(copy) !=
5628                        vm_map_copy_to_entry(copy)) {
5629                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
5630
5631                         vm_map_copy_entry_unlink(copy, entry);
5632                         if (entry->is_sub_map) {
5633                                 vm_map_deallocate(entry->object.sub_map);
5634                         } else {
5635                                 vm_object_deallocate(entry->object.vm_object);
5636                         }
5637                         vm_map_copy_entry_dispose(copy, entry);
5638                 }
5639                 break;
5640         case VM_MAP_COPY_OBJECT:
5641                 vm_object_deallocate(copy->cpy_object);
5642                 break;
5643         case VM_MAP_COPY_KERNEL_BUFFER:
5644
5645                 /*
5646                  * The vm_map_copy_t and possibly the data buffer were
5647                  * allocated by a single call to kalloc(), i.e. the
5648                  * vm_map_copy_t was not allocated out of the zone.
5649                  */
5650                 kfree(copy, copy->cpy_kalloc_size);
5651                 return;
5652         }
5653         zfree(vm_map_copy_zone, copy);
5654 }
5655
5656 /*
5657  *      Routine:        vm_map_copy_copy
5658  *
5659  *      Description:
5660  *                      Move the information in a map copy object to
5661  *                      a new map copy object, leaving the old one
5662  *                      empty.
5663  *
5664  *                      This is used by kernel routines that need
5665  *                      to look at out-of-line data (in copyin form)
5666  *                      before deciding whether to return SUCCESS.
5667  *                      If the routine returns FAILURE, the original
5668  *                      copy object will be deallocated; therefore,
5669  *                      these routines must make a copy of the copy
5670  *                      object and leave the original empty so that
5671  *                      deallocation will not fail.
5672  */
5673 vm_map_copy_t
5674 vm_map_copy_copy(
5675         vm_map_copy_t   copy)
5676 {
5677         vm_map_copy_t   new_copy;
5678
5679         if (copy == VM_MAP_COPY_NULL)
5680                 return VM_MAP_COPY_NULL;
5681
5682         /*
5683          * Allocate a new copy object, and copy the information
5684          * from the old one into it.
5685          */
5686
5687         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5688         *new_copy = *copy;
5689
5690         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5691                 /*
5692                  * The links in the entry chain must be
5693                  * changed to point to the new copy object.
5694                  */
5695                 vm_map_copy_first_entry(copy)->vme_prev
5696                         = vm_map_copy_to_entry(new_copy);
5697                 vm_map_copy_last_entry(copy)->vme_next
5698                         = vm_map_copy_to_entry(new_copy);
5699         }
5700
5701         /*
5702          * Change the old copy object into one that contains
5703          * nothing to be deallocated.
5704          */
5705         copy->type = VM_MAP_COPY_OBJECT;
5706         copy->cpy_object = VM_OBJECT_NULL;
5707
5708         /*
5709          * Return the new object.
5710          */
5711         return new_copy;
5712 }
5713
5714 static kern_return_t
5715 vm_map_overwrite_submap_recurse(
5716         vm_map_t        dst_map,
5717         vm_map_offset_t dst_addr,
5718         vm_map_size_t   dst_size)
5719 {
5720         vm_map_offset_t dst_end;
5721         vm_map_entry_t  tmp_entry;
5722         vm_map_entry_t  entry;
5723         kern_return_t   result;
5724         boolean_t       encountered_sub_map = FALSE;
5725
5726
5727
5728         /*
5729          *      Verify that the destination is all writeable
5730          *      initially.  We have to trunc the destination
5731          *      address and round the copy size or we'll end up
5732          *      splitting entries in strange ways.
5733          */
5734
5735         dst_end = vm_map_round_page(dst_addr + dst_size,
5736                                     VM_MAP_PAGE_MASK(dst_map));
5737         vm_map_lock(dst_map);
5738
5739 start_pass_1:
5740         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5741                 vm_map_unlock(dst_map);
5742                 return(KERN_INVALID_ADDRESS);
5743         }
5744
5745         vm_map_clip_start(dst_map,
5746                           tmp_entry,
5747                           vm_map_trunc_page(dst_addr,
5748                                             VM_MAP_PAGE_MASK(dst_map)));
5749         assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5750
5751         for (entry = tmp_entry;;) {
5752                 vm_map_entry_t  next;
5753
5754                 next = entry->vme_next;
5755                 while(entry->is_sub_map) {
5756                         vm_map_offset_t sub_start;
5757                         vm_map_offset_t sub_end;
5758                         vm_map_offset_t local_end;
5759
5760                         if (entry->in_transition) {
5761                                 /*
5762                                  * Say that we are waiting, and wait for entry.
5763                                  */
5764                                 entry->needs_wakeup = TRUE;
5765                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
5766
5767                                 goto start_pass_1;
5768                         }
5769
5770                         encountered_sub_map = TRUE;
5771                         sub_start = entry->offset;
5772
5773                         if(entry->vme_end < dst_end)
5774                                 sub_end = entry->vme_end;
5775                         else
5776                                 sub_end = dst_end;
5777                         sub_end -= entry->vme_start;
5778                         sub_end += entry->offset;
5779                         local_end = entry->vme_end;
5780                         vm_map_unlock(dst_map);
5781
5782                         result = vm_map_overwrite_submap_recurse(
5783                                 entry->object.sub_map,
5784                                 sub_start,
5785                                 sub_end - sub_start);
5786
5787                         if(result != KERN_SUCCESS)
5788                                 return result;
5789                         if (dst_end <= entry->vme_end)
5790                                 return KERN_SUCCESS;
5791                         vm_map_lock(dst_map);
5792                         if(!vm_map_lookup_entry(dst_map, local_end,
5793                                                 &tmp_entry)) {
5794                                 vm_map_unlock(dst_map);
5795                                 return(KERN_INVALID_ADDRESS);
5796                         }
5797                         entry = tmp_entry;
5798                         next = entry->vme_next;
5799                 }
5800
5801                 if ( ! (entry->protection & VM_PROT_WRITE)) {
5802                         vm_map_unlock(dst_map);
5803                         return(KERN_PROTECTION_FAILURE);
5804                 }
5805
5806                 /*
5807                  *      If the entry is in transition, we must wait
5808                  *      for it to exit that state.  Anything could happen
5809                  *      when we unlock the map, so start over.
5810                  */
5811                 if (entry->in_transition) {
5812
5813                         /*
5814                          * Say that we are waiting, and wait for entry.
5815                          */
5816                         entry->needs_wakeup = TRUE;
5817                         vm_map_entry_wait(dst_map, THREAD_UNINT);
5818
5819                         goto start_pass_1;
5820                 }
5821
5822 /*
5823  *              our range is contained completely within this map entry
5824  */
5825                 if (dst_end <= entry->vme_end) {
5826                         vm_map_unlock(dst_map);
5827                         return KERN_SUCCESS;
5828                 }
5829 /*
5830  *              check that range specified is contiguous region
5831  */
5832                 if ((next == vm_map_to_entry(dst_map)) ||
5833                     (next->vme_start != entry->vme_end)) {
5834                         vm_map_unlock(dst_map);
5835                         return(KERN_INVALID_ADDRESS);
5836                 }
5837
5838                 /*
5839                  *      Check for permanent objects in the destination.
5840                  */
5841                 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5842                     ((!entry->object.vm_object->internal) ||
5843                      (entry->object.vm_object->true_share))) {
5844                         if(encountered_sub_map) {
5845                                 vm_map_unlock(dst_map);
5846                                 return(KERN_FAILURE);
5847                         }
5848                 }
5849
5850
5851                 entry = next;
5852         }/* for */
5853         vm_map_unlock(dst_map);
5854         return(KERN_SUCCESS);
5855 }
5856
5857 /*
5858  *      Routine:        vm_map_copy_overwrite
5859  *
5860  *      Description:
5861  *              Copy the memory described by the map copy
5862  *              object (copy; returned by vm_map_copyin) onto
5863  *              the specified destination region (dst_map, dst_addr).
5864  *              The destination must be writeable.
5865  *
5866  *              Unlike vm_map_copyout, this routine actually
5867  *              writes over previously-mapped memory.  If the
5868  *              previous mapping was to a permanent (user-supplied)
5869  *              memory object, it is preserved.
5870  *
5871  *              The attributes (protection and inheritance) of the
5872  *              destination region are preserved.
5873  *
5874  *              If successful, consumes the copy object.
5875  *              Otherwise, the caller is responsible for it.
5876  *
5877  *      Implementation notes:
5878  *              To overwrite aligned temporary virtual memory, it is
5879  *              sufficient to remove the previous mapping and insert
5880  *              the new copy.  This replacement is done either on
5881  *              the whole region (if no permanent virtual memory
5882  *              objects are embedded in the destination region) or
5883  *              in individual map entries.
5884  *
5885  *              To overwrite permanent virtual memory , it is necessary
5886  *              to copy each page, as the external memory management
5887  *              interface currently does not provide any optimizations.
5888  *
5889  *              Unaligned memory also has to be copied.  It is possible
5890  *              to use 'vm_trickery' to copy the aligned data.  This is
5891  *              not done but not hard to implement.
5892  *
5893  *              Once a page of permanent memory has been overwritten,
5894  *              it is impossible to interrupt this function; otherwise,
5895  *              the call would be neither atomic nor location-independent.
5896  *              The kernel-state portion of a user thread must be
5897  *              interruptible.
5898  *
5899  *              It may be expensive to forward all requests that might
5900  *              overwrite permanent memory (vm_write, vm_copy) to
5901  *              uninterruptible kernel threads.  This routine may be
5902  *              called by interruptible threads; however, success is
5903  *              not guaranteed -- if the request cannot be performed
5904  *              atomically and interruptibly, an error indication is
5905  *              returned.
5906  */
5907
5908 static kern_return_t
5909 vm_map_copy_overwrite_nested(
5910         vm_map_t                dst_map,
5911         vm_map_address_t        dst_addr,
5912         vm_map_copy_t           copy,
5913         boolean_t               interruptible,
5914         pmap_t                  pmap,
5915         boolean_t               discard_on_success)
5916 {
5917         vm_map_offset_t         dst_end;
5918         vm_map_entry_t          tmp_entry;
5919         vm_map_entry_t          entry;
5920         kern_return_t           kr;
5921         boolean_t               aligned = TRUE;
5922         boolean_t               contains_permanent_objects = FALSE;
5923         boolean_t               encountered_sub_map = FALSE;
5924         vm_map_offset_t         base_addr;
5925         vm_map_size_t           copy_size;
5926         vm_map_size_t           total_size;
5927
5928
5929         /*
5930          *      Check for null copy object.
5931          */
5932
5933         if (copy == VM_MAP_COPY_NULL)
5934                 return(KERN_SUCCESS);
5935
5936         /*
5937          *      Check for special kernel buffer allocated
5938          *      by new_ipc_kmsg_copyin.
5939          */
5940
5941         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5942                 return(vm_map_copyout_kernel_buffer(
5943                                dst_map, &dst_addr,
5944                                copy, TRUE, discard_on_success));
5945         }
5946
5947         /*
5948          *      Only works for entry lists at the moment.  Will
5949          *      support page lists later.
5950          */
5951
5952         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5953
5954         if (copy->size == 0) {
5955                 if (discard_on_success)
5956                         vm_map_copy_discard(copy);
5957                 return(KERN_SUCCESS);
5958         }
5959
5960         /*
5961          *      Verify that the destination is all writeable
5962          *      initially.  We have to trunc the destination
5963          *      address and round the copy size or we'll end up
5964          *      splitting entries in strange ways.
5965          */
5966
5967         if (!VM_MAP_PAGE_ALIGNED(copy->size,
5968                                  VM_MAP_PAGE_MASK(dst_map)) ||
5969             !VM_MAP_PAGE_ALIGNED(copy->offset,
5970                                  VM_MAP_PAGE_MASK(dst_map)) ||
5971             !VM_MAP_PAGE_ALIGNED(dst_addr,
5972                                  VM_MAP_PAGE_MASK(dst_map)) ||
5973             dst_map->hdr.page_shift != copy->cpy_hdr.page_shift)
5974         {
5975                 aligned = FALSE;
5976                 dst_end = vm_map_round_page(dst_addr + copy->size,
5977                                             VM_MAP_PAGE_MASK(dst_map));
5978         } else {
5979                 dst_end = dst_addr + copy->size;
5980         }
5981
5982         vm_map_lock(dst_map);
5983
5984         /* LP64todo - remove this check when vm_map_commpage64()
5985          * no longer has to stuff in a map_entry for the commpage
5986          * above the map's max_offset.
5987          */
5988         if (dst_addr >= dst_map->max_offset) {
5989                 vm_map_unlock(dst_map);
5990                 return(KERN_INVALID_ADDRESS);
5991         }
5992
5993 start_pass_1:
5994         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5995                 vm_map_unlock(dst_map);
5996                 return(KERN_INVALID_ADDRESS);
5997         }
5998         vm_map_clip_start(dst_map,
5999                           tmp_entry,
6000                           vm_map_trunc_page(dst_addr,
6001                                             VM_MAP_PAGE_MASK(dst_map)));
6002         for (entry = tmp_entry;;) {
6003                 vm_map_entry_t  next = entry->vme_next;
6004
6005                 while(entry->is_sub_map) {
6006                         vm_map_offset_t sub_start;
6007                         vm_map_offset_t sub_end;
6008                         vm_map_offset_t local_end;
6009
6010                         if (entry->in_transition) {
6011
6012                                 /*
6013                                  * Say that we are waiting, and wait for entry.
6014                                  */
6015                                 entry->needs_wakeup = TRUE;
6016                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
6017
6018                                 goto start_pass_1;
6019                         }
6020
6021                         local_end = entry->vme_end;
6022                         if (!(entry->needs_copy)) {
6023                                 /* if needs_copy we are a COW submap */
6024                                 /* in such a case we just replace so */
6025                                 /* there is no need for the follow-  */
6026                                 /* ing check.                        */
6027                                 encountered_sub_map = TRUE;
6028                                 sub_start = entry->offset;
6029
6030                                 if(entry->vme_end < dst_end)
6031                                         sub_end = entry->vme_end;
6032                                 else
6033                                         sub_end = dst_end;
6034                                 sub_end -= entry->vme_start;
6035                                 sub_end += entry->offset;
6036                                 vm_map_unlock(dst_map);
6037
6038                                 kr = vm_map_overwrite_submap_recurse(
6039                                         entry->object.sub_map,
6040                                         sub_start,
6041                                         sub_end - sub_start);
6042                                 if(kr != KERN_SUCCESS)
6043                                         return kr;
6044                                 vm_map_lock(dst_map);
6045                         }
6046
6047                         if (dst_end <= entry->vme_end)
6048                                 goto start_overwrite;
6049                         if(!vm_map_lookup_entry(dst_map, local_end,
6050                                                 &entry)) {
6051                                 vm_map_unlock(dst_map);
6052                                 return(KERN_INVALID_ADDRESS);
6053                         }
6054                         next = entry->vme_next;
6055                 }
6056
6057                 if ( ! (entry->protection & VM_PROT_WRITE)) {
6058                         vm_map_unlock(dst_map);
6059                         return(KERN_PROTECTION_FAILURE);
6060                 }
6061
6062                 /*
6063                  *      If the entry is in transition, we must wait
6064                  *      for it to exit that state.  Anything could happen
6065                  *      when we unlock the map, so start over.
6066                  */
6067                 if (entry->in_transition) {
6068
6069                         /*
6070                          * Say that we are waiting, and wait for entry.
6071                          */
6072                         entry->needs_wakeup = TRUE;
6073                         vm_map_entry_wait(dst_map, THREAD_UNINT);
6074
6075                         goto start_pass_1;
6076                 }
6077
6078 /*
6079  *              our range is contained completely within this map entry
6080  */
6081                 if (dst_end <= entry->vme_end)
6082                         break;
6083 /*
6084  *              check that range specified is contiguous region
6085  */
6086                 if ((next == vm_map_to_entry(dst_map)) ||
6087                     (next->vme_start != entry->vme_end)) {
6088                         vm_map_unlock(dst_map);
6089                         return(KERN_INVALID_ADDRESS);
6090                 }
6091
6092
6093                 /*
6094                  *      Check for permanent objects in the destination.
6095                  */
6096                 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6097                     ((!entry->object.vm_object->internal) ||
6098                      (entry->object.vm_object->true_share))) {
6099                         contains_permanent_objects = TRUE;
6100                 }
6101
6102                 entry = next;
6103         }/* for */
6104
6105 start_overwrite:
6106         /*
6107          *      If there are permanent objects in the destination, then
6108          *      the copy cannot be interrupted.
6109          */
6110
6111         if (interruptible && contains_permanent_objects) {
6112                 vm_map_unlock(dst_map);
6113                 return(KERN_FAILURE);   /* XXX */
6114         }
6115
6116         /*
6117          *
6118          *      Make a second pass, overwriting the data
6119          *      At the beginning of each loop iteration,
6120          *      the next entry to be overwritten is "tmp_entry"
6121          *      (initially, the value returned from the lookup above),
6122          *      and the starting address expected in that entry
6123          *      is "start".
6124          */
6125
6126         total_size = copy->size;
6127         if(encountered_sub_map) {
6128                 copy_size = 0;
6129                 /* re-calculate tmp_entry since we've had the map */
6130                 /* unlocked */
6131                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
6132                         vm_map_unlock(dst_map);
6133                         return(KERN_INVALID_ADDRESS);
6134                 }
6135         } else {
6136                 copy_size = copy->size;
6137         }
6138
6139         base_addr = dst_addr;
6140         while(TRUE) {
6141                 /* deconstruct the copy object and do in parts */
6142                 /* only in sub_map, interruptable case */
6143                 vm_map_entry_t  copy_entry;
6144                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
6145                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
6146                 int             nentries;
6147                 int             remaining_entries = 0;
6148                 vm_map_offset_t new_offset = 0;
6149
6150                 for (entry = tmp_entry; copy_size == 0;) {
6151                         vm_map_entry_t  next;
6152
6153                         next = entry->vme_next;
6154
6155                         /* tmp_entry and base address are moved along */
6156                         /* each time we encounter a sub-map.  Otherwise */
6157                         /* entry can outpase tmp_entry, and the copy_size */
6158                         /* may reflect the distance between them */
6159                         /* if the current entry is found to be in transition */
6160                         /* we will start over at the beginning or the last */
6161                         /* encounter of a submap as dictated by base_addr */
6162                         /* we will zero copy_size accordingly. */
6163                         if (entry->in_transition) {
6164                                 /*
6165                                  * Say that we are waiting, and wait for entry.
6166                                  */
6167                                 entry->needs_wakeup = TRUE;
6168                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
6169
6170                                 if(!vm_map_lookup_entry(dst_map, base_addr,
6171                                                         &tmp_entry)) {
6172                                         vm_map_unlock(dst_map);
6173                                         return(KERN_INVALID_ADDRESS);
6174                                 }
6175                                 copy_size = 0;
6176                                 entry = tmp_entry;
6177                                 continue;
6178                         }
6179                         if(entry->is_sub_map) {
6180                                 vm_map_offset_t sub_start;
6181                                 vm_map_offset_t sub_end;
6182                                 vm_map_offset_t local_end;
6183
6184                                 if (entry->needs_copy) {
6185                                         /* if this is a COW submap */
6186                                         /* just back the range with a */
6187                                         /* anonymous entry */
6188                                         if(entry->vme_end < dst_end)
6189                                                 sub_end = entry->vme_end;
6190                                         else
6191                                                 sub_end = dst_end;
6192                                         if(entry->vme_start < base_addr)
6193                                                 sub_start = base_addr;
6194                                         else
6195                                                 sub_start = entry->vme_start;
6196                                         vm_map_clip_end(
6197                                                 dst_map, entry, sub_end);
6198                                         vm_map_clip_start(
6199                                                 dst_map, entry, sub_start);
6200                                         assert(!entry->use_pmap);
6201                                         entry->is_sub_map = FALSE;
6202                                         vm_map_deallocate(
6203                                                 entry->object.sub_map);
6204                                         entry->object.sub_map = NULL;
6205                                         entry->is_shared = FALSE;
6206                                         entry->needs_copy = FALSE;
6207                                         entry->offset = 0;
6208                                         /*
6209                                          * XXX FBDP
6210                                          * We should propagate the protections
6211                                          * of the submap entry here instead
6212                                          * of forcing them to VM_PROT_ALL...
6213                                          * Or better yet, we should inherit
6214                                          * the protection of the copy_entry.
6215                                          */
6216                                         entry->protection = VM_PROT_ALL;
6217                                         entry->max_protection = VM_PROT_ALL;
6218                                         entry->wired_count = 0;
6219                                         entry->user_wired_count = 0;
6220                                         if(entry->inheritance
6221                                            == VM_INHERIT_SHARE)
6222                                                 entry->inheritance = VM_INHERIT_COPY;
6223                                         continue;
6224                                 }
6225                                 /* first take care of any non-sub_map */
6226                                 /* entries to send */
6227                                 if(base_addr < entry->vme_start) {
6228                                         /* stuff to send */
6229                                         copy_size =
6230                                                 entry->vme_start - base_addr;
6231                                         break;
6232                                 }
6233                                 sub_start = entry->offset;
6234
6235                                 if(entry->vme_end < dst_end)
6236                                         sub_end = entry->vme_end;
6237                                 else
6238                                         sub_end = dst_end;
6239                                 sub_end -= entry->vme_start;
6240                                 sub_end += entry->offset;
6241                                 local_end = entry->vme_end;
6242                                 vm_map_unlock(dst_map);
6243                                 copy_size = sub_end - sub_start;
6244
6245                                 /* adjust the copy object */
6246                                 if (total_size > copy_size) {
6247                                         vm_map_size_t   local_size = 0;
6248                                         vm_map_size_t   entry_size;
6249
6250                                         nentries = 1;
6251                                         new_offset = copy->offset;
6252                                         copy_entry = vm_map_copy_first_entry(copy);
6253                                         while(copy_entry !=
6254                                               vm_map_copy_to_entry(copy)){
6255                                                 entry_size = copy_entry->vme_end -
6256                                                         copy_entry->vme_start;
6257                                                 if((local_size < copy_size) &&
6258                                                    ((local_size + entry_size)
6259                                                     >= copy_size)) {
6260                                                         vm_map_copy_clip_end(copy,
6261                                                                              copy_entry,
6262                                                                              copy_entry->vme_start +
6263                                                                              (copy_size - local_size));
6264                                                         entry_size = copy_entry->vme_end -
6265                                                                 copy_entry->vme_start;
6266                                                         local_size += entry_size;
6267                                                         new_offset += entry_size;
6268                                                 }
6269                                                 if(local_size >= copy_size) {
6270                                                         next_copy = copy_entry->vme_next;
6271                                                         copy_entry->vme_next =
6272                                                                 vm_map_copy_to_entry(copy);
6273                                                         previous_prev =
6274                                                                 copy->cpy_hdr.links.prev;
6275                                                         copy->cpy_hdr.links.prev = copy_entry;
6276                                                         copy->size = copy_size;
6277                                                         remaining_entries =
6278                                                                 copy->cpy_hdr.nentries;
6279                                                         remaining_entries -= nentries;
6280                                                         copy->cpy_hdr.nentries = nentries;
6281                                                         break;
6282                                                 } else {
6283                                                         local_size += entry_size;
6284                                                         new_offset += entry_size;
6285                                                         nentries++;
6286                                                 }
6287                                                 copy_entry = copy_entry->vme_next;
6288                                         }
6289                                 }
6290
6291                                 if((entry->use_pmap) && (pmap == NULL)) {
6292                                         kr = vm_map_copy_overwrite_nested(
6293                                                 entry->object.sub_map,
6294                                                 sub_start,
6295                                                 copy,
6296                                                 interruptible,
6297                                                 entry->object.sub_map->pmap,
6298                                                 TRUE);
6299                                 } else if (pmap != NULL) {
6300                                         kr = vm_map_copy_overwrite_nested(
6301                                                 entry->object.sub_map,
6302                                                 sub_start,
6303                                                 copy,
6304                                                 interruptible, pmap,
6305                                                 TRUE);
6306                                 } else {
6307                                         kr = vm_map_copy_overwrite_nested(
6308                                                 entry->object.sub_map,
6309                                                 sub_start,
6310                                                 copy,
6311                                                 interruptible,
6312                                                 dst_map->pmap,
6313                                                 TRUE);
6314                                 }
6315                                 if(kr != KERN_SUCCESS) {
6316                                         if(next_copy != NULL) {
6317                                                 copy->cpy_hdr.nentries +=
6318                                                         remaining_entries;
6319                                                 copy->cpy_hdr.links.prev->vme_next =
6320                                                         next_copy;
6321                                                 copy->cpy_hdr.links.prev
6322                                                         = previous_prev;
6323                                                 copy->size = total_size;
6324                                         }
6325                                         return kr;
6326                                 }
6327                                 if (dst_end <= local_end) {
6328                                         return(KERN_SUCCESS);
6329                                 }
6330                                 /* otherwise copy no longer exists, it was */
6331                                 /* destroyed after successful copy_overwrite */
6332                                 copy = (vm_map_copy_t)
6333                                         zalloc(vm_map_copy_zone);
6334                                 vm_map_copy_first_entry(copy) =
6335                                         vm_map_copy_last_entry(copy) =
6336                                         vm_map_copy_to_entry(copy);
6337                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
6338                                 copy->offset = new_offset;
6339
6340                                 /*
6341                                  * XXX FBDP
6342                                  * this does not seem to deal with
6343                                  * the VM map store (R&B tree)
6344                                  */
6345
6346                                 total_size -= copy_size;
6347                                 copy_size = 0;
6348                                 /* put back remainder of copy in container */
6349                                 if(next_copy != NULL) {
6350                                         copy->cpy_hdr.nentries = remaining_entries;
6351                                         copy->cpy_hdr.links.next = next_copy;
6352                                         copy->cpy_hdr.links.prev = previous_prev;
6353                                         copy->size = total_size;
6354                                         next_copy->vme_prev =
6355                                                 vm_map_copy_to_entry(copy);
6356                                         next_copy = NULL;
6357                                 }
6358                                 base_addr = local_end;
6359                                 vm_map_lock(dst_map);
6360                                 if(!vm_map_lookup_entry(dst_map,
6361                                                         local_end, &tmp_entry)) {
6362                                         vm_map_unlock(dst_map);
6363                                         return(KERN_INVALID_ADDRESS);
6364                                 }
6365                                 entry = tmp_entry;
6366                                 continue;
6367                         }
6368                         if (dst_end <= entry->vme_end) {
6369                                 copy_size = dst_end - base_addr;
6370                                 break;
6371                         }
6372
6373                         if ((next == vm_map_to_entry(dst_map)) ||
6374                             (next->vme_start != entry->vme_end)) {
6375                                 vm_map_unlock(dst_map);
6376                                 return(KERN_INVALID_ADDRESS);
6377                         }
6378
6379                         entry = next;
6380                 }/* for */
6381
6382                 next_copy = NULL;
6383                 nentries = 1;
6384
6385                 /* adjust the copy object */
6386                 if (total_size > copy_size) {
6387                         vm_map_size_t   local_size = 0;
6388                         vm_map_size_t   entry_size;
6389
6390                         new_offset = copy->offset;
6391                         copy_entry = vm_map_copy_first_entry(copy);
6392                         while(copy_entry != vm_map_copy_to_entry(copy)) {
6393                                 entry_size = copy_entry->vme_end -
6394                                         copy_entry->vme_start;
6395                                 if((local_size < copy_size) &&
6396                                    ((local_size + entry_size)
6397                                     >= copy_size)) {
6398                                         vm_map_copy_clip_end(copy, copy_entry,
6399                                                              copy_entry->vme_start +
6400                                                              (copy_size - local_size));
6401                                         entry_size = copy_entry->vme_end -
6402                                                 copy_entry->vme_start;
6403                                         local_size += entry_size;
6404                                         new_offset += entry_size;
6405                                 }
6406                                 if(local_size >= copy_size) {
6407                                         next_copy = copy_entry->vme_next;
6408                                         copy_entry->vme_next =
6409                                                 vm_map_copy_to_entry(copy);
6410                                         previous_prev =
6411                                                 copy->cpy_hdr.links.prev;
6412                                         copy->cpy_hdr.links.prev = copy_entry;
6413                                         copy->size = copy_size;
6414                                         remaining_entries =
6415                                                 copy->cpy_hdr.nentries;
6416                                         remaining_entries -= nentries;
6417                                         copy->cpy_hdr.nentries = nentries;
6418                                         break;
6419                                 } else {
6420                                         local_size += entry_size;
6421                                         new_offset += entry_size;
6422                                         nentries++;
6423                                 }
6424                                 copy_entry = copy_entry->vme_next;
6425                         }
6426                 }
6427
6428                 if (aligned) {
6429                         pmap_t  local_pmap;
6430
6431                         if(pmap)
6432                                 local_pmap = pmap;
6433                         else
6434                                 local_pmap = dst_map->pmap;
6435
6436                         if ((kr =  vm_map_copy_overwrite_aligned(
6437                                      dst_map, tmp_entry, copy,
6438                                      base_addr, local_pmap)) != KERN_SUCCESS) {
6439                                 if(next_copy != NULL) {
6440                                         copy->cpy_hdr.nentries +=
6441                                                 remaining_entries;
6442                                         copy->cpy_hdr.links.prev->vme_next =
6443                                                 next_copy;
6444                                         copy->cpy_hdr.links.prev =
6445                                                 previous_prev;
6446                                         copy->size += copy_size;
6447                                 }
6448                                 return kr;
6449                         }
6450                         vm_map_unlock(dst_map);
6451                 } else {
6452                         /*
6453                          * Performance gain:
6454                          *
6455                          * if the copy and dst address are misaligned but the same
6456                          * offset within the page we can copy_not_aligned the
6457                          * misaligned parts and copy aligned the rest.  If they are
6458                          * aligned but len is unaligned we simply need to copy
6459                          * the end bit unaligned.  We'll need to split the misaligned
6460                          * bits of the region in this case !
6461                          */
6462                         /* ALWAYS UNLOCKS THE dst_map MAP */
6463                         kr = vm_map_copy_overwrite_unaligned(
6464                                 dst_map,
6465                                 tmp_entry,
6466                                 copy,
6467                                 base_addr,
6468                                 discard_on_success);
6469                         if (kr != KERN_SUCCESS) {
6470                                 if(next_copy != NULL) {
6471                                         copy->cpy_hdr.nentries +=
6472                                                 remaining_entries;
6473                                         copy->cpy_hdr.links.prev->vme_next =
6474                                                 next_copy;
6475                                         copy->cpy_hdr.links.prev =
6476                                                 previous_prev;
6477                                         copy->size += copy_size;
6478                                 }
6479                                 return kr;
6480                         }
6481                 }
6482                 total_size -= copy_size;
6483                 if(total_size == 0)
6484                         break;
6485                 base_addr += copy_size;
6486                 copy_size = 0;
6487                 copy->offset = new_offset;
6488                 if(next_copy != NULL) {
6489                         copy->cpy_hdr.nentries = remaining_entries;
6490                         copy->cpy_hdr.links.next = next_copy;
6491                         copy->cpy_hdr.links.prev = previous_prev;
6492                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
6493                         copy->size = total_size;
6494                 }
6495                 vm_map_lock(dst_map);
6496                 while(TRUE) {
6497                         if (!vm_map_lookup_entry(dst_map,
6498                                                  base_addr, &tmp_entry)) {
6499                                 vm_map_unlock(dst_map);
6500                                 return(KERN_INVALID_ADDRESS);
6501                         }
6502                         if (tmp_entry->in_transition) {
6503                                 entry->needs_wakeup = TRUE;
6504                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
6505                         } else {
6506                                 break;
6507                         }
6508                 }
6509                 vm_map_clip_start(dst_map,
6510                                   tmp_entry,
6511                                   vm_map_trunc_page(base_addr,
6512                                                     VM_MAP_PAGE_MASK(dst_map)));
6513
6514                 entry = tmp_entry;
6515         } /* while */
6516
6517         /*
6518          *      Throw away the vm_map_copy object
6519          */
6520         if (discard_on_success)
6521                 vm_map_copy_discard(copy);
6522
6523         return(KERN_SUCCESS);
6524 }/* vm_map_copy_overwrite */
6525
6526 kern_return_t
6527 vm_map_copy_overwrite(
6528         vm_map_t        dst_map,
6529         vm_map_offset_t dst_addr,
6530         vm_map_copy_t   copy,
6531         boolean_t       interruptible)
6532 {
6533         vm_map_size_t   head_size, tail_size;
6534         vm_map_copy_t   head_copy, tail_copy;
6535         vm_map_offset_t head_addr, tail_addr;
6536         vm_map_entry_t  entry;
6537         kern_return_t   kr;
6538
6539         head_size = 0;
6540         tail_size = 0;
6541         head_copy = NULL;
6542         tail_copy = NULL;
6543         head_addr = 0;
6544         tail_addr = 0;
6545
6546         if (interruptible ||
6547             copy == VM_MAP_COPY_NULL ||
6548             copy->type != VM_MAP_COPY_ENTRY_LIST) {
6549                 /*
6550                  * We can't split the "copy" map if we're interruptible
6551                  * or if we don't have a "copy" map...
6552                  */
6553         blunt_copy:
6554                 return vm_map_copy_overwrite_nested(dst_map,
6555                                                     dst_addr,
6556                                                     copy,
6557                                                     interruptible,
6558                                                     (pmap_t) NULL,
6559                                                     TRUE);
6560         }
6561
6562         if (copy->size < 3 * PAGE_SIZE) {
6563                 /*
6564                  * Too small to bother with optimizing...
6565                  */
6566                 goto blunt_copy;
6567         }
6568
6569         if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
6570             (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6571                 /*
6572                  * Incompatible mis-alignment of source and destination...
6573                  */
6574                 goto blunt_copy;
6575         }
6576
6577         /*
6578          * Proper alignment or identical mis-alignment at the beginning.
6579          * Let's try and do a small unaligned copy first (if needed)
6580          * and then an aligned copy for the rest.
6581          */
6582         if (!page_aligned(dst_addr)) {
6583                 head_addr = dst_addr;
6584                 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
6585                              (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6586         }
6587         if (!page_aligned(copy->offset + copy->size)) {
6588                 /*
6589                  * Mis-alignment at the end.
6590                  * Do an aligned copy up to the last page and
6591                  * then an unaligned copy for the remaining bytes.
6592                  */
6593                 tail_size = ((copy->offset + copy->size) &
6594                              VM_MAP_PAGE_MASK(dst_map));
6595                 tail_addr = dst_addr + copy->size - tail_size;
6596         }
6597
6598         if (head_size + tail_size == copy->size) {
6599                 /*
6600                  * It's all unaligned, no optimization possible...
6601                  */
6602                 goto blunt_copy;
6603         }
6604
6605         /*
6606          * Can't optimize if there are any submaps in the
6607          * destination due to the way we free the "copy" map
6608          * progressively in vm_map_copy_overwrite_nested()
6609          * in that case.
6610          */
6611         vm_map_lock_read(dst_map);
6612         if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6613                 vm_map_unlock_read(dst_map);
6614                 goto blunt_copy;
6615         }
6616         for (;
6617              (entry != vm_map_copy_to_entry(copy) &&
6618               entry->vme_start < dst_addr + copy->size);
6619              entry = entry->vme_next) {
6620                 if (entry->is_sub_map) {
6621                         vm_map_unlock_read(dst_map);
6622                         goto blunt_copy;
6623                 }
6624         }
6625         vm_map_unlock_read(dst_map);
6626
6627         if (head_size) {
6628                 /*
6629                  * Unaligned copy of the first "head_size" bytes, to reach
6630                  * a page boundary.
6631                  */
6632
6633                 /*
6634                  * Extract "head_copy" out of "copy".
6635                  */
6636                 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6637                 vm_map_copy_first_entry(head_copy) =
6638                         vm_map_copy_to_entry(head_copy);
6639                 vm_map_copy_last_entry(head_copy) =
6640                         vm_map_copy_to_entry(head_copy);
6641                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6642                 head_copy->cpy_hdr.nentries = 0;
6643                 head_copy->cpy_hdr.entries_pageable =
6644                         copy->cpy_hdr.entries_pageable;
6645                 vm_map_store_init(&head_copy->cpy_hdr);
6646
6647                 head_copy->offset = copy->offset;
6648                 head_copy->size = head_size;
6649
6650                 copy->offset += head_size;
6651                 copy->size -= head_size;
6652
6653                 entry = vm_map_copy_first_entry(copy);
6654                 vm_map_copy_clip_end(copy, entry, copy->offset);
6655                 vm_map_copy_entry_unlink(copy, entry);
6656                 vm_map_copy_entry_link(head_copy,
6657                                        vm_map_copy_to_entry(head_copy),
6658                                        entry);
6659
6660                 /*
6661                  * Do the unaligned copy.
6662                  */
6663                 kr = vm_map_copy_overwrite_nested(dst_map,
6664                                                   head_addr,
6665                                                   head_copy,
6666                                                   interruptible,
6667                                                   (pmap_t) NULL,
6668                                                   FALSE);
6669                 if (kr != KERN_SUCCESS)
6670                         goto done;
6671         }
6672
6673         if (tail_size) {
6674                 /*
6675                  * Extract "tail_copy" out of "copy".
6676                  */
6677                 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6678                 vm_map_copy_first_entry(tail_copy) =
6679                         vm_map_copy_to_entry(tail_copy);
6680                 vm_map_copy_last_entry(tail_copy) =
6681                         vm_map_copy_to_entry(tail_copy);
6682                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6683                 tail_copy->cpy_hdr.nentries = 0;
6684                 tail_copy->cpy_hdr.entries_pageable =
6685                         copy->cpy_hdr.entries_pageable;
6686                 vm_map_store_init(&tail_copy->cpy_hdr);
6687
6688                 tail_copy->offset = copy->offset + copy->size - tail_size;
6689                 tail_copy->size = tail_size;
6690
6691                 copy->size -= tail_size;
6692
6693                 entry = vm_map_copy_last_entry(copy);
6694                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6695                 entry = vm_map_copy_last_entry(copy);
6696                 vm_map_copy_entry_unlink(copy, entry);
6697                 vm_map_copy_entry_link(tail_copy,
6698                                        vm_map_copy_last_entry(tail_copy),
6699                                        entry);
6700         }
6701
6702         /*
6703          * Copy most (or possibly all) of the data.
6704          */
6705         kr = vm_map_copy_overwrite_nested(dst_map,
6706                                           dst_addr + head_size,
6707                                           copy,
6708                                           interruptible,
6709                                           (pmap_t) NULL,
6710                                           FALSE);
6711         if (kr != KERN_SUCCESS) {
6712                 goto done;
6713         }
6714
6715         if (tail_size) {
6716                 kr = vm_map_copy_overwrite_nested(dst_map,
6717                                                   tail_addr,
6718                                                   tail_copy,
6719                                                   interruptible,
6720                                                   (pmap_t) NULL,
6721                                                   FALSE);
6722         }
6723
6724 done:
6725         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6726         if (kr == KERN_SUCCESS) {
6727                 /*
6728                  * Discard all the copy maps.
6729                  */
6730                 if (head_copy) {
6731                         vm_map_copy_discard(head_copy);
6732                         head_copy = NULL;
6733                 }
6734                 vm_map_copy_discard(copy);
6735                 if (tail_copy) {
6736                         vm_map_copy_discard(tail_copy);
6737                         tail_copy = NULL;
6738                 }
6739         } else {
6740                 /*
6741                  * Re-assemble the original copy map.
6742                  */
6743                 if (head_copy) {
6744                         entry = vm_map_copy_first_entry(head_copy);
6745                         vm_map_copy_entry_unlink(head_copy, entry);
6746                         vm_map_copy_entry_link(copy,
6747                                                vm_map_copy_to_entry(copy),
6748                                                entry);
6749                         copy->offset -= head_size;
6750                         copy->size += head_size;
6751                         vm_map_copy_discard(head_copy);
6752                         head_copy = NULL;
6753                 }
6754                 if (tail_copy) {
6755                         entry = vm_map_copy_last_entry(tail_copy);
6756                         vm_map_copy_entry_unlink(tail_copy, entry);
6757                         vm_map_copy_entry_link(copy,
6758                                                vm_map_copy_last_entry(copy),
6759                                                entry);
6760                         copy->size += tail_size;
6761                         vm_map_copy_discard(tail_copy);
6762                         tail_copy = NULL;
6763                 }
6764         }
6765         return kr;
6766 }
6767
6768
6769 /*
6770  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
6771  *
6772  *      Decription:
6773  *      Physically copy unaligned data
6774  *
6775  *      Implementation:
6776  *      Unaligned parts of pages have to be physically copied.  We use
6777  *      a modified form of vm_fault_copy (which understands none-aligned
6778  *      page offsets and sizes) to do the copy.  We attempt to copy as
6779  *      much memory in one go as possibly, however vm_fault_copy copies
6780  *      within 1 memory object so we have to find the smaller of "amount left"
6781  *      "source object data size" and "target object data size".  With
6782  *      unaligned data we don't need to split regions, therefore the source
6783  *      (copy) object should be one map entry, the target range may be split
6784  *      over multiple map entries however.  In any event we are pessimistic
6785  *      about these assumptions.
6786  *
6787  *      Assumptions:
6788  *      dst_map is locked on entry and is return locked on success,
6789  *      unlocked on error.
6790  */
6791
6792 static kern_return_t
6793 vm_map_copy_overwrite_unaligned(
6794         vm_map_t        dst_map,
6795         vm_map_entry_t  entry,
6796         vm_map_copy_t   copy,
6797         vm_map_offset_t start,
6798         boolean_t       discard_on_success)
6799 {
6800         vm_map_entry_t          copy_entry;
6801         vm_map_entry_t          copy_entry_next;
6802         vm_map_version_t        version;
6803         vm_object_t             dst_object;
6804         vm_object_offset_t      dst_offset;
6805         vm_object_offset_t      src_offset;
6806         vm_object_offset_t      entry_offset;
6807         vm_map_offset_t         entry_end;
6808         vm_map_size_t           src_size,
6809                                 dst_size,
6810                                 copy_size,
6811                                 amount_left;
6812         kern_return_t           kr = KERN_SUCCESS;
6813
6814
6815         copy_entry = vm_map_copy_first_entry(copy);
6816
6817         vm_map_lock_write_to_read(dst_map);
6818
6819         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6820         amount_left = copy->size;
6821 /*
6822  *      unaligned so we never clipped this entry, we need the offset into
6823  *      the vm_object not just the data.
6824  */
6825         while (amount_left > 0) {
6826
6827                 if (entry == vm_map_to_entry(dst_map)) {
6828                         vm_map_unlock_read(dst_map);
6829                         return KERN_INVALID_ADDRESS;
6830                 }
6831
6832                 /* "start" must be within the current map entry */
6833                 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6834
6835                 dst_offset = start - entry->vme_start;
6836
6837                 dst_size = entry->vme_end - start;
6838
6839                 src_size = copy_entry->vme_end -
6840                         (copy_entry->vme_start + src_offset);
6841
6842                 if (dst_size < src_size) {
6843 /*
6844  *                      we can only copy dst_size bytes before
6845  *                      we have to get the next destination entry
6846  */
6847                         copy_size = dst_size;
6848                 } else {
6849 /*
6850  *                      we can only copy src_size bytes before
6851  *                      we have to get the next source copy entry
6852  */
6853                         copy_size = src_size;
6854                 }
6855
6856                 if (copy_size > amount_left) {
6857                         copy_size = amount_left;
6858                 }
6859 /*
6860  *              Entry needs copy, create a shadow shadow object for
6861  *              Copy on write region.
6862  */
6863                 if (entry->needs_copy &&
6864                     ((entry->protection & VM_PROT_WRITE) != 0))
6865                 {
6866                         if (vm_map_lock_read_to_write(dst_map)) {
6867                                 vm_map_lock_read(dst_map);
6868                                 goto RetryLookup;
6869                         }
6870                         vm_object_shadow(&entry->object.vm_object,
6871                                          &entry->offset,
6872                                          (vm_map_size_t)(entry->vme_end
6873                                                          - entry->vme_start));
6874                         entry->needs_copy = FALSE;
6875                         vm_map_lock_write_to_read(dst_map);
6876                 }
6877                 dst_object = entry->object.vm_object;
6878 /*
6879  *              unlike with the virtual (aligned) copy we're going
6880  *              to fault on it therefore we need a target object.
6881  */
6882                 if (dst_object == VM_OBJECT_NULL) {
6883                         if (vm_map_lock_read_to_write(dst_map)) {
6884                                 vm_map_lock_read(dst_map);
6885                                 goto RetryLookup;
6886                         }
6887                         dst_object = vm_object_allocate((vm_map_size_t)
6888                                                         entry->vme_end - entry->vme_start);
6889                         entry->object.vm_object = dst_object;
6890                         entry->offset = 0;
6891                         vm_map_lock_write_to_read(dst_map);
6892                 }
6893 /*
6894  *              Take an object reference and unlock map. The "entry" may
6895  *              disappear or change when the map is unlocked.
6896  */
6897                 vm_object_reference(dst_object);
6898                 version.main_timestamp = dst_map->timestamp;
6899                 entry_offset = entry->offset;
6900                 entry_end = entry->vme_end;
6901                 vm_map_unlock_read(dst_map);
6902 /*
6903  *              Copy as much as possible in one pass
6904  */
6905                 kr = vm_fault_copy(
6906                         copy_entry->object.vm_object,
6907                         copy_entry->offset + src_offset,
6908                         &copy_size,
6909                         dst_object,
6910                         entry_offset + dst_offset,
6911                         dst_map,
6912                         &version,
6913                         THREAD_UNINT );
6914
6915                 start += copy_size;
6916                 src_offset += copy_size;
6917                 amount_left -= copy_size;
6918 /*
6919  *              Release the object reference
6920  */
6921                 vm_object_deallocate(dst_object);
6922 /*
6923  *              If a hard error occurred, return it now
6924  */
6925                 if (kr != KERN_SUCCESS)
6926                         return kr;
6927
6928                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6929                     || amount_left == 0)
6930                 {
6931 /*
6932  *                      all done with this copy entry, dispose.
6933  */
6934                         copy_entry_next = copy_entry->vme_next;
6935
6936                         if (discard_on_success) {
6937                                 vm_map_copy_entry_unlink(copy, copy_entry);
6938                                 assert(!copy_entry->is_sub_map);
6939                                 vm_object_deallocate(
6940                                         copy_entry->object.vm_object);
6941                                 vm_map_copy_entry_dispose(copy, copy_entry);
6942                         }
6943
6944                         if (copy_entry_next == vm_map_copy_to_entry(copy) &&
6945                             amount_left) {
6946 /*
6947  *                              not finished copying but run out of source
6948  */
6949                                 return KERN_INVALID_ADDRESS;
6950                         }
6951
6952                         copy_entry = copy_entry_next;
6953
6954                         src_offset = 0;
6955                 }
6956
6957                 if (amount_left == 0)
6958                         return KERN_SUCCESS;
6959
6960                 vm_map_lock_read(dst_map);
6961                 if (version.main_timestamp == dst_map->timestamp) {
6962                         if (start == entry_end) {
6963 /*
6964  *                              destination region is split.  Use the version
6965  *                              information to avoid a lookup in the normal
6966  *                              case.
6967  */
6968                                 entry = entry->vme_next;
6969 /*
6970  *                              should be contiguous. Fail if we encounter
6971  *                              a hole in the destination.
6972  */
6973                                 if (start != entry->vme_start) {
6974                                         vm_map_unlock_read(dst_map);
6975                                         return KERN_INVALID_ADDRESS ;
6976                                 }
6977                         }
6978                 } else {
6979 /*
6980  *                      Map version check failed.
6981  *                      we must lookup the entry because somebody
6982  *                      might have changed the map behind our backs.
6983  */
6984                 RetryLookup:
6985                         if (!vm_map_lookup_entry(dst_map, start, &entry))
6986                         {
6987                                 vm_map_unlock_read(dst_map);
6988                                 return KERN_INVALID_ADDRESS ;
6989                         }
6990                 }
6991         }/* while */
6992
6993         return KERN_SUCCESS;
6994 }/* vm_map_copy_overwrite_unaligned */
6995
6996 /*
6997  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
6998  *
6999  *      Description:
7000  *      Does all the vm_trickery possible for whole pages.
7001  *
7002  *      Implementation:
7003  *
7004  *      If there are no permanent objects in the destination,
7005  *      and the source and destination map entry zones match,
7006  *      and the destination map entry is not shared,
7007  *      then the map entries can be deleted and replaced
7008  *      with those from the copy.  The following code is the
7009  *      basic idea of what to do, but there are lots of annoying
7010  *      little details about getting protection and inheritance
7011  *      right.  Should add protection, inheritance, and sharing checks
7012  *      to the above pass and make sure that no wiring is involved.
7013  */
7014
7015 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
7016 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
7017 int vm_map_copy_overwrite_aligned_src_large = 0;
7018
7019 static kern_return_t
7020 vm_map_copy_overwrite_aligned(
7021         vm_map_t        dst_map,
7022         vm_map_entry_t  tmp_entry,
7023         vm_map_copy_t   copy,
7024         vm_map_offset_t start,
7025         __unused pmap_t pmap)
7026 {
7027         vm_object_t     object;
7028         vm_map_entry_t  copy_entry;
7029         vm_map_size_t   copy_size;
7030         vm_map_size_t   size;
7031         vm_map_entry_t  entry;
7032
7033         while ((copy_entry = vm_map_copy_first_entry(copy))
7034                != vm_map_copy_to_entry(copy))
7035         {
7036                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
7037
7038                 entry = tmp_entry;
7039                 assert(!entry->use_pmap); /* unnested when clipped earlier */
7040                 if (entry == vm_map_to_entry(dst_map)) {
7041                         vm_map_unlock(dst_map);
7042                         return KERN_INVALID_ADDRESS;
7043                 }
7044                 size = (entry->vme_end - entry->vme_start);
7045                 /*
7046                  *      Make sure that no holes popped up in the
7047                  *      address map, and that the protection is
7048                  *      still valid, in case the map was unlocked
7049                  *      earlier.
7050                  */
7051
7052                 if ((entry->vme_start != start) || ((entry->is_sub_map)
7053                                                     && !entry->needs_copy)) {
7054                         vm_map_unlock(dst_map);
7055                         return(KERN_INVALID_ADDRESS);
7056                 }
7057                 assert(entry != vm_map_to_entry(dst_map));
7058
7059                 /*
7060                  *      Check protection again
7061                  */
7062
7063                 if ( ! (entry->protection & VM_PROT_WRITE)) {
7064                         vm_map_unlock(dst_map);
7065                         return(KERN_PROTECTION_FAILURE);
7066                 }
7067
7068                 /*
7069                  *      Adjust to source size first
7070                  */
7071
7072                 if (copy_size < size) {
7073                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
7074                         size = copy_size;
7075                 }
7076
7077                 /*
7078                  *      Adjust to destination size
7079                  */
7080
7081                 if (size < copy_size) {
7082                         vm_map_copy_clip_end(copy, copy_entry,
7083                                              copy_entry->vme_start + size);
7084                         copy_size = size;
7085                 }
7086
7087                 assert((entry->vme_end - entry->vme_start) == size);
7088                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
7089                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
7090
7091                 /*
7092                  *      If the destination contains temporary unshared memory,
7093                  *      we can perform the copy by throwing it away and
7094                  *      installing the source data.
7095                  */
7096
7097                 object = entry->object.vm_object;
7098                 if ((!entry->is_shared &&
7099                      ((object == VM_OBJECT_NULL) ||
7100                       (object->internal && !object->true_share))) ||
7101                     entry->needs_copy) {
7102                         vm_object_t     old_object = entry->object.vm_object;
7103                         vm_object_offset_t      old_offset = entry->offset;
7104                         vm_object_offset_t      offset;
7105
7106                         /*
7107                          * Ensure that the source and destination aren't
7108                          * identical
7109                          */
7110                         if (old_object == copy_entry->object.vm_object &&
7111                             old_offset == copy_entry->offset) {
7112                                 vm_map_copy_entry_unlink(copy, copy_entry);
7113                                 vm_map_copy_entry_dispose(copy, copy_entry);
7114
7115                                 if (old_object != VM_OBJECT_NULL)
7116                                         vm_object_deallocate(old_object);
7117
7118                                 start = tmp_entry->vme_end;
7119                                 tmp_entry = tmp_entry->vme_next;
7120                                 continue;
7121                         }
7122
7123 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
7124 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
7125                         if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
7126                             copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
7127                             copy_size <= __TRADEOFF1_COPY_SIZE) {
7128                                 /*
7129                                  * Virtual vs. Physical copy tradeoff #1.
7130                                  *
7131                                  * Copying only a few pages out of a large
7132                                  * object:  do a physical copy instead of
7133                                  * a virtual copy, to avoid possibly keeping
7134                                  * the entire large object alive because of
7135                                  * those few copy-on-write pages.
7136                                  */
7137                                 vm_map_copy_overwrite_aligned_src_large++;
7138                                 goto slow_copy;
7139                         }
7140
7141                         if (entry->alias >= VM_MEMORY_MALLOC &&
7142                             entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
7143                                 vm_object_t new_object, new_shadow;
7144
7145                                 /*
7146                                  * We're about to map something over a mapping
7147                                  * established by malloc()...
7148                                  */
7149                                 new_object = copy_entry->object.vm_object;
7150                                 if (new_object != VM_OBJECT_NULL) {
7151                                         vm_object_lock_shared(new_object);
7152                                 }
7153                                 while (new_object != VM_OBJECT_NULL &&
7154                                        !new_object->true_share &&
7155                                        new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7156                                        new_object->internal) {
7157                                         new_shadow = new_object->shadow;
7158                                         if (new_shadow == VM_OBJECT_NULL) {
7159                                                 break;
7160                                         }
7161                                         vm_object_lock_shared(new_shadow);
7162                                         vm_object_unlock(new_object);
7163                                         new_object = new_shadow;
7164                                 }
7165                                 if (new_object != VM_OBJECT_NULL) {
7166                                         if (!new_object->internal) {
7167                                                 /*
7168                                                  * The new mapping is backed
7169                                                  * by an external object.  We
7170                                                  * don't want malloc'ed memory
7171                                                  * to be replaced with such a
7172                                                  * non-anonymous mapping, so
7173                                                  * let's go off the optimized
7174                                                  * path...
7175                                                  */
7176                                                 vm_map_copy_overwrite_aligned_src_not_internal++;
7177                                                 vm_object_unlock(new_object);
7178                                                 goto slow_copy;
7179                                         }
7180                                         if (new_object->true_share ||
7181                                             new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
7182                                                 /*
7183                                                  * Same if there's a "true_share"
7184                                                  * object in the shadow chain, or
7185                                                  * an object with a non-default
7186                                                  * (SYMMETRIC) copy strategy.
7187                                                  */
7188                                                 vm_map_copy_overwrite_aligned_src_not_symmetric++;
7189                                                 vm_object_unlock(new_object);
7190                                                 goto slow_copy;
7191                                         }
7192                                         vm_object_unlock(new_object);
7193                                 }
7194                                 /*
7195                                  * The new mapping is still backed by
7196                                  * anonymous (internal) memory, so it's
7197                                  * OK to substitute it for the original
7198                                  * malloc() mapping.
7199                                  */
7200                         }
7201
7202                         if (old_object != VM_OBJECT_NULL) {
7203                                 if(entry->is_sub_map) {
7204                                         if(entry->use_pmap) {
7205 #ifndef NO_NESTED_PMAP
7206                                                 pmap_unnest(dst_map->pmap,
7207                                                             (addr64_t)entry->vme_start,
7208                                                             entry->vme_end - entry->vme_start);
7209 #endif  /* NO_NESTED_PMAP */
7210                                                 if(dst_map->mapped_in_other_pmaps) {
7211                                                         /* clean up parent */
7212                                                         /* map/maps */
7213                                                         vm_map_submap_pmap_clean(
7214                                                                 dst_map, entry->vme_start,
7215                                                                 entry->vme_end,
7216                                                                 entry->object.sub_map,
7217                                                                 entry->offset);
7218                                                 }
7219                                         } else {
7220                                                 vm_map_submap_pmap_clean(
7221                                                         dst_map, entry->vme_start,
7222                                                         entry->vme_end,
7223                                                         entry->object.sub_map,
7224                                                         entry->offset);
7225                                         }
7226                                         vm_map_deallocate(
7227                                                 entry->object.sub_map);
7228                                 } else {
7229                                         if(dst_map->mapped_in_other_pmaps) {
7230                                                 vm_object_pmap_protect_options(
7231                                                         entry->object.vm_object,
7232                                                         entry->offset,
7233                                                         entry->vme_end
7234                                                         - entry->vme_start,
7235                                                         PMAP_NULL,
7236                                                         entry->vme_start,
7237                                                         VM_PROT_NONE,
7238                                                         PMAP_OPTIONS_REMOVE);
7239                                         } else {
7240                                                 pmap_remove_options(
7241                                                         dst_map->pmap,
7242                                                         (addr64_t)(entry->vme_start),
7243                                                         (addr64_t)(entry->vme_end),
7244                                                         PMAP_OPTIONS_REMOVE);
7245                                         }
7246                                         vm_object_deallocate(old_object);
7247                                 }
7248                         }
7249
7250                         entry->is_sub_map = FALSE;
7251                         entry->object = copy_entry->object;
7252                         object = entry->object.vm_object;
7253                         entry->needs_copy = copy_entry->needs_copy;
7254                         entry->wired_count = 0;
7255                         entry->user_wired_count = 0;
7256                         offset = entry->offset = copy_entry->offset;
7257
7258                         vm_map_copy_entry_unlink(copy, copy_entry);
7259                         vm_map_copy_entry_dispose(copy, copy_entry);
7260
7261                         /*
7262                          * we could try to push pages into the pmap at this point, BUT
7263                          * this optimization only saved on average 2 us per page if ALL
7264                          * the pages in the source were currently mapped
7265                          * and ALL the pages in the dest were touched, if there were fewer
7266                          * than 2/3 of the pages touched, this optimization actually cost more cycles
7267                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
7268                          */
7269
7270                         /*
7271                          *      Set up for the next iteration.  The map
7272                          *      has not been unlocked, so the next
7273                          *      address should be at the end of this
7274                          *      entry, and the next map entry should be
7275                          *      the one following it.
7276                          */
7277
7278                         start = tmp_entry->vme_end;
7279                         tmp_entry = tmp_entry->vme_next;
7280                 } else {
7281                         vm_map_version_t        version;
7282                         vm_object_t             dst_object;
7283                         vm_object_offset_t      dst_offset;
7284                         kern_return_t           r;
7285
7286                 slow_copy:
7287                         if (entry->needs_copy) {
7288                                 vm_object_shadow(&entry->object.vm_object,
7289                                                  &entry->offset,
7290                                                  (entry->vme_end -
7291                                                   entry->vme_start));
7292                                 entry->needs_copy = FALSE;
7293                         }
7294
7295                         dst_object = entry->object.vm_object;
7296                         dst_offset = entry->offset;
7297
7298                         /*
7299                          *      Take an object reference, and record
7300                          *      the map version information so that the
7301                          *      map can be safely unlocked.
7302                          */
7303
7304                         if (dst_object == VM_OBJECT_NULL) {
7305                                 /*
7306                                  * We would usually have just taken the
7307                                  * optimized path above if the destination
7308                                  * object has not been allocated yet.  But we
7309                                  * now disable that optimization if the copy
7310                                  * entry's object is not backed by anonymous
7311                                  * memory to avoid replacing malloc'ed
7312                                  * (i.e. re-usable) anonymous memory with a
7313                                  * not-so-anonymous mapping.
7314                                  * So we have to handle this case here and
7315                                  * allocate a new VM object for this map entry.
7316                                  */
7317                                 dst_object = vm_object_allocate(
7318                                         entry->vme_end - entry->vme_start);
7319                                 dst_offset = 0;
7320                                 entry->object.vm_object = dst_object;
7321                                 entry->offset = dst_offset;
7322
7323                         }
7324
7325                         vm_object_reference(dst_object);
7326
7327                         /* account for unlock bumping up timestamp */
7328                         version.main_timestamp = dst_map->timestamp + 1;
7329
7330                         vm_map_unlock(dst_map);
7331
7332                         /*
7333                          *      Copy as much as possible in one pass
7334                          */
7335
7336                         copy_size = size;
7337                         r = vm_fault_copy(
7338                                 copy_entry->object.vm_object,
7339                                 copy_entry->offset,
7340                                 &copy_size,
7341                                 dst_object,
7342                                 dst_offset,
7343                                 dst_map,
7344                                 &version,
7345                                 THREAD_UNINT );
7346
7347                         /*
7348                          *      Release the object reference
7349                          */
7350
7351                         vm_object_deallocate(dst_object);
7352
7353                         /*
7354                          *      If a hard error occurred, return it now
7355                          */
7356
7357                         if (r != KERN_SUCCESS)
7358                                 return(r);
7359
7360                         if (copy_size != 0) {
7361                                 /*
7362                                  *      Dispose of the copied region
7363                                  */
7364
7365                                 vm_map_copy_clip_end(copy, copy_entry,
7366                                                      copy_entry->vme_start + copy_size);
7367                                 vm_map_copy_entry_unlink(copy, copy_entry);
7368                                 vm_object_deallocate(copy_entry->object.vm_object);
7369                                 vm_map_copy_entry_dispose(copy, copy_entry);
7370                         }
7371
7372                         /*
7373                          *      Pick up in the destination map where we left off.
7374                          *
7375                          *      Use the version information to avoid a lookup
7376                          *      in the normal case.
7377                          */
7378
7379                         start += copy_size;
7380                         vm_map_lock(dst_map);
7381                         if (version.main_timestamp == dst_map->timestamp &&
7382                             copy_size != 0) {
7383                                 /* We can safely use saved tmp_entry value */
7384
7385                                 vm_map_clip_end(dst_map, tmp_entry, start);
7386                                 tmp_entry = tmp_entry->vme_next;
7387                         } else {
7388                                 /* Must do lookup of tmp_entry */
7389
7390                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7391                                         vm_map_unlock(dst_map);
7392                                         return(KERN_INVALID_ADDRESS);
7393                                 }
7394                                 vm_map_clip_start(dst_map, tmp_entry, start);
7395                         }
7396                 }
7397         }/* while */
7398
7399         return(KERN_SUCCESS);
7400 }/* vm_map_copy_overwrite_aligned */
7401
7402 /*
7403  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
7404  *
7405  *      Description:
7406  *              Copy in data to a kernel buffer from space in the
7407  *              source map. The original space may be optionally
7408  *              deallocated.
7409  *
7410  *              If successful, returns a new copy object.
7411  */
7412 static kern_return_t
7413 vm_map_copyin_kernel_buffer(
7414         vm_map_t        src_map,
7415         vm_map_offset_t src_addr,
7416         vm_map_size_t   len,
7417         boolean_t       src_destroy,
7418         vm_map_copy_t   *copy_result)
7419 {
7420         kern_return_t kr;
7421         vm_map_copy_t copy;
7422         vm_size_t kalloc_size;
7423
7424         if ((vm_size_t) len != len) {
7425                 /* "len" is too big and doesn't fit in a "vm_size_t" */
7426                 return KERN_RESOURCE_SHORTAGE;
7427         }
7428         kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7429         assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7430
7431         copy = (vm_map_copy_t) kalloc(kalloc_size);
7432         if (copy == VM_MAP_COPY_NULL) {
7433                 return KERN_RESOURCE_SHORTAGE;
7434         }
7435         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7436         copy->size = len;
7437         copy->offset = 0;
7438         copy->cpy_kdata = (void *) (copy + 1);
7439         copy->cpy_kalloc_size = kalloc_size;
7440
7441         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7442         if (kr != KERN_SUCCESS) {
7443                 kfree(copy, kalloc_size);
7444                 return kr;
7445         }
7446         if (src_destroy) {
7447                 (void) vm_map_remove(
7448                         src_map,
7449                         vm_map_trunc_page(src_addr,
7450                                           VM_MAP_PAGE_MASK(src_map)),
7451                         vm_map_round_page(src_addr + len,
7452                                           VM_MAP_PAGE_MASK(src_map)),
7453                         (VM_MAP_REMOVE_INTERRUPTIBLE |
7454                          VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7455                          (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
7456         }
7457         *copy_result = copy;
7458         return KERN_SUCCESS;
7459 }
7460
7461 /*
7462  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
7463  *
7464  *      Description:
7465  *              Copy out data from a kernel buffer into space in the
7466  *              destination map. The space may be otpionally dynamically
7467  *              allocated.
7468  *
7469  *              If successful, consumes the copy object.
7470  *              Otherwise, the caller is responsible for it.
7471  */
7472 static int vm_map_copyout_kernel_buffer_failures = 0;
7473 static kern_return_t
7474 vm_map_copyout_kernel_buffer(
7475         vm_map_t                map,
7476         vm_map_address_t        *addr,  /* IN/OUT */
7477         vm_map_copy_t           copy,
7478         boolean_t               overwrite,
7479         boolean_t               consume_on_success)
7480 {
7481         kern_return_t kr = KERN_SUCCESS;
7482         thread_t thread = current_thread();
7483
7484         if (!overwrite) {
7485
7486                 /*
7487                  * Allocate space in the target map for the data
7488                  */
7489                 *addr = 0;
7490                 kr = vm_map_enter(map,
7491                                   addr,
7492                                   vm_map_round_page(copy->size,
7493                                                     VM_MAP_PAGE_MASK(map)),
7494                                   (vm_map_offset_t) 0,
7495                                   VM_FLAGS_ANYWHERE,
7496                                   VM_OBJECT_NULL,
7497                                   (vm_object_offset_t) 0,
7498                                   FALSE,
7499                                   VM_PROT_DEFAULT,
7500                                   VM_PROT_ALL,
7501                                   VM_INHERIT_DEFAULT);
7502                 if (kr != KERN_SUCCESS)
7503                         return kr;
7504         }
7505
7506         /*
7507          * Copyout the data from the kernel buffer to the target map.
7508          */
7509         if (thread->map == map) {
7510
7511                 /*
7512                  * If the target map is the current map, just do
7513                  * the copy.
7514                  */
7515                 assert((vm_size_t) copy->size == copy->size);
7516                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7517                         kr = KERN_INVALID_ADDRESS;
7518                 }
7519         }
7520         else {
7521                 vm_map_t oldmap;
7522
7523                 /*
7524                  * If the target map is another map, assume the
7525                  * target's address space identity for the duration
7526                  * of the copy.
7527                  */
7528                 vm_map_reference(map);
7529                 oldmap = vm_map_switch(map);
7530
7531                 assert((vm_size_t) copy->size == copy->size);
7532                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7533                         vm_map_copyout_kernel_buffer_failures++;
7534                         kr = KERN_INVALID_ADDRESS;
7535                 }
7536
7537                 (void) vm_map_switch(oldmap);
7538                 vm_map_deallocate(map);
7539         }
7540
7541         if (kr != KERN_SUCCESS) {
7542                 /* the copy failed, clean up */
7543                 if (!overwrite) {
7544                         /*
7545                          * Deallocate the space we allocated in the target map.
7546                          */
7547                         (void) vm_map_remove(
7548                                 map,
7549                                 vm_map_trunc_page(*addr,
7550                                                   VM_MAP_PAGE_MASK(map)),
7551                                 vm_map_round_page((*addr +
7552                                                    vm_map_round_page(copy->size,
7553                                                                      VM_MAP_PAGE_MASK(map))),
7554                                                   VM_MAP_PAGE_MASK(map)),
7555                                 VM_MAP_NO_FLAGS);
7556                         *addr = 0;
7557                 }
7558         } else {
7559                 /* copy was successful, dicard the copy structure */
7560                 if (consume_on_success) {
7561                         kfree(copy, copy->cpy_kalloc_size);
7562                 }
7563         }
7564
7565         return kr;
7566 }
7567
7568 /*
7569  *      Macro:          vm_map_copy_insert
7570  *
7571  *      Description:
7572  *              Link a copy chain ("copy") into a map at the
7573  *              specified location (after "where").
7574  *      Side effects:
7575  *              The copy chain is destroyed.
7576  *      Warning:
7577  *              The arguments are evaluated multiple times.
7578  */
7579 #define vm_map_copy_insert(map, where, copy)                            \
7580 MACRO_BEGIN                                                             \
7581         vm_map_store_copy_insert(map, where, copy);       \
7582         zfree(vm_map_copy_zone, copy);          \
7583 MACRO_END
7584
7585 void
7586 vm_map_copy_remap(
7587         vm_map_t        map,
7588         vm_map_entry_t  where,
7589         vm_map_copy_t   copy,
7590         vm_map_offset_t adjustment,
7591         vm_prot_t       cur_prot,
7592         vm_prot_t       max_prot,
7593         vm_inherit_t    inheritance)
7594 {
7595         vm_map_entry_t  copy_entry, new_entry;
7596
7597         for (copy_entry = vm_map_copy_first_entry(copy);
7598              copy_entry != vm_map_copy_to_entry(copy);
7599              copy_entry = copy_entry->vme_next) {
7600                 /* get a new VM map entry for the map */
7601                 new_entry = vm_map_entry_create(map,
7602                                                 !map->hdr.entries_pageable);
7603                 /* copy the "copy entry" to the new entry */
7604                 vm_map_entry_copy(new_entry, copy_entry);
7605                 /* adjust "start" and "end" */
7606                 new_entry->vme_start += adjustment;
7607                 new_entry->vme_end += adjustment;
7608                 /* clear some attributes */
7609                 new_entry->inheritance = inheritance;
7610                 new_entry->protection = cur_prot;
7611                 new_entry->max_protection = max_prot;
7612                 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
7613                 /* take an extra reference on the entry's "object" */
7614                 if (new_entry->is_sub_map) {
7615                         vm_map_lock(new_entry->object.sub_map);
7616                         vm_map_reference(new_entry->object.sub_map);
7617                         vm_map_unlock(new_entry->object.sub_map);
7618                 } else {
7619                         vm_object_reference(new_entry->object.vm_object);
7620                 }
7621                 /* insert the new entry in the map */
7622                 vm_map_store_entry_link(map, where, new_entry);
7623                 /* continue inserting the "copy entries" after the new entry */
7624                 where = new_entry;
7625         }
7626 }
7627
7628 /*
7629  *      Routine:        vm_map_copyout
7630  *
7631  *      Description:
7632  *              Copy out a copy chain ("copy") into newly-allocated
7633  *              space in the destination map.
7634  *
7635  *              If successful, consumes the copy object.
7636  *              Otherwise, the caller is responsible for it.
7637  */
7638
7639 kern_return_t
7640 vm_map_copyout(
7641         vm_map_t                dst_map,
7642         vm_map_address_t        *dst_addr,      /* OUT */
7643         vm_map_copy_t           copy)
7644 {
7645         return vm_map_copyout_internal(dst_map, dst_addr, copy,
7646                                        TRUE, /* consume_on_success */
7647                                        VM_PROT_DEFAULT,
7648                                        VM_PROT_ALL,
7649                                        VM_INHERIT_DEFAULT);
7650 }
7651
7652 kern_return_t
7653 vm_map_copyout_internal(
7654         vm_map_t                dst_map,
7655         vm_map_address_t        *dst_addr,      /* OUT */
7656         vm_map_copy_t           copy,
7657         boolean_t               consume_on_success,
7658         vm_prot_t               cur_protection,
7659         vm_prot_t               max_protection,
7660         vm_inherit_t            inheritance)
7661 {
7662         vm_map_size_t           size;
7663         vm_map_size_t           adjustment;
7664         vm_map_offset_t         start;
7665         vm_object_offset_t      vm_copy_start;
7666         vm_map_entry_t          last;
7667         vm_map_entry_t          entry;
7668
7669         /*
7670          *      Check for null copy object.
7671          */
7672
7673         if (copy == VM_MAP_COPY_NULL) {
7674                 *dst_addr = 0;
7675                 return(KERN_SUCCESS);
7676         }
7677
7678         /*
7679          *      Check for special copy object, created
7680          *      by vm_map_copyin_object.
7681          */
7682
7683         if (copy->type == VM_MAP_COPY_OBJECT) {
7684                 vm_object_t             object = copy->cpy_object;
7685                 kern_return_t           kr;
7686                 vm_object_offset_t      offset;
7687
7688                 offset = vm_object_trunc_page(copy->offset);
7689                 size = vm_map_round_page((copy->size +
7690                                           (vm_map_size_t)(copy->offset -
7691                                                           offset)),
7692                                          VM_MAP_PAGE_MASK(dst_map));
7693                 *dst_addr = 0;
7694                 kr = vm_map_enter(dst_map, dst_addr, size,
7695                                   (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7696                                   object, offset, FALSE,
7697                                   VM_PROT_DEFAULT, VM_PROT_ALL,
7698                                   VM_INHERIT_DEFAULT);
7699                 if (kr != KERN_SUCCESS)
7700                         return(kr);
7701                 /* Account for non-pagealigned copy object */
7702                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7703                 if (consume_on_success)
7704                         zfree(vm_map_copy_zone, copy);
7705                 return(KERN_SUCCESS);
7706         }
7707
7708         /*
7709          *      Check for special kernel buffer allocated
7710          *      by new_ipc_kmsg_copyin.
7711          */
7712
7713         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7714                 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7715                                                     copy, FALSE,
7716                                                     consume_on_success);
7717         }
7718
7719
7720         /*
7721          *      Find space for the data
7722          */
7723
7724         vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
7725                                           VM_MAP_COPY_PAGE_MASK(copy));
7726         size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
7727                                  VM_MAP_COPY_PAGE_MASK(copy))
7728                 - vm_copy_start;
7729
7730
7731 StartAgain: ;
7732
7733         vm_map_lock(dst_map);
7734         if( dst_map->disable_vmentry_reuse == TRUE) {
7735                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7736                 last = entry;
7737         } else {
7738                 assert(first_free_is_valid(dst_map));
7739                 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7740                 vm_map_min(dst_map) : last->vme_end;
7741                 start = vm_map_round_page(start,
7742                                           VM_MAP_PAGE_MASK(dst_map));
7743         }
7744
7745         while (TRUE) {
7746                 vm_map_entry_t  next = last->vme_next;
7747                 vm_map_offset_t end = start + size;
7748
7749                 if ((end > dst_map->max_offset) || (end < start)) {
7750                         if (dst_map->wait_for_space) {
7751                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7752                                         assert_wait((event_t) dst_map,
7753                                                     THREAD_INTERRUPTIBLE);
7754                                         vm_map_unlock(dst_map);
7755                                         thread_block(THREAD_CONTINUE_NULL);
7756                                         goto StartAgain;
7757                                 }
7758                         }
7759                         vm_map_unlock(dst_map);
7760                         return(KERN_NO_SPACE);
7761                 }
7762
7763                 if ((next == vm_map_to_entry(dst_map)) ||
7764                     (next->vme_start >= end))
7765                         break;
7766
7767                 last = next;
7768                 start = last->vme_end;
7769                 start = vm_map_round_page(start,
7770                                           VM_MAP_PAGE_MASK(dst_map));
7771         }
7772
7773         adjustment = start - vm_copy_start;
7774         if (! consume_on_success) {
7775                 /*
7776                  * We're not allowed to consume "copy", so we'll have to
7777                  * copy its map entries into the destination map below.
7778                  * No need to re-allocate map entries from the correct
7779                  * (pageable or not) zone, since we'll get new map entries
7780                  * during the transfer.
7781                  * We'll also adjust the map entries's "start" and "end"
7782                  * during the transfer, to keep "copy"'s entries consistent
7783                  * with its "offset".
7784                  */
7785                 goto after_adjustments;
7786         }
7787
7788         /*
7789          *      Since we're going to just drop the map
7790          *      entries from the copy into the destination
7791          *      map, they must come from the same pool.
7792          */
7793
7794         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7795                 /*
7796                  * Mismatches occur when dealing with the default
7797                  * pager.
7798                  */
7799                 zone_t          old_zone;
7800                 vm_map_entry_t  next, new;
7801
7802                 /*
7803                  * Find the zone that the copies were allocated from
7804                  */
7805
7806                 entry = vm_map_copy_first_entry(copy);
7807
7808                 /*
7809                  * Reinitialize the copy so that vm_map_copy_entry_link
7810                  * will work.
7811                  */
7812                 vm_map_store_copy_reset(copy, entry);
7813                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7814
7815                 /*
7816                  * Copy each entry.
7817                  */
7818                 while (entry != vm_map_copy_to_entry(copy)) {
7819                         new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7820                         vm_map_entry_copy_full(new, entry);
7821                         new->use_pmap = FALSE;  /* clr address space specifics */
7822                         vm_map_copy_entry_link(copy,
7823                                                vm_map_copy_last_entry(copy),
7824                                                new);
7825                         next = entry->vme_next;
7826                         old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7827                         zfree(old_zone, entry);
7828                         entry = next;
7829                 }
7830         }
7831
7832         /*
7833          *      Adjust the addresses in the copy chain, and
7834          *      reset the region attributes.
7835          */
7836
7837         for (entry = vm_map_copy_first_entry(copy);
7838              entry != vm_map_copy_to_entry(copy);
7839              entry = entry->vme_next) {
7840                 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
7841                         /*
7842                          * We're injecting this copy entry into a map that
7843                          * has the standard page alignment, so clear
7844                          * "map_aligned" (which might have been inherited
7845                          * from the original map entry).
7846                          */
7847                         entry->map_aligned = FALSE;
7848                 }
7849
7850                 entry->vme_start += adjustment;
7851                 entry->vme_end += adjustment;
7852
7853                 if (entry->map_aligned) {
7854                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
7855                                                    VM_MAP_PAGE_MASK(dst_map)));
7856                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
7857                                                    VM_MAP_PAGE_MASK(dst_map)));
7858                 }
7859
7860                 entry->inheritance = VM_INHERIT_DEFAULT;
7861                 entry->protection = VM_PROT_DEFAULT;
7862                 entry->max_protection = VM_PROT_ALL;
7863                 entry->behavior = VM_BEHAVIOR_DEFAULT;
7864
7865                 /*
7866                  * If the entry is now wired,
7867                  * map the pages into the destination map.
7868                  */
7869                 if (entry->wired_count != 0) {
7870                         register vm_map_offset_t va;
7871                         vm_object_offset_t       offset;
7872                         register vm_object_t object;
7873                         vm_prot_t prot;
7874                         int     type_of_fault;
7875
7876                         object = entry->object.vm_object;
7877                         offset = entry->offset;
7878                         va = entry->vme_start;
7879
7880                         pmap_pageable(dst_map->pmap,
7881                                       entry->vme_start,
7882                                       entry->vme_end,
7883                                       TRUE);
7884
7885                         while (va < entry->vme_end) {
7886                                 register vm_page_t      m;
7887
7888                                 /*
7889                                  * Look up the page in the object.
7890                                  * Assert that the page will be found in the
7891                                  * top object:
7892                                  * either
7893                                  *      the object was newly created by
7894                                  *      vm_object_copy_slowly, and has
7895                                  *      copies of all of the pages from
7896                                  *      the source object
7897                                  * or
7898                                  *      the object was moved from the old
7899                                  *      map entry; because the old map
7900                                  *      entry was wired, all of the pages
7901                                  *      were in the top-level object.
7902                                  *      (XXX not true if we wire pages for
7903                                  *       reading)
7904                                  */
7905                                 vm_object_lock(object);
7906
7907                                 m = vm_page_lookup(object, offset);
7908                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7909                                     m->absent)
7910                                         panic("vm_map_copyout: wiring %p", m);
7911
7912                                 /*
7913                                  * ENCRYPTED SWAP:
7914                                  * The page is assumed to be wired here, so it
7915                                  * shouldn't be encrypted.  Otherwise, we
7916                                  * couldn't enter it in the page table, since
7917                                  * we don't want the user to see the encrypted
7918                                  * data.
7919                                  */
7920                                 ASSERT_PAGE_DECRYPTED(m);
7921
7922                                 prot = entry->protection;
7923
7924                                 if (override_nx(dst_map, entry->alias) && prot)
7925                                         prot |= VM_PROT_EXECUTE;
7926
7927                                 type_of_fault = DBG_CACHE_HIT_FAULT;
7928
7929                                 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7930                                                VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
7931                                                &type_of_fault);
7932
7933                                 vm_object_unlock(object);
7934
7935                                 offset += PAGE_SIZE_64;
7936                                 va += PAGE_SIZE;
7937                         }
7938                 }
7939         }
7940
7941 after_adjustments:
7942
7943         /*
7944          *      Correct the page alignment for the result
7945          */
7946
7947         *dst_addr = start + (copy->offset - vm_copy_start);
7948
7949         /*
7950          *      Update the hints and the map size
7951          */
7952
7953         if (consume_on_success) {
7954                 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7955         } else {
7956                 SAVE_HINT_MAP_WRITE(dst_map, last);
7957         }
7958
7959         dst_map->size += size;
7960
7961         /*
7962          *      Link in the copy
7963          */
7964
7965         if (consume_on_success) {
7966                 vm_map_copy_insert(dst_map, last, copy);
7967         } else {
7968                 vm_map_copy_remap(dst_map, last, copy, adjustment,
7969                                   cur_protection, max_protection,
7970                                   inheritance);
7971         }
7972
7973         vm_map_unlock(dst_map);
7974
7975         /*
7976          * XXX  If wiring_required, call vm_map_pageable
7977          */
7978
7979         return(KERN_SUCCESS);
7980 }
7981
7982 /*
7983  *      Routine:        vm_map_copyin
7984  *
7985  *      Description:
7986  *              see vm_map_copyin_common.  Exported via Unsupported.exports.
7987  *
7988  */
7989
7990 #undef vm_map_copyin
7991
7992 kern_return_t
7993 vm_map_copyin(
7994         vm_map_t                        src_map,
7995         vm_map_address_t        src_addr,
7996         vm_map_size_t           len,
7997         boolean_t                       src_destroy,
7998         vm_map_copy_t           *copy_result)   /* OUT */
7999 {
8000         return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
8001                                         FALSE, copy_result, FALSE));
8002 }
8003
8004 /*
8005  *      Routine:        vm_map_copyin_common
8006  *
8007  *      Description:
8008  *              Copy the specified region (src_addr, len) from the
8009  *              source address space (src_map), possibly removing
8010  *              the region from the source address space (src_destroy).
8011  *
8012  *      Returns:
8013  *              A vm_map_copy_t object (copy_result), suitable for
8014  *              insertion into another address space (using vm_map_copyout),
8015  *              copying over another address space region (using
8016  *              vm_map_copy_overwrite).  If the copy is unused, it
8017  *              should be destroyed (using vm_map_copy_discard).
8018  *
8019  *      In/out conditions:
8020  *              The source map should not be locked on entry.
8021  */
8022
8023 typedef struct submap_map {
8024         vm_map_t        parent_map;
8025         vm_map_offset_t base_start;
8026         vm_map_offset_t base_end;
8027         vm_map_size_t   base_len;
8028         struct submap_map *next;
8029 } submap_map_t;
8030
8031 kern_return_t
8032 vm_map_copyin_common(
8033         vm_map_t        src_map,
8034         vm_map_address_t src_addr,
8035         vm_map_size_t   len,
8036         boolean_t       src_destroy,
8037         __unused boolean_t      src_volatile,
8038         vm_map_copy_t   *copy_result,   /* OUT */
8039         boolean_t       use_maxprot)
8040 {
8041         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
8042                                          * in multi-level lookup, this
8043                                          * entry contains the actual
8044                                          * vm_object/offset.
8045                                          */
8046         register
8047         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
8048
8049         vm_map_offset_t src_start;      /* Start of current entry --
8050                                          * where copy is taking place now
8051                                          */
8052         vm_map_offset_t src_end;        /* End of entire region to be
8053                                          * copied */
8054         vm_map_offset_t src_base;
8055         vm_map_t        base_map = src_map;
8056         boolean_t       map_share=FALSE;
8057         submap_map_t    *parent_maps = NULL;
8058
8059         register
8060         vm_map_copy_t   copy;           /* Resulting copy */
8061         vm_map_address_t        copy_addr;
8062
8063         /*
8064          *      Check for copies of zero bytes.
8065          */
8066
8067         if (len == 0) {
8068                 *copy_result = VM_MAP_COPY_NULL;
8069                 return(KERN_SUCCESS);
8070         }
8071
8072         /*
8073          *      Check that the end address doesn't overflow
8074          */
8075         src_end = src_addr + len;
8076         if (src_end < src_addr)
8077                 return KERN_INVALID_ADDRESS;
8078
8079         /*
8080          * If the copy is sufficiently small, use a kernel buffer instead
8081          * of making a virtual copy.  The theory being that the cost of
8082          * setting up VM (and taking C-O-W faults) dominates the copy costs
8083          * for small regions.
8084          */
8085         if ((len < msg_ool_size_small) && !use_maxprot)
8086                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
8087                                                    src_destroy, copy_result);
8088
8089         /*
8090          *      Compute (page aligned) start and end of region
8091          */
8092         src_start = vm_map_trunc_page(src_addr,
8093                                       VM_MAP_PAGE_MASK(src_map));
8094         src_end = vm_map_round_page(src_end,
8095                                     VM_MAP_PAGE_MASK(src_map));
8096
8097         XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
8098
8099         /*
8100          *      Allocate a header element for the list.
8101          *
8102          *      Use the start and end in the header to
8103          *      remember the endpoints prior to rounding.
8104          */
8105
8106         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8107         vm_map_copy_first_entry(copy) =
8108                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8109         copy->type = VM_MAP_COPY_ENTRY_LIST;
8110         copy->cpy_hdr.nentries = 0;
8111         copy->cpy_hdr.entries_pageable = TRUE;
8112 #if 00
8113         copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
8114 #else
8115         /*
8116          * The copy entries can be broken down for a variety of reasons,
8117          * so we can't guarantee that they will remain map-aligned...
8118          * Will need to adjust the first copy_entry's "vme_start" and
8119          * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8120          * rather than the original map's alignment.
8121          */
8122         copy->cpy_hdr.page_shift = PAGE_SHIFT;
8123 #endif
8124
8125         vm_map_store_init( &(copy->cpy_hdr) );
8126
8127         copy->offset = src_addr;
8128         copy->size = len;
8129
8130         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8131
8132 #define RETURN(x)                                               \
8133         MACRO_BEGIN                                             \
8134         vm_map_unlock(src_map);                                 \
8135         if(src_map != base_map)                                 \
8136                 vm_map_deallocate(src_map);                     \
8137         if (new_entry != VM_MAP_ENTRY_NULL)                     \
8138                 vm_map_copy_entry_dispose(copy,new_entry);      \
8139         vm_map_copy_discard(copy);                              \
8140         {                                                       \
8141                 submap_map_t    *_ptr;                          \
8142                                                                 \
8143                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
8144                         parent_maps=parent_maps->next;          \
8145                         if (_ptr->parent_map != base_map)       \
8146                                 vm_map_deallocate(_ptr->parent_map);    \
8147                         kfree(_ptr, sizeof(submap_map_t));      \
8148                 }                                               \
8149         }                                                       \
8150         MACRO_RETURN(x);                                        \
8151         MACRO_END
8152
8153         /*
8154          *      Find the beginning of the region.
8155          */
8156
8157         vm_map_lock(src_map);
8158
8159         if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
8160                 RETURN(KERN_INVALID_ADDRESS);
8161         if(!tmp_entry->is_sub_map) {
8162                 vm_map_clip_start(src_map, tmp_entry, src_start);
8163         }
8164         /* set for later submap fix-up */
8165         copy_addr = src_start;
8166
8167         /*
8168          *      Go through entries until we get to the end.
8169          */
8170
8171         while (TRUE) {
8172                 register
8173                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
8174                 vm_map_size_t   src_size;               /* Size of source
8175                                                          * map entry (in both
8176                                                          * maps)
8177                                                          */
8178
8179                 register
8180                 vm_object_t             src_object;     /* Object to copy */
8181                 vm_object_offset_t      src_offset;
8182
8183                 boolean_t       src_needs_copy;         /* Should source map
8184                                                          * be made read-only
8185                                                          * for copy-on-write?
8186                                                          */
8187
8188                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
8189
8190                 boolean_t       was_wired;              /* Was source wired? */
8191                 vm_map_version_t version;               /* Version before locks
8192                                                          * dropped to make copy
8193                                                          */
8194                 kern_return_t   result;                 /* Return value from
8195                                                          * copy_strategically.
8196                                                          */
8197                 while(tmp_entry->is_sub_map) {
8198                         vm_map_size_t submap_len;
8199                         submap_map_t *ptr;
8200
8201                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
8202                         ptr->next = parent_maps;
8203                         parent_maps = ptr;
8204                         ptr->parent_map = src_map;
8205                         ptr->base_start = src_start;
8206                         ptr->base_end = src_end;
8207                         submap_len = tmp_entry->vme_end - src_start;
8208                         if(submap_len > (src_end-src_start))
8209                                 submap_len = src_end-src_start;
8210                         ptr->base_len = submap_len;
8211
8212                         src_start -= tmp_entry->vme_start;
8213                         src_start += tmp_entry->offset;
8214                         src_end = src_start + submap_len;
8215                         src_map = tmp_entry->object.sub_map;
8216                         vm_map_lock(src_map);
8217                         /* keep an outstanding reference for all maps in */
8218                         /* the parents tree except the base map */
8219                         vm_map_reference(src_map);
8220                         vm_map_unlock(ptr->parent_map);
8221                         if (!vm_map_lookup_entry(
8222                                     src_map, src_start, &tmp_entry))
8223                                 RETURN(KERN_INVALID_ADDRESS);
8224                         map_share = TRUE;
8225                         if(!tmp_entry->is_sub_map)
8226                                 vm_map_clip_start(src_map, tmp_entry, src_start);
8227                         src_entry = tmp_entry;
8228                 }
8229                 /* we are now in the lowest level submap... */
8230
8231                 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
8232                     (tmp_entry->object.vm_object->phys_contiguous)) {
8233                         /* This is not, supported for now.In future */
8234                         /* we will need to detect the phys_contig   */
8235                         /* condition and then upgrade copy_slowly   */
8236                         /* to do physical copy from the device mem  */
8237                         /* based object. We can piggy-back off of   */
8238                         /* the was wired boolean to set-up the      */
8239                         /* proper handling */
8240                         RETURN(KERN_PROTECTION_FAILURE);
8241                 }
8242                 /*
8243                  *      Create a new address map entry to hold the result.
8244                  *      Fill in the fields from the appropriate source entries.
8245                  *      We must unlock the source map to do this if we need
8246                  *      to allocate a map entry.
8247                  */
8248                 if (new_entry == VM_MAP_ENTRY_NULL) {
8249                         version.main_timestamp = src_map->timestamp;
8250                         vm_map_unlock(src_map);
8251
8252                         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8253
8254                         vm_map_lock(src_map);
8255                         if ((version.main_timestamp + 1) != src_map->timestamp) {
8256                                 if (!vm_map_lookup_entry(src_map, src_start,
8257                                                          &tmp_entry)) {
8258                                         RETURN(KERN_INVALID_ADDRESS);
8259                                 }
8260                                 if (!tmp_entry->is_sub_map)
8261                                         vm_map_clip_start(src_map, tmp_entry, src_start);
8262                                 continue; /* restart w/ new tmp_entry */
8263                         }
8264                 }
8265
8266                 /*
8267                  *      Verify that the region can be read.
8268                  */
8269                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
8270                      !use_maxprot) ||
8271                     (src_entry->max_protection & VM_PROT_READ) == 0)
8272                         RETURN(KERN_PROTECTION_FAILURE);
8273
8274                 /*
8275                  *      Clip against the endpoints of the entire region.
8276                  */
8277
8278                 vm_map_clip_end(src_map, src_entry, src_end);
8279
8280                 src_size = src_entry->vme_end - src_start;
8281                 src_object = src_entry->object.vm_object;
8282                 src_offset = src_entry->offset;
8283                 was_wired = (src_entry->wired_count != 0);
8284
8285                 vm_map_entry_copy(new_entry, src_entry);
8286                 new_entry->use_pmap = FALSE; /* clr address space specifics */
8287
8288                 /*
8289                  *      Attempt non-blocking copy-on-write optimizations.
8290                  */
8291
8292                 if (src_destroy &&
8293                     (src_object == VM_OBJECT_NULL ||
8294                      (src_object->internal && !src_object->true_share
8295                       && !map_share))) {
8296                         /*
8297                          * If we are destroying the source, and the object
8298                          * is internal, we can move the object reference
8299                          * from the source to the copy.  The copy is
8300                          * copy-on-write only if the source is.
8301                          * We make another reference to the object, because
8302                          * destroying the source entry will deallocate it.
8303                          */
8304                         vm_object_reference(src_object);
8305
8306                         /*
8307                          * Copy is always unwired.  vm_map_copy_entry
8308                          * set its wired count to zero.
8309                          */
8310
8311                         goto CopySuccessful;
8312                 }
8313
8314
8315         RestartCopy:
8316                 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8317                     src_object, new_entry, new_entry->object.vm_object,
8318                     was_wired, 0);
8319                 if ((src_object == VM_OBJECT_NULL ||
8320                      (!was_wired && !map_share && !tmp_entry->is_shared)) &&
8321                     vm_object_copy_quickly(
8322                             &new_entry->object.vm_object,
8323                             src_offset,
8324                             src_size,
8325                             &src_needs_copy,
8326                             &new_entry_needs_copy)) {
8327
8328                         new_entry->needs_copy = new_entry_needs_copy;
8329
8330                         /*
8331                          *      Handle copy-on-write obligations
8332                          */
8333
8334                         if (src_needs_copy && !tmp_entry->needs_copy) {
8335                                 vm_prot_t prot;
8336
8337                                 prot = src_entry->protection & ~VM_PROT_WRITE;
8338
8339                                 if (override_nx(src_map, src_entry->alias) && prot)
8340                                         prot |= VM_PROT_EXECUTE;
8341
8342                                 vm_object_pmap_protect(
8343                                         src_object,
8344                                         src_offset,
8345                                         src_size,
8346                                         (src_entry->is_shared ?
8347                                          PMAP_NULL
8348                                          : src_map->pmap),
8349                                         src_entry->vme_start,
8350                                         prot);
8351
8352                                 tmp_entry->needs_copy = TRUE;
8353                         }
8354
8355                         /*
8356                          *      The map has never been unlocked, so it's safe
8357                          *      to move to the next entry rather than doing
8358                          *      another lookup.
8359                          */
8360
8361                         goto CopySuccessful;
8362                 }
8363
8364                 /*
8365                  *      Take an object reference, so that we may
8366                  *      release the map lock(s).
8367                  */
8368
8369                 assert(src_object != VM_OBJECT_NULL);
8370                 vm_object_reference(src_object);
8371
8372                 /*
8373                  *      Record the timestamp for later verification.
8374                  *      Unlock the map.
8375                  */
8376
8377                 version.main_timestamp = src_map->timestamp;
8378                 vm_map_unlock(src_map); /* Increments timestamp once! */
8379
8380                 /*
8381                  *      Perform the copy
8382                  */
8383
8384                 if (was_wired) {
8385                 CopySlowly:
8386                         vm_object_lock(src_object);
8387                         result = vm_object_copy_slowly(
8388                                 src_object,
8389                                 src_offset,
8390                                 src_size,
8391                                 THREAD_UNINT,
8392                                 &new_entry->object.vm_object);
8393                         new_entry->offset = 0;
8394                         new_entry->needs_copy = FALSE;
8395
8396                 }
8397                 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8398                          (tmp_entry->is_shared  || map_share)) {
8399                         vm_object_t new_object;
8400
8401                         vm_object_lock_shared(src_object);
8402                         new_object = vm_object_copy_delayed(
8403                                 src_object,
8404                                 src_offset,
8405                                 src_size,
8406                                 TRUE);
8407                         if (new_object == VM_OBJECT_NULL)
8408                                 goto CopySlowly;
8409
8410                         new_entry->object.vm_object = new_object;
8411                         new_entry->needs_copy = TRUE;
8412                         result = KERN_SUCCESS;
8413
8414                 } else {
8415                         result = vm_object_copy_strategically(src_object,
8416                                                               src_offset,
8417                                                               src_size,
8418                                                               &new_entry->object.vm_object,
8419                                                               &new_entry->offset,
8420                                                               &new_entry_needs_copy);
8421
8422                         new_entry->needs_copy = new_entry_needs_copy;
8423                 }
8424
8425                 if (result != KERN_SUCCESS &&
8426                     result != KERN_MEMORY_RESTART_COPY) {
8427                         vm_map_lock(src_map);
8428                         RETURN(result);
8429                 }
8430
8431                 /*
8432                  *      Throw away the extra reference
8433                  */
8434
8435                 vm_object_deallocate(src_object);
8436
8437                 /*
8438                  *      Verify that the map has not substantially
8439                  *      changed while the copy was being made.
8440                  */
8441
8442                 vm_map_lock(src_map);
8443
8444                 if ((version.main_timestamp + 1) == src_map->timestamp)
8445                         goto VerificationSuccessful;
8446
8447                 /*
8448                  *      Simple version comparison failed.
8449                  *
8450                  *      Retry the lookup and verify that the
8451                  *      same object/offset are still present.
8452                  *
8453                  *      [Note: a memory manager that colludes with
8454                  *      the calling task can detect that we have
8455                  *      cheated.  While the map was unlocked, the
8456                  *      mapping could have been changed and restored.]
8457                  */
8458
8459                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
8460                         RETURN(KERN_INVALID_ADDRESS);
8461                 }
8462
8463                 src_entry = tmp_entry;
8464                 vm_map_clip_start(src_map, src_entry, src_start);
8465
8466                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
8467                      !use_maxprot) ||
8468                     ((src_entry->max_protection & VM_PROT_READ) == 0))
8469                         goto VerificationFailed;
8470
8471                 if (src_entry->vme_end < new_entry->vme_end) {
8472                         assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
8473                                                    VM_MAP_COPY_PAGE_MASK(copy)));
8474                         new_entry->vme_end = src_entry->vme_end;
8475                         src_size = new_entry->vme_end - src_start;
8476                 }
8477
8478                 if ((src_entry->object.vm_object != src_object) ||
8479                     (src_entry->offset != src_offset) ) {
8480
8481                         /*
8482                          *      Verification failed.
8483                          *
8484                          *      Start over with this top-level entry.
8485                          */
8486
8487                 VerificationFailed: ;
8488
8489                         vm_object_deallocate(new_entry->object.vm_object);
8490                         tmp_entry = src_entry;
8491                         continue;
8492                 }
8493
8494                 /*
8495                  *      Verification succeeded.
8496                  */
8497
8498         VerificationSuccessful: ;
8499
8500                 if (result == KERN_MEMORY_RESTART_COPY)
8501                         goto RestartCopy;
8502
8503                 /*
8504                  *      Copy succeeded.
8505                  */
8506
8507         CopySuccessful: ;
8508
8509                 /*
8510                  *      Link in the new copy entry.
8511                  */
8512
8513                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
8514                                        new_entry);
8515
8516                 /*
8517                  *      Determine whether the entire region
8518                  *      has been copied.
8519                  */
8520                 src_base = src_start;
8521                 src_start = new_entry->vme_end;
8522                 new_entry = VM_MAP_ENTRY_NULL;
8523                 while ((src_start >= src_end) && (src_end != 0)) {
8524                         if (src_map != base_map) {
8525                                 submap_map_t    *ptr;
8526
8527                                 ptr = parent_maps;
8528                                 assert(ptr != NULL);
8529                                 parent_maps = parent_maps->next;
8530
8531                                 /* fix up the damage we did in that submap */
8532                                 vm_map_simplify_range(src_map,
8533                                                       src_base,
8534                                                       src_end);
8535
8536                                 vm_map_unlock(src_map);
8537                                 vm_map_deallocate(src_map);
8538                                 vm_map_lock(ptr->parent_map);
8539                                 src_map = ptr->parent_map;
8540                                 src_base = ptr->base_start;
8541                                 src_start = ptr->base_start + ptr->base_len;
8542                                 src_end = ptr->base_end;
8543                                 if ((src_end > src_start) &&
8544                                     !vm_map_lookup_entry(
8545                                             src_map, src_start, &tmp_entry))
8546                                         RETURN(KERN_INVALID_ADDRESS);
8547                                 kfree(ptr, sizeof(submap_map_t));
8548                                 if(parent_maps == NULL)
8549                                         map_share = FALSE;
8550                                 src_entry = tmp_entry->vme_prev;
8551                         } else
8552                                 break;
8553                 }
8554                 if ((src_start >= src_end) && (src_end != 0))
8555                         break;
8556
8557                 /*
8558                  *      Verify that there are no gaps in the region
8559                  */
8560
8561                 tmp_entry = src_entry->vme_next;
8562                 if ((tmp_entry->vme_start != src_start) ||
8563                     (tmp_entry == vm_map_to_entry(src_map))) {
8564
8565                         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
8566                             (vm_map_round_page(src_entry->vme_end,
8567                                                VM_MAP_PAGE_MASK(src_map)) ==
8568                              src_end)) {
8569                                 vm_map_entry_t last_copy_entry;
8570                                 vm_map_offset_t adjustment;
8571
8572                                 /*
8573                                  * This is the last entry in the range we
8574                                  * want and it happens to miss a few pages
8575                                  * because it is not map-aligned (must have
8576                                  * been imported from a differently-aligned
8577                                  * map).
8578                                  * Let's say we're done, but first we have
8579                                  * to compensate for the alignment adjustment
8580                                  * we're about to do before returning.
8581                                  */
8582
8583                                 last_copy_entry = vm_map_copy_last_entry(copy);
8584                                 assert(last_copy_entry !=
8585                                        vm_map_copy_to_entry(copy));
8586                                 adjustment =
8587                                         (vm_map_round_page((copy->offset +
8588                                                             copy->size),
8589                                                            VM_MAP_PAGE_MASK(src_map)) -
8590                                          vm_map_round_page((copy->offset +
8591                                                             copy->size),
8592                                                            PAGE_MASK));
8593                                 last_copy_entry->vme_end += adjustment;
8594                                 last_copy_entry->map_aligned = FALSE;
8595                                 /* ... and we're done */
8596                                 break;
8597                         }
8598
8599                         RETURN(KERN_INVALID_ADDRESS);
8600                 }
8601         }
8602
8603         /*
8604          * If the source should be destroyed, do it now, since the
8605          * copy was successful.
8606          */
8607         if (src_destroy) {
8608                 (void) vm_map_delete(
8609                         src_map,
8610                         vm_map_trunc_page(src_addr,
8611                                           VM_MAP_PAGE_MASK(src_map)),
8612                         src_end,
8613                         ((src_map == kernel_map) ?
8614                          VM_MAP_REMOVE_KUNWIRE :
8615                          VM_MAP_NO_FLAGS),
8616                         VM_MAP_NULL);
8617         } else {
8618                 /* fix up the damage we did in the base map */
8619                 vm_map_simplify_range(
8620                         src_map,
8621                         vm_map_trunc_page(src_addr,
8622                                           VM_MAP_PAGE_MASK(src_map)),
8623                         vm_map_round_page(src_end,
8624                                           VM_MAP_PAGE_MASK(src_map)));
8625         }
8626
8627         vm_map_unlock(src_map);
8628
8629         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
8630                 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
8631
8632                 /* adjust alignment of first copy_entry's "vme_start" */
8633                 tmp_entry = vm_map_copy_first_entry(copy);
8634                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
8635                         vm_map_offset_t adjustment;
8636                         adjustment =
8637                                 (vm_map_trunc_page(copy->offset,
8638                                                    PAGE_MASK) -
8639                                  vm_map_trunc_page(copy->offset,
8640                                                    VM_MAP_PAGE_MASK(src_map)));
8641                         if (adjustment) {
8642                                 assert(page_aligned(adjustment));
8643                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
8644                                 tmp_entry->vme_start += adjustment;
8645                                 tmp_entry->offset += adjustment;
8646                                 copy_addr += adjustment;
8647                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8648                         }
8649                 }
8650
8651                 /* adjust alignment of last copy_entry's "vme_end" */
8652                 tmp_entry = vm_map_copy_last_entry(copy);
8653                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
8654                         vm_map_offset_t adjustment;
8655                         adjustment =
8656                                 (vm_map_round_page((copy->offset +
8657                                                     copy->size),
8658                                                    VM_MAP_PAGE_MASK(src_map)) -
8659                                  vm_map_round_page((copy->offset +
8660                                                     copy->size),
8661                                                    PAGE_MASK));
8662                         if (adjustment) {
8663                                 assert(page_aligned(adjustment));
8664                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
8665                                 tmp_entry->vme_end -= adjustment;
8666                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8667                         }
8668                 }
8669         }
8670
8671         /* Fix-up start and end points in copy.  This is necessary */
8672         /* when the various entries in the copy object were picked */
8673         /* up from different sub-maps */
8674
8675         tmp_entry = vm_map_copy_first_entry(copy);
8676         while (tmp_entry != vm_map_copy_to_entry(copy)) {
8677                 assert(VM_MAP_PAGE_ALIGNED(
8678                                copy_addr + (tmp_entry->vme_end -
8679                                             tmp_entry->vme_start),
8680                                VM_MAP_COPY_PAGE_MASK(copy)));
8681                 assert(VM_MAP_PAGE_ALIGNED(
8682                                copy_addr,
8683                                VM_MAP_COPY_PAGE_MASK(copy)));
8684
8685                 /*
8686                  * The copy_entries will be injected directly into the
8687                  * destination map and might not be "map aligned" there...
8688                  */
8689                 tmp_entry->map_aligned = FALSE;
8690
8691                 tmp_entry->vme_end = copy_addr +
8692                         (tmp_entry->vme_end - tmp_entry->vme_start);
8693                 tmp_entry->vme_start = copy_addr;
8694                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8695                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8696                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8697         }
8698
8699         *copy_result = copy;
8700         return(KERN_SUCCESS);
8701
8702 #undef  RETURN
8703 }
8704
8705 kern_return_t
8706 vm_map_copy_extract(
8707         vm_map_t                src_map,
8708         vm_map_address_t        src_addr,
8709         vm_map_size_t           len,
8710         vm_map_copy_t           *copy_result,   /* OUT */
8711         vm_prot_t               *cur_prot,      /* OUT */
8712         vm_prot_t               *max_prot)
8713 {
8714         vm_map_offset_t src_start, src_end;
8715         vm_map_copy_t   copy;
8716         kern_return_t   kr;
8717
8718         /*
8719          *      Check for copies of zero bytes.
8720          */
8721
8722         if (len == 0) {
8723                 *copy_result = VM_MAP_COPY_NULL;
8724                 return(KERN_SUCCESS);
8725         }
8726
8727         /*
8728          *      Check that the end address doesn't overflow
8729          */
8730         src_end = src_addr + len;
8731         if (src_end < src_addr)
8732                 return KERN_INVALID_ADDRESS;
8733
8734         /*
8735          *      Compute (page aligned) start and end of region
8736          */
8737         src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
8738         src_end = vm_map_round_page(src_end, PAGE_MASK);
8739
8740         /*
8741          *      Allocate a header element for the list.
8742          *
8743          *      Use the start and end in the header to
8744          *      remember the endpoints prior to rounding.
8745          */
8746
8747         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8748         vm_map_copy_first_entry(copy) =
8749                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8750         copy->type = VM_MAP_COPY_ENTRY_LIST;
8751         copy->cpy_hdr.nentries = 0;
8752         copy->cpy_hdr.entries_pageable = TRUE;
8753
8754         vm_map_store_init(&copy->cpy_hdr);
8755
8756         copy->offset = 0;
8757         copy->size = len;
8758
8759         kr = vm_map_remap_extract(src_map,
8760                                   src_addr,
8761                                   len,
8762                                   FALSE, /* copy */
8763                                   &copy->cpy_hdr,
8764                                   cur_prot,
8765                                   max_prot,
8766                                   VM_INHERIT_SHARE,
8767                                   TRUE); /* pageable */
8768         if (kr != KERN_SUCCESS) {
8769                 vm_map_copy_discard(copy);
8770                 return kr;
8771         }
8772
8773         *copy_result = copy;
8774         return KERN_SUCCESS;
8775 }
8776
8777 /*
8778  *      vm_map_copyin_object:
8779  *
8780  *      Create a copy object from an object.
8781  *      Our caller donates an object reference.
8782  */
8783
8784 kern_return_t
8785 vm_map_copyin_object(
8786         vm_object_t             object,
8787         vm_object_offset_t      offset, /* offset of region in object */
8788         vm_object_size_t        size,   /* size of region in object */
8789         vm_map_copy_t   *copy_result)   /* OUT */
8790 {
8791         vm_map_copy_t   copy;           /* Resulting copy */
8792
8793         /*
8794          *      We drop the object into a special copy object
8795          *      that contains the object directly.
8796          */
8797
8798         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8799         copy->type = VM_MAP_COPY_OBJECT;
8800         copy->cpy_object = object;
8801         copy->offset = offset;
8802         copy->size = size;
8803
8804         *copy_result = copy;
8805         return(KERN_SUCCESS);
8806 }
8807
8808 static void
8809 vm_map_fork_share(
8810         vm_map_t        old_map,
8811         vm_map_entry_t  old_entry,
8812         vm_map_t        new_map)
8813 {
8814         vm_object_t     object;
8815         vm_map_entry_t  new_entry;
8816
8817         /*
8818          *      New sharing code.  New map entry
8819          *      references original object.  Internal
8820          *      objects use asynchronous copy algorithm for
8821          *      future copies.  First make sure we have
8822          *      the right object.  If we need a shadow,
8823          *      or someone else already has one, then
8824          *      make a new shadow and share it.
8825          */
8826
8827         object = old_entry->object.vm_object;
8828         if (old_entry->is_sub_map) {
8829                 assert(old_entry->wired_count == 0);
8830 #ifndef NO_NESTED_PMAP
8831                 if(old_entry->use_pmap) {
8832                         kern_return_t   result;
8833
8834                         result = pmap_nest(new_map->pmap,
8835                                            (old_entry->object.sub_map)->pmap,
8836                                            (addr64_t)old_entry->vme_start,
8837                                            (addr64_t)old_entry->vme_start,
8838                                            (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8839                         if(result)
8840                                 panic("vm_map_fork_share: pmap_nest failed!");
8841                 }
8842 #endif  /* NO_NESTED_PMAP */
8843         } else if (object == VM_OBJECT_NULL) {
8844                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8845                                                             old_entry->vme_start));
8846                 old_entry->offset = 0;
8847                 old_entry->object.vm_object = object;
8848                 assert(!old_entry->needs_copy);
8849         } else if (object->copy_strategy !=
8850                    MEMORY_OBJECT_COPY_SYMMETRIC) {
8851
8852                 /*
8853                  *      We are already using an asymmetric
8854                  *      copy, and therefore we already have
8855                  *      the right object.
8856                  */
8857
8858                 assert(! old_entry->needs_copy);
8859         }
8860         else if (old_entry->needs_copy ||       /* case 1 */
8861                  object->shadowed ||            /* case 2 */
8862                  (!object->true_share &&        /* case 3 */
8863                   !old_entry->is_shared &&
8864                   (object->vo_size >
8865                    (vm_map_size_t)(old_entry->vme_end -
8866                                    old_entry->vme_start)))) {
8867
8868                 /*
8869                  *      We need to create a shadow.
8870                  *      There are three cases here.
8871                  *      In the first case, we need to
8872                  *      complete a deferred symmetrical
8873                  *      copy that we participated in.
8874                  *      In the second and third cases,
8875                  *      we need to create the shadow so
8876                  *      that changes that we make to the
8877                  *      object do not interfere with
8878                  *      any symmetrical copies which
8879                  *      have occured (case 2) or which
8880                  *      might occur (case 3).
8881                  *
8882                  *      The first case is when we had
8883                  *      deferred shadow object creation
8884                  *      via the entry->needs_copy mechanism.
8885                  *      This mechanism only works when
8886                  *      only one entry points to the source
8887                  *      object, and we are about to create
8888                  *      a second entry pointing to the
8889                  *      same object. The problem is that
8890                  *      there is no way of mapping from
8891                  *      an object to the entries pointing
8892                  *      to it. (Deferred shadow creation
8893                  *      works with one entry because occurs
8894                  *      at fault time, and we walk from the
8895                  *      entry to the object when handling
8896                  *      the fault.)
8897                  *
8898                  *      The second case is when the object
8899                  *      to be shared has already been copied
8900                  *      with a symmetric copy, but we point
8901                  *      directly to the object without
8902                  *      needs_copy set in our entry. (This
8903                  *      can happen because different ranges
8904                  *      of an object can be pointed to by
8905                  *      different entries. In particular,
8906                  *      a single entry pointing to an object
8907                  *      can be split by a call to vm_inherit,
8908                  *      which, combined with task_create, can
8909                  *      result in the different entries
8910                  *      having different needs_copy values.)
8911                  *      The shadowed flag in the object allows
8912                  *      us to detect this case. The problem
8913                  *      with this case is that if this object
8914                  *      has or will have shadows, then we
8915                  *      must not perform an asymmetric copy
8916                  *      of this object, since such a copy
8917                  *      allows the object to be changed, which
8918                  *      will break the previous symmetrical
8919                  *      copies (which rely upon the object
8920                  *      not changing). In a sense, the shadowed
8921                  *      flag says "don't change this object".
8922                  *      We fix this by creating a shadow
8923                  *      object for this object, and sharing
8924                  *      that. This works because we are free
8925                  *      to change the shadow object (and thus
8926                  *      to use an asymmetric copy strategy);
8927                  *      this is also semantically correct,
8928                  *      since this object is temporary, and
8929                  *      therefore a copy of the object is
8930                  *      as good as the object itself. (This
8931                  *      is not true for permanent objects,
8932                  *      since the pager needs to see changes,
8933                  *      which won't happen if the changes
8934                  *      are made to a copy.)
8935                  *
8936                  *      The third case is when the object
8937                  *      to be shared has parts sticking
8938                  *      outside of the entry we're working
8939                  *      with, and thus may in the future
8940                  *      be subject to a symmetrical copy.
8941                  *      (This is a preemptive version of
8942                  *      case 2.)
8943                  */
8944                 vm_object_shadow(&old_entry->object.vm_object,
8945                                  &old_entry->offset,
8946                                  (vm_map_size_t) (old_entry->vme_end -
8947                                                   old_entry->vme_start));
8948
8949                 /*
8950                  *      If we're making a shadow for other than
8951                  *      copy on write reasons, then we have
8952                  *      to remove write permission.
8953                  */
8954
8955                 if (!old_entry->needs_copy &&
8956                     (old_entry->protection & VM_PROT_WRITE)) {
8957                         vm_prot_t prot;
8958
8959                         prot = old_entry->protection & ~VM_PROT_WRITE;
8960
8961                         if (override_nx(old_map, old_entry->alias) && prot)
8962                                 prot |= VM_PROT_EXECUTE;
8963
8964                         if (old_map->mapped_in_other_pmaps) {
8965                                 vm_object_pmap_protect(
8966                                         old_entry->object.vm_object,
8967                                         old_entry->offset,
8968                                         (old_entry->vme_end -
8969                                          old_entry->vme_start),
8970                                         PMAP_NULL,
8971                                         old_entry->vme_start,
8972                                         prot);
8973                         } else {
8974                                 pmap_protect(old_map->pmap,
8975                                              old_entry->vme_start,
8976                                              old_entry->vme_end,
8977                                              prot);
8978                         }
8979                 }
8980
8981                 old_entry->needs_copy = FALSE;
8982                 object = old_entry->object.vm_object;
8983         }
8984
8985
8986         /*
8987          *      If object was using a symmetric copy strategy,
8988          *      change its copy strategy to the default
8989          *      asymmetric copy strategy, which is copy_delay
8990          *      in the non-norma case and copy_call in the
8991          *      norma case. Bump the reference count for the
8992          *      new entry.
8993          */
8994
8995         if(old_entry->is_sub_map) {
8996                 vm_map_lock(old_entry->object.sub_map);
8997                 vm_map_reference(old_entry->object.sub_map);
8998                 vm_map_unlock(old_entry->object.sub_map);
8999         } else {
9000                 vm_object_lock(object);
9001                 vm_object_reference_locked(object);
9002                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
9003                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
9004                 }
9005                 vm_object_unlock(object);
9006         }
9007
9008         /*
9009          *      Clone the entry, using object ref from above.
9010          *      Mark both entries as shared.
9011          */
9012
9013         new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
9014                                                           * map or descendants */
9015         vm_map_entry_copy(new_entry, old_entry);
9016         old_entry->is_shared = TRUE;
9017         new_entry->is_shared = TRUE;
9018
9019         /*
9020          *      Insert the entry into the new map -- we
9021          *      know we're inserting at the end of the new
9022          *      map.
9023          */
9024
9025         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
9026
9027         /*
9028          *      Update the physical map
9029          */
9030
9031         if (old_entry->is_sub_map) {
9032                 /* Bill Angell pmap support goes here */
9033         } else {
9034                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
9035                           old_entry->vme_end - old_entry->vme_start,
9036                           old_entry->vme_start);
9037         }
9038 }
9039
9040 static boolean_t
9041 vm_map_fork_copy(
9042         vm_map_t        old_map,
9043         vm_map_entry_t  *old_entry_p,
9044         vm_map_t        new_map)
9045 {
9046         vm_map_entry_t old_entry = *old_entry_p;
9047         vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
9048         vm_map_offset_t start = old_entry->vme_start;
9049         vm_map_copy_t copy;
9050         vm_map_entry_t last = vm_map_last_entry(new_map);
9051
9052         vm_map_unlock(old_map);
9053         /*
9054          *      Use maxprot version of copyin because we
9055          *      care about whether this memory can ever
9056          *      be accessed, not just whether it's accessible
9057          *      right now.
9058          */
9059         if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
9060             != KERN_SUCCESS) {
9061                 /*
9062                  *      The map might have changed while it
9063                  *      was unlocked, check it again.  Skip
9064                  *      any blank space or permanently
9065                  *      unreadable region.
9066                  */
9067                 vm_map_lock(old_map);
9068                 if (!vm_map_lookup_entry(old_map, start, &last) ||
9069                     (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
9070                         last = last->vme_next;
9071                 }
9072                 *old_entry_p = last;
9073
9074                 /*
9075                  * XXX  For some error returns, want to
9076                  * XXX  skip to the next element.  Note
9077                  *      that INVALID_ADDRESS and
9078                  *      PROTECTION_FAILURE are handled above.
9079                  */
9080
9081                 return FALSE;
9082         }
9083
9084         /*
9085          *      Insert the copy into the new map
9086          */
9087
9088         vm_map_copy_insert(new_map, last, copy);
9089
9090         /*
9091          *      Pick up the traversal at the end of
9092          *      the copied region.
9093          */
9094
9095         vm_map_lock(old_map);
9096         start += entry_size;
9097         if (! vm_map_lookup_entry(old_map, start, &last)) {
9098                 last = last->vme_next;
9099         } else {
9100                 if (last->vme_start == start) {
9101                         /*
9102                          * No need to clip here and we don't
9103                          * want to cause any unnecessary
9104                          * unnesting...
9105                          */
9106                 } else {
9107                         vm_map_clip_start(old_map, last, start);
9108                 }
9109         }
9110         *old_entry_p = last;
9111
9112         return TRUE;
9113 }
9114
9115 /*
9116  *      vm_map_fork:
9117  *
9118  *      Create and return a new map based on the old
9119  *      map, according to the inheritance values on the
9120  *      regions in that map.
9121  *
9122  *      The source map must not be locked.
9123  */
9124 vm_map_t
9125 vm_map_fork(
9126         ledger_t        ledger,
9127         vm_map_t        old_map)
9128 {
9129         pmap_t          new_pmap;
9130         vm_map_t        new_map;
9131         vm_map_entry_t  old_entry;
9132         vm_map_size_t   new_size = 0, entry_size;
9133         vm_map_entry_t  new_entry;
9134         boolean_t       src_needs_copy;
9135         boolean_t       new_entry_needs_copy;
9136
9137         new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
9138 #if defined(__i386__) || defined(__x86_64__)
9139                                old_map->pmap->pm_task_map != TASK_MAP_32BIT
9140 #else
9141 #error Unknown architecture.
9142 #endif
9143                                );
9144
9145         vm_map_reference_swap(old_map);
9146         vm_map_lock(old_map);
9147
9148         new_map = vm_map_create(new_pmap,
9149                                 old_map->min_offset,
9150                                 old_map->max_offset,
9151                                 old_map->hdr.entries_pageable);
9152         /* inherit the parent map's page size */
9153         vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
9154         for (
9155                 old_entry = vm_map_first_entry(old_map);
9156                 old_entry != vm_map_to_entry(old_map);
9157                 ) {
9158
9159                 entry_size = old_entry->vme_end - old_entry->vme_start;
9160
9161                 switch (old_entry->inheritance) {
9162                 case VM_INHERIT_NONE:
9163                         break;
9164
9165                 case VM_INHERIT_SHARE:
9166                         vm_map_fork_share(old_map, old_entry, new_map);
9167                         new_size += entry_size;
9168                         break;
9169
9170                 case VM_INHERIT_COPY:
9171
9172                         /*
9173                          *      Inline the copy_quickly case;
9174                          *      upon failure, fall back on call
9175                          *      to vm_map_fork_copy.
9176                          */
9177
9178                         if(old_entry->is_sub_map)
9179                                 break;
9180                         if ((old_entry->wired_count != 0) ||
9181                             ((old_entry->object.vm_object != NULL) &&
9182                              (old_entry->object.vm_object->true_share))) {
9183                                 goto slow_vm_map_fork_copy;
9184                         }
9185
9186                         new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
9187                         vm_map_entry_copy(new_entry, old_entry);
9188                         /* clear address space specifics */
9189                         new_entry->use_pmap = FALSE;
9190
9191                         if (! vm_object_copy_quickly(
9192                                     &new_entry->object.vm_object,
9193                                     old_entry->offset,
9194                                     (old_entry->vme_end -
9195                                      old_entry->vme_start),
9196                                     &src_needs_copy,
9197                                     &new_entry_needs_copy)) {
9198                                 vm_map_entry_dispose(new_map, new_entry);
9199                                 goto slow_vm_map_fork_copy;
9200                         }
9201
9202                         /*
9203                          *      Handle copy-on-write obligations
9204                          */
9205
9206                         if (src_needs_copy && !old_entry->needs_copy) {
9207                                 vm_prot_t prot;
9208
9209                                 prot = old_entry->protection & ~VM_PROT_WRITE;
9210
9211                                 if (override_nx(old_map, old_entry->alias) && prot)
9212                                         prot |= VM_PROT_EXECUTE;
9213
9214                                 vm_object_pmap_protect(
9215                                         old_entry->object.vm_object,
9216                                         old_entry->offset,
9217                                         (old_entry->vme_end -
9218                                          old_entry->vme_start),
9219                                         ((old_entry->is_shared
9220                                           || old_map->mapped_in_other_pmaps)
9221                                          ? PMAP_NULL :
9222                                          old_map->pmap),
9223                                         old_entry->vme_start,
9224                                         prot);
9225
9226                                 old_entry->needs_copy = TRUE;
9227                         }
9228                         new_entry->needs_copy = new_entry_needs_copy;
9229
9230                         /*
9231                          *      Insert the entry at the end
9232                          *      of the map.
9233                          */
9234
9235                         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
9236                                           new_entry);
9237                         new_size += entry_size;
9238                         break;
9239
9240                 slow_vm_map_fork_copy:
9241                         if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
9242                                 new_size += entry_size;
9243                         }
9244                         continue;
9245                 }
9246                 old_entry = old_entry->vme_next;
9247         }
9248
9249         new_map->size = new_size;
9250         vm_map_unlock(old_map);
9251         vm_map_deallocate(old_map);
9252
9253         return(new_map);
9254 }
9255
9256 /*
9257  * vm_map_exec:
9258  *
9259  *      Setup the "new_map" with the proper execution environment according
9260  *      to the type of executable (platform, 64bit, chroot environment).
9261  *      Map the comm page and shared region, etc...
9262  */
9263 kern_return_t
9264 vm_map_exec(
9265         vm_map_t        new_map,
9266         task_t          task,
9267         void            *fsroot,
9268         cpu_type_t      cpu)
9269 {
9270         SHARED_REGION_TRACE_DEBUG(
9271                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
9272                  current_task(), new_map, task, fsroot, cpu));
9273         (void) vm_commpage_enter(new_map, task);
9274         (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
9275         SHARED_REGION_TRACE_DEBUG(
9276                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
9277                  current_task(), new_map, task, fsroot, cpu));
9278         return KERN_SUCCESS;
9279 }
9280
9281 /*
9282  *      vm_map_lookup_locked:
9283  *
9284  *      Finds the VM object, offset, and
9285  *      protection for a given virtual address in the
9286  *      specified map, assuming a page fault of the
9287  *      type specified.
9288  *
9289  *      Returns the (object, offset, protection) for
9290  *      this address, whether it is wired down, and whether
9291  *      this map has the only reference to the data in question.
9292  *      In order to later verify this lookup, a "version"
9293  *      is returned.
9294  *
9295  *      The map MUST be locked by the caller and WILL be
9296  *      locked on exit.  In order to guarantee the
9297  *      existence of the returned object, it is returned
9298  *      locked.
9299  *
9300  *      If a lookup is requested with "write protection"
9301  *      specified, the map may be changed to perform virtual
9302  *      copying operations, although the data referenced will
9303  *      remain the same.
9304  */
9305 kern_return_t
9306 vm_map_lookup_locked(
9307         vm_map_t                *var_map,       /* IN/OUT */
9308         vm_map_offset_t         vaddr,
9309         vm_prot_t               fault_type,
9310         int                     object_lock_type,
9311         vm_map_version_t        *out_version,   /* OUT */
9312         vm_object_t             *object,        /* OUT */
9313         vm_object_offset_t      *offset,        /* OUT */
9314         vm_prot_t               *out_prot,      /* OUT */
9315         boolean_t               *wired,         /* OUT */
9316         vm_object_fault_info_t  fault_info,     /* OUT */
9317         vm_map_t                *real_map)
9318 {
9319         vm_map_entry_t                  entry;
9320         register vm_map_t               map = *var_map;
9321         vm_map_t                        old_map = *var_map;
9322         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
9323         vm_map_offset_t                 cow_parent_vaddr = 0;
9324         vm_map_offset_t                 old_start = 0;
9325         vm_map_offset_t                 old_end = 0;
9326         register vm_prot_t              prot;
9327         boolean_t                       mask_protections;
9328         vm_prot_t                       original_fault_type;
9329
9330         /*
9331          * VM_PROT_MASK means that the caller wants us to use "fault_type"
9332          * as a mask against the mapping's actual protections, not as an
9333          * absolute value.
9334          */
9335         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
9336         fault_type &= ~VM_PROT_IS_MASK;
9337         original_fault_type = fault_type;
9338
9339         *real_map = map;
9340
9341 RetryLookup:
9342         fault_type = original_fault_type;
9343
9344         /*
9345          *      If the map has an interesting hint, try it before calling
9346          *      full blown lookup routine.
9347          */
9348         entry = map->hint;
9349
9350         if ((entry == vm_map_to_entry(map)) ||
9351             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
9352                 vm_map_entry_t  tmp_entry;
9353
9354                 /*
9355                  *      Entry was either not a valid hint, or the vaddr
9356                  *      was not contained in the entry, so do a full lookup.
9357                  */
9358                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
9359                         if((cow_sub_map_parent) && (cow_sub_map_parent != map))
9360                                 vm_map_unlock(cow_sub_map_parent);
9361                         if((*real_map != map)
9362                            && (*real_map != cow_sub_map_parent))
9363                                 vm_map_unlock(*real_map);
9364                         return KERN_INVALID_ADDRESS;
9365                 }
9366
9367                 entry = tmp_entry;
9368         }
9369         if(map == old_map) {
9370                 old_start = entry->vme_start;
9371                 old_end = entry->vme_end;
9372         }
9373
9374         /*
9375          *      Handle submaps.  Drop lock on upper map, submap is
9376          *      returned locked.
9377          */
9378
9379 submap_recurse:
9380         if (entry->is_sub_map) {
9381                 vm_map_offset_t         local_vaddr;
9382                 vm_map_offset_t         end_delta;
9383                 vm_map_offset_t         start_delta;
9384                 vm_map_entry_t          submap_entry;
9385                 boolean_t               mapped_needs_copy=FALSE;
9386
9387                 local_vaddr = vaddr;
9388
9389                 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
9390                         /* if real_map equals map we unlock below */
9391                         if ((*real_map != map) &&
9392                             (*real_map != cow_sub_map_parent))
9393                                 vm_map_unlock(*real_map);
9394                         *real_map = entry->object.sub_map;
9395                 }
9396
9397                 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
9398                         if (!mapped_needs_copy) {
9399                                 if (vm_map_lock_read_to_write(map)) {
9400                                         vm_map_lock_read(map);
9401                                         *real_map = map;
9402                                         goto RetryLookup;
9403                                 }
9404                                 vm_map_lock_read(entry->object.sub_map);
9405                                 *var_map = entry->object.sub_map;
9406                                 cow_sub_map_parent = map;
9407                                 /* reset base to map before cow object */
9408                                 /* this is the map which will accept   */
9409                                 /* the new cow object */
9410                                 old_start = entry->vme_start;
9411                                 old_end = entry->vme_end;
9412                                 cow_parent_vaddr = vaddr;
9413                                 mapped_needs_copy = TRUE;
9414                         } else {
9415                                 vm_map_lock_read(entry->object.sub_map);
9416                                 *var_map = entry->object.sub_map;
9417                                 if((cow_sub_map_parent != map) &&
9418                                    (*real_map != map))
9419                                         vm_map_unlock(map);
9420                         }
9421                 } else {
9422                         vm_map_lock_read(entry->object.sub_map);
9423                         *var_map = entry->object.sub_map;
9424                         /* leave map locked if it is a target */
9425                         /* cow sub_map above otherwise, just  */
9426                         /* follow the maps down to the object */
9427                         /* here we unlock knowing we are not  */
9428                         /* revisiting the map.  */
9429                         if((*real_map != map) && (map != cow_sub_map_parent))
9430                                 vm_map_unlock_read(map);
9431                 }
9432
9433                 map = *var_map;
9434
9435                 /* calculate the offset in the submap for vaddr */
9436                 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
9437
9438         RetrySubMap:
9439                 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
9440                         if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
9441                                 vm_map_unlock(cow_sub_map_parent);
9442                         }
9443                         if((*real_map != map)
9444                            && (*real_map != cow_sub_map_parent)) {
9445                                 vm_map_unlock(*real_map);
9446                         }
9447                         *real_map = map;
9448                         return KERN_INVALID_ADDRESS;
9449                 }
9450
9451                 /* find the attenuated shadow of the underlying object */
9452                 /* on our target map */
9453
9454                 /* in english the submap object may extend beyond the     */
9455                 /* region mapped by the entry or, may only fill a portion */
9456                 /* of it.  For our purposes, we only care if the object   */
9457                 /* doesn't fill.  In this case the area which will        */
9458                 /* ultimately be clipped in the top map will only need    */
9459                 /* to be as big as the portion of the underlying entry    */
9460                 /* which is mapped */
9461                 start_delta = submap_entry->vme_start > entry->offset ?
9462                         submap_entry->vme_start - entry->offset : 0;
9463
9464                 end_delta =
9465                         (entry->offset + start_delta + (old_end - old_start)) <=
9466                         submap_entry->vme_end ?
9467                         0 : (entry->offset +
9468                              (old_end - old_start))
9469                         - submap_entry->vme_end;
9470
9471                 old_start += start_delta;
9472                 old_end -= end_delta;
9473
9474                 if(submap_entry->is_sub_map) {
9475                         entry = submap_entry;
9476                         vaddr = local_vaddr;
9477                         goto submap_recurse;
9478                 }
9479
9480                 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
9481
9482                         vm_object_t     sub_object, copy_object;
9483                         vm_object_offset_t copy_offset;
9484                         vm_map_offset_t local_start;
9485                         vm_map_offset_t local_end;
9486                         boolean_t               copied_slowly = FALSE;
9487
9488                         if (vm_map_lock_read_to_write(map)) {
9489                                 vm_map_lock_read(map);
9490                                 old_start -= start_delta;
9491                                 old_end += end_delta;
9492                                 goto RetrySubMap;
9493                         }
9494
9495
9496                         sub_object = submap_entry->object.vm_object;
9497                         if (sub_object == VM_OBJECT_NULL) {
9498                                 sub_object =
9499                                         vm_object_allocate(
9500                                                 (vm_map_size_t)
9501                                                 (submap_entry->vme_end -
9502                                                  submap_entry->vme_start));
9503                                 submap_entry->object.vm_object = sub_object;
9504                                 submap_entry->offset = 0;
9505                         }
9506                         local_start =  local_vaddr -
9507                                 (cow_parent_vaddr - old_start);
9508                         local_end = local_vaddr +
9509                                 (old_end - cow_parent_vaddr);
9510                         vm_map_clip_start(map, submap_entry, local_start);
9511                         vm_map_clip_end(map, submap_entry, local_end);
9512                         /* unnesting was done in vm_map_clip_start/end() */
9513                         assert(!submap_entry->use_pmap);
9514
9515                         /* This is the COW case, lets connect */
9516                         /* an entry in our space to the underlying */
9517                         /* object in the submap, bypassing the  */
9518                         /* submap. */
9519
9520
9521                         if(submap_entry->wired_count != 0 ||
9522                            (sub_object->copy_strategy ==
9523                             MEMORY_OBJECT_COPY_NONE)) {
9524                                 vm_object_lock(sub_object);
9525                                 vm_object_copy_slowly(sub_object,
9526                                                       submap_entry->offset,
9527                                                       (submap_entry->vme_end -
9528                                                        submap_entry->vme_start),
9529                                                       FALSE,
9530                                                       &copy_object);
9531                                 copied_slowly = TRUE;
9532                         } else {
9533
9534                                 /* set up shadow object */
9535                                 copy_object = sub_object;
9536                                 vm_object_reference(copy_object);
9537                                 sub_object->shadowed = TRUE;
9538                                 submap_entry->needs_copy = TRUE;
9539
9540                                 prot = submap_entry->protection & ~VM_PROT_WRITE;
9541
9542                                 if (override_nx(old_map, submap_entry->alias) && prot)
9543                                         prot |= VM_PROT_EXECUTE;
9544
9545                                 vm_object_pmap_protect(
9546                                         sub_object,
9547                                         submap_entry->offset,
9548                                         submap_entry->vme_end -
9549                                         submap_entry->vme_start,
9550                                         (submap_entry->is_shared
9551                                          || map->mapped_in_other_pmaps) ?
9552                                         PMAP_NULL : map->pmap,
9553                                         submap_entry->vme_start,
9554                                         prot);
9555                         }
9556
9557                         /*
9558                          * Adjust the fault offset to the submap entry.
9559                          */
9560                         copy_offset = (local_vaddr -
9561                                        submap_entry->vme_start +
9562                                        submap_entry->offset);
9563
9564                         /* This works diffently than the   */
9565                         /* normal submap case. We go back  */
9566                         /* to the parent of the cow map and*/
9567                         /* clip out the target portion of  */
9568                         /* the sub_map, substituting the   */
9569                         /* new copy object,                */
9570
9571                         vm_map_unlock(map);
9572                         local_start = old_start;
9573                         local_end = old_end;
9574                         map = cow_sub_map_parent;
9575                         *var_map = cow_sub_map_parent;
9576                         vaddr = cow_parent_vaddr;
9577                         cow_sub_map_parent = NULL;
9578
9579                         if(!vm_map_lookup_entry(map,
9580                                                 vaddr, &entry)) {
9581                                 vm_object_deallocate(
9582                                         copy_object);
9583                                 vm_map_lock_write_to_read(map);
9584                                 return KERN_INVALID_ADDRESS;
9585                         }
9586
9587                         /* clip out the portion of space */
9588                         /* mapped by the sub map which   */
9589                         /* corresponds to the underlying */
9590                         /* object */
9591
9592                         /*
9593                          * Clip (and unnest) the smallest nested chunk
9594                          * possible around the faulting address...
9595                          */
9596                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
9597                         local_end = local_start + pmap_nesting_size_min;
9598                         /*
9599                          * ... but don't go beyond the "old_start" to "old_end"
9600                          * range, to avoid spanning over another VM region
9601                          * with a possibly different VM object and/or offset.
9602                          */
9603                         if (local_start < old_start) {
9604                                 local_start = old_start;
9605                         }
9606                         if (local_end > old_end) {
9607                                 local_end = old_end;
9608                         }
9609                         /*
9610                          * Adjust copy_offset to the start of the range.
9611                          */
9612                         copy_offset -= (vaddr - local_start);
9613
9614                         vm_map_clip_start(map, entry, local_start);
9615                         vm_map_clip_end(map, entry, local_end);
9616                         /* unnesting was done in vm_map_clip_start/end() */
9617                         assert(!entry->use_pmap);
9618
9619                         /* substitute copy object for */
9620                         /* shared map entry           */
9621                         vm_map_deallocate(entry->object.sub_map);
9622                         entry->is_sub_map = FALSE;
9623                         entry->object.vm_object = copy_object;
9624
9625                         /* propagate the submap entry's protections */
9626                         entry->protection |= submap_entry->protection;
9627                         entry->max_protection |= submap_entry->max_protection;
9628
9629                         if(copied_slowly) {
9630                                 entry->offset = local_start - old_start;
9631                                 entry->needs_copy = FALSE;
9632                                 entry->is_shared = FALSE;
9633                         } else {
9634                                 entry->offset = copy_offset;
9635                                 entry->needs_copy = TRUE;
9636                                 if(entry->inheritance == VM_INHERIT_SHARE)
9637                                         entry->inheritance = VM_INHERIT_COPY;
9638                                 if (map != old_map)
9639                                         entry->is_shared = TRUE;
9640                         }
9641                         if(entry->inheritance == VM_INHERIT_SHARE)
9642                                 entry->inheritance = VM_INHERIT_COPY;
9643
9644                         vm_map_lock_write_to_read(map);
9645                 } else {
9646                         if((cow_sub_map_parent)
9647                            && (cow_sub_map_parent != *real_map)
9648                            && (cow_sub_map_parent != map)) {
9649                                 vm_map_unlock(cow_sub_map_parent);
9650                         }
9651                         entry = submap_entry;
9652                         vaddr = local_vaddr;
9653                 }
9654         }
9655
9656         /*
9657          *      Check whether this task is allowed to have
9658          *      this page.
9659          */
9660
9661         prot = entry->protection;
9662
9663         if (override_nx(old_map, entry->alias) && prot) {
9664                 /*
9665                  * HACK -- if not a stack, then allow execution
9666                  */
9667                 prot |= VM_PROT_EXECUTE;
9668         }
9669
9670         if (mask_protections) {
9671                 fault_type &= prot;
9672                 if (fault_type == VM_PROT_NONE) {
9673                         goto protection_failure;
9674                 }
9675         }
9676         if ((fault_type & (prot)) != fault_type) {
9677         protection_failure:
9678                 if (*real_map != map) {
9679                         vm_map_unlock(*real_map);
9680                 }
9681                 *real_map = map;
9682
9683                 if ((fault_type & VM_PROT_EXECUTE) && prot)
9684                         log_stack_execution_failure((addr64_t)vaddr, prot);
9685
9686                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
9687                 return KERN_PROTECTION_FAILURE;
9688         }
9689
9690         /*
9691          *      If this page is not pageable, we have to get
9692          *      it for all possible accesses.
9693          */
9694
9695         *wired = (entry->wired_count != 0);
9696         if (*wired)
9697                 fault_type = prot;
9698
9699         /*
9700          *      If the entry was copy-on-write, we either ...
9701          */
9702
9703         if (entry->needs_copy) {
9704                 /*
9705                  *      If we want to write the page, we may as well
9706                  *      handle that now since we've got the map locked.
9707                  *
9708                  *      If we don't need to write the page, we just
9709                  *      demote the permissions allowed.
9710                  */
9711
9712                 if ((fault_type & VM_PROT_WRITE) || *wired) {
9713                         /*
9714                          *      Make a new object, and place it in the
9715                          *      object chain.  Note that no new references
9716                          *      have appeared -- one just moved from the
9717                          *      map to the new object.
9718                          */
9719
9720                         if (vm_map_lock_read_to_write(map)) {
9721                                 vm_map_lock_read(map);
9722                                 goto RetryLookup;
9723                         }
9724                         vm_object_shadow(&entry->object.vm_object,
9725                                          &entry->offset,
9726                                          (vm_map_size_t) (entry->vme_end -
9727                                                           entry->vme_start));
9728
9729                         entry->object.vm_object->shadowed = TRUE;
9730                         entry->needs_copy = FALSE;
9731                         vm_map_lock_write_to_read(map);
9732                 }
9733                 else {
9734                         /*
9735                          *      We're attempting to read a copy-on-write
9736                          *      page -- don't allow writes.
9737                          */
9738
9739                         prot &= (~VM_PROT_WRITE);
9740                 }
9741         }
9742
9743         /*
9744          *      Create an object if necessary.
9745          */
9746         if (entry->object.vm_object == VM_OBJECT_NULL) {
9747
9748                 if (vm_map_lock_read_to_write(map)) {
9749                         vm_map_lock_read(map);
9750                         goto RetryLookup;
9751                 }
9752
9753                 entry->object.vm_object = vm_object_allocate(
9754                         (vm_map_size_t)(entry->vme_end - entry->vme_start));
9755                 entry->offset = 0;
9756                 vm_map_lock_write_to_read(map);
9757         }
9758
9759         /*
9760          *      Return the object/offset from this entry.  If the entry
9761          *      was copy-on-write or empty, it has been fixed up.  Also
9762          *      return the protection.
9763          */
9764
9765         *offset = (vaddr - entry->vme_start) + entry->offset;
9766         *object = entry->object.vm_object;
9767         *out_prot = prot;
9768
9769         if (fault_info) {
9770                 fault_info->interruptible = THREAD_UNINT; /* for now... */
9771                 /* ... the caller will change "interruptible" if needed */
9772                 fault_info->cluster_size = 0;
9773                 fault_info->user_tag = entry->alias;
9774                 fault_info->behavior = entry->behavior;
9775                 fault_info->lo_offset = entry->offset;
9776                 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9777                 fault_info->no_cache  = entry->no_cache;
9778                 fault_info->stealth = FALSE;
9779                 fault_info->io_sync = FALSE;
9780                 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9781                 fault_info->mark_zf_absent = FALSE;
9782                 fault_info->batch_pmap_op = FALSE;
9783         }
9784
9785         /*
9786          *      Lock the object to prevent it from disappearing
9787          */
9788         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9789                 vm_object_lock(*object);
9790         else
9791                 vm_object_lock_shared(*object);
9792
9793         /*
9794          *      Save the version number
9795          */
9796
9797         out_version->main_timestamp = map->timestamp;
9798
9799         return KERN_SUCCESS;
9800 }
9801
9802
9803 /*
9804  *      vm_map_verify:
9805  *
9806  *      Verifies that the map in question has not changed
9807  *      since the given version.  If successful, the map
9808  *      will not change until vm_map_verify_done() is called.
9809  */
9810 boolean_t
9811 vm_map_verify(
9812         register vm_map_t               map,
9813         register vm_map_version_t       *version)       /* REF */
9814 {
9815         boolean_t       result;
9816
9817         vm_map_lock_read(map);
9818         result = (map->timestamp == version->main_timestamp);
9819
9820         if (!result)
9821                 vm_map_unlock_read(map);
9822
9823         return(result);
9824 }
9825
9826 /*
9827  *      vm_map_verify_done:
9828  *
9829  *      Releases locks acquired by a vm_map_verify.
9830  *
9831  *      This is now a macro in vm/vm_map.h.  It does a
9832  *      vm_map_unlock_read on the map.
9833  */
9834
9835
9836 /*
9837  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9838  *      Goes away after regular vm_region_recurse function migrates to
9839  *      64 bits
9840  *      vm_region_recurse: A form of vm_region which follows the
9841  *      submaps in a target map
9842  *
9843  */
9844
9845 kern_return_t
9846 vm_map_region_recurse_64(
9847         vm_map_t                 map,
9848         vm_map_offset_t *address,               /* IN/OUT */
9849         vm_map_size_t           *size,                  /* OUT */
9850         natural_t               *nesting_depth, /* IN/OUT */
9851         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
9852         mach_msg_type_number_t  *count) /* IN/OUT */
9853 {
9854         mach_msg_type_number_t  original_count;
9855         vm_region_extended_info_data_t  extended;
9856         vm_map_entry_t                  tmp_entry;
9857         vm_map_offset_t                 user_address;
9858         unsigned int                    user_max_depth;
9859
9860         /*
9861          * "curr_entry" is the VM map entry preceding or including the
9862          * address we're looking for.
9863          * "curr_map" is the map or sub-map containing "curr_entry".
9864          * "curr_address" is the equivalent of the top map's "user_address"
9865          * in the current map.
9866          * "curr_offset" is the cumulated offset of "curr_map" in the
9867          * target task's address space.
9868          * "curr_depth" is the depth of "curr_map" in the chain of
9869          * sub-maps.
9870          *
9871          * "curr_max_below" and "curr_max_above" limit the range (around
9872          * "curr_address") we should take into account in the current (sub)map.
9873          * They limit the range to what's visible through the map entries
9874          * we've traversed from the top map to the current map.
9875
9876          */
9877         vm_map_entry_t                  curr_entry;
9878         vm_map_address_t                curr_address;
9879         vm_map_offset_t                 curr_offset;
9880         vm_map_t                        curr_map;
9881         unsigned int                    curr_depth;
9882         vm_map_offset_t                 curr_max_below, curr_max_above;
9883         vm_map_offset_t                 curr_skip;
9884
9885         /*
9886          * "next_" is the same as "curr_" but for the VM region immediately
9887          * after the address we're looking for.  We need to keep track of this
9888          * too because we want to return info about that region if the
9889          * address we're looking for is not mapped.
9890          */
9891         vm_map_entry_t                  next_entry;
9892         vm_map_offset_t                 next_offset;
9893         vm_map_offset_t                 next_address;
9894         vm_map_t                        next_map;
9895         unsigned int                    next_depth;
9896         vm_map_offset_t                 next_max_below, next_max_above;
9897         vm_map_offset_t                 next_skip;
9898
9899         boolean_t                       look_for_pages;
9900         vm_region_submap_short_info_64_t short_info;
9901
9902         if (map == VM_MAP_NULL) {
9903                 /* no address space to work on */
9904                 return KERN_INVALID_ARGUMENT;
9905         }
9906
9907
9908         if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9909                 /*
9910                  * "info" structure is not big enough and
9911                  * would overflow
9912                  */
9913                 return KERN_INVALID_ARGUMENT;
9914         }
9915
9916         original_count = *count;
9917
9918         if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
9919                 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9920                 look_for_pages = FALSE;
9921                 short_info = (vm_region_submap_short_info_64_t) submap_info;
9922                 submap_info = NULL;
9923         } else {
9924                 look_for_pages = TRUE;
9925                 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
9926                 short_info = NULL;
9927
9928                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
9929                         *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
9930                 }
9931         }
9932
9933         user_address = *address;
9934         user_max_depth = *nesting_depth;
9935
9936         curr_entry = NULL;
9937         curr_map = map;
9938         curr_address = user_address;
9939         curr_offset = 0;
9940         curr_skip = 0;
9941         curr_depth = 0;
9942         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9943         curr_max_below = curr_address;
9944
9945         next_entry = NULL;
9946         next_map = NULL;
9947         next_address = 0;
9948         next_offset = 0;
9949         next_skip = 0;
9950         next_depth = 0;
9951         next_max_above = (vm_map_offset_t) -1;
9952         next_max_below = (vm_map_offset_t) -1;
9953
9954         if (not_in_kdp) {
9955                 vm_map_lock_read(curr_map);
9956         }
9957
9958         for (;;) {
9959                 if (vm_map_lookup_entry(curr_map,
9960                                         curr_address,
9961                                         &tmp_entry)) {
9962                         /* tmp_entry contains the address we're looking for */
9963                         curr_entry = tmp_entry;
9964                 } else {
9965                         vm_map_offset_t skip;
9966                         /*
9967                          * The address is not mapped.  "tmp_entry" is the
9968                          * map entry preceding the address.  We want the next
9969                          * one, if it exists.
9970                          */
9971                         curr_entry = tmp_entry->vme_next;
9972
9973                         if (curr_entry == vm_map_to_entry(curr_map) ||
9974                             (curr_entry->vme_start >=
9975                              curr_address + curr_max_above)) {
9976                                 /* no next entry at this level: stop looking */
9977                                 if (not_in_kdp) {
9978                                         vm_map_unlock_read(curr_map);
9979                                 }
9980                                 curr_entry = NULL;
9981                                 curr_map = NULL;
9982                                 curr_offset = 0;
9983                                 curr_depth = 0;
9984                                 curr_max_above = 0;
9985                                 curr_max_below = 0;
9986                                 break;
9987                         }
9988
9989                         /* adjust current address and offset */
9990                         skip = curr_entry->vme_start - curr_address;
9991                         curr_address = curr_entry->vme_start;
9992                         curr_skip = skip;
9993                         curr_offset += skip;
9994                         curr_max_above -= skip;
9995                         curr_max_below = 0;
9996                 }
9997
9998                 /*
9999                  * Is the next entry at this level closer to the address (or
10000                  * deeper in the submap chain) than the one we had
10001                  * so far ?
10002                  */
10003                 tmp_entry = curr_entry->vme_next;
10004                 if (tmp_entry == vm_map_to_entry(curr_map)) {
10005                         /* no next entry at this level */
10006                 } else if (tmp_entry->vme_start >=
10007                            curr_address + curr_max_above) {
10008                         /*
10009                          * tmp_entry is beyond the scope of what we mapped of
10010                          * this submap in the upper level: ignore it.
10011                          */
10012                 } else if ((next_entry == NULL) ||
10013                            (tmp_entry->vme_start + curr_offset <=
10014                             next_entry->vme_start + next_offset)) {
10015                         /*
10016                          * We didn't have a "next_entry" or this one is
10017                          * closer to the address we're looking for:
10018                          * use this "tmp_entry" as the new "next_entry".
10019                          */
10020                         if (next_entry != NULL) {
10021                                 /* unlock the last "next_map" */
10022                                 if (next_map != curr_map && not_in_kdp) {
10023                                         vm_map_unlock_read(next_map);
10024                                 }
10025                         }
10026                         next_entry = tmp_entry;
10027                         next_map = curr_map;
10028                         next_depth = curr_depth;
10029                         next_address = next_entry->vme_start;
10030                         next_skip = curr_skip;
10031                         next_offset = curr_offset;
10032                         next_offset += (next_address - curr_address);
10033                         next_max_above = MIN(next_max_above, curr_max_above);
10034                         next_max_above = MIN(next_max_above,
10035                                              next_entry->vme_end - next_address);
10036                         next_max_below = MIN(next_max_below, curr_max_below);
10037                         next_max_below = MIN(next_max_below,
10038                                              next_address - next_entry->vme_start);
10039                 }
10040
10041                 /*
10042                  * "curr_max_{above,below}" allow us to keep track of the
10043                  * portion of the submap that is actually mapped at this level:
10044                  * the rest of that submap is irrelevant to us, since it's not
10045                  * mapped here.
10046                  * The relevant portion of the map starts at
10047                  * "curr_entry->offset" up to the size of "curr_entry".
10048                  */
10049                 curr_max_above = MIN(curr_max_above,
10050                                      curr_entry->vme_end - curr_address);
10051                 curr_max_below = MIN(curr_max_below,
10052                                      curr_address - curr_entry->vme_start);
10053
10054                 if (!curr_entry->is_sub_map ||
10055                     curr_depth >= user_max_depth) {
10056                         /*
10057                          * We hit a leaf map or we reached the maximum depth
10058                          * we could, so stop looking.  Keep the current map
10059                          * locked.
10060                          */
10061                         break;
10062                 }
10063
10064                 /*
10065                  * Get down to the next submap level.
10066                  */
10067
10068                 /*
10069                  * Lock the next level and unlock the current level,
10070                  * unless we need to keep it locked to access the "next_entry"
10071                  * later.
10072                  */
10073                 if (not_in_kdp) {
10074                         vm_map_lock_read(curr_entry->object.sub_map);
10075                 }
10076                 if (curr_map == next_map) {
10077                         /* keep "next_map" locked in case we need it */
10078                 } else {
10079                         /* release this map */
10080                         if (not_in_kdp)
10081                                 vm_map_unlock_read(curr_map);
10082                 }
10083
10084                 /*
10085                  * Adjust the offset.  "curr_entry" maps the submap
10086                  * at relative address "curr_entry->vme_start" in the
10087                  * curr_map but skips the first "curr_entry->offset"
10088                  * bytes of the submap.
10089                  * "curr_offset" always represents the offset of a virtual
10090                  * address in the curr_map relative to the absolute address
10091                  * space (i.e. the top-level VM map).
10092                  */
10093                 curr_offset +=
10094                         (curr_entry->offset - curr_entry->vme_start);
10095                 curr_address = user_address + curr_offset;
10096                 /* switch to the submap */
10097                 curr_map = curr_entry->object.sub_map;
10098                 curr_depth++;
10099                 curr_entry = NULL;
10100         }
10101
10102         if (curr_entry == NULL) {
10103                 /* no VM region contains the address... */
10104                 if (next_entry == NULL) {
10105                         /* ... and no VM region follows it either */
10106                         return KERN_INVALID_ADDRESS;
10107                 }
10108                 /* ... gather info about the next VM region */
10109                 curr_entry = next_entry;
10110                 curr_map = next_map;    /* still locked ... */
10111                 curr_address = next_address;
10112                 curr_skip = next_skip;
10113                 curr_offset = next_offset;
10114                 curr_depth = next_depth;
10115                 curr_max_above = next_max_above;
10116                 curr_max_below = next_max_below;
10117                 if (curr_map == map) {
10118                         user_address = curr_address;
10119                 }
10120         } else {
10121                 /* we won't need "next_entry" after all */
10122                 if (next_entry != NULL) {
10123                         /* release "next_map" */
10124                         if (next_map != curr_map && not_in_kdp) {
10125                                 vm_map_unlock_read(next_map);
10126                         }
10127                 }
10128         }
10129         next_entry = NULL;
10130         next_map = NULL;
10131         next_offset = 0;
10132         next_skip = 0;
10133         next_depth = 0;
10134         next_max_below = -1;
10135         next_max_above = -1;
10136
10137         *nesting_depth = curr_depth;
10138         *size = curr_max_above + curr_max_below;
10139         *address = user_address + curr_skip - curr_max_below;
10140
10141 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
10142 // so probably should be a real 32b ID vs. ptr.
10143 // Current users just check for equality
10144 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
10145
10146         if (look_for_pages) {
10147                 submap_info->user_tag = curr_entry->alias;
10148                 submap_info->offset = curr_entry->offset;
10149                 submap_info->protection = curr_entry->protection;
10150                 submap_info->inheritance = curr_entry->inheritance;
10151                 submap_info->max_protection = curr_entry->max_protection;
10152                 submap_info->behavior = curr_entry->behavior;
10153                 submap_info->user_wired_count = curr_entry->user_wired_count;
10154                 submap_info->is_submap = curr_entry->is_sub_map;
10155                 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10156         } else {
10157                 short_info->user_tag = curr_entry->alias;
10158                 short_info->offset = curr_entry->offset;
10159                 short_info->protection = curr_entry->protection;
10160                 short_info->inheritance = curr_entry->inheritance;
10161                 short_info->max_protection = curr_entry->max_protection;
10162                 short_info->behavior = curr_entry->behavior;
10163                 short_info->user_wired_count = curr_entry->user_wired_count;
10164                 short_info->is_submap = curr_entry->is_sub_map;
10165                 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10166         }
10167
10168         extended.pages_resident = 0;
10169         extended.pages_swapped_out = 0;
10170         extended.pages_shared_now_private = 0;
10171         extended.pages_dirtied = 0;
10172         extended.pages_reusable = 0;
10173         extended.external_pager = 0;
10174         extended.shadow_depth = 0;
10175
10176         if (not_in_kdp) {
10177                 if (!curr_entry->is_sub_map) {
10178                         vm_map_offset_t range_start, range_end;
10179                         range_start = MAX((curr_address - curr_max_below),
10180                                           curr_entry->vme_start);
10181                         range_end = MIN((curr_address + curr_max_above),
10182                                         curr_entry->vme_end);
10183                         vm_map_region_walk(curr_map,
10184                                            range_start,
10185                                            curr_entry,
10186                                            (curr_entry->offset +
10187                                             (range_start -
10188                                              curr_entry->vme_start)),
10189                                            range_end - range_start,
10190                                            &extended,
10191                                            look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
10192                         if (extended.external_pager &&
10193                             extended.ref_count == 2 &&
10194                             extended.share_mode == SM_SHARED) {
10195                                 extended.share_mode = SM_PRIVATE;
10196                         }
10197                 } else {
10198                         if (curr_entry->use_pmap) {
10199                                 extended.share_mode = SM_TRUESHARED;
10200                         } else {
10201                                 extended.share_mode = SM_PRIVATE;
10202                         }
10203                         extended.ref_count =
10204                                 curr_entry->object.sub_map->ref_count;
10205                 }
10206         }
10207
10208         if (look_for_pages) {
10209                 submap_info->pages_resident = extended.pages_resident;
10210                 submap_info->pages_swapped_out = extended.pages_swapped_out;
10211                 submap_info->pages_shared_now_private =
10212                         extended.pages_shared_now_private;
10213                 submap_info->pages_dirtied = extended.pages_dirtied;
10214                 submap_info->external_pager = extended.external_pager;
10215                 submap_info->shadow_depth = extended.shadow_depth;
10216                 submap_info->share_mode = extended.share_mode;
10217                 submap_info->ref_count = extended.ref_count;
10218
10219                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10220                         submap_info->pages_reusable = extended.pages_reusable;
10221                 }
10222         } else {
10223                 short_info->external_pager = extended.external_pager;
10224                 short_info->shadow_depth = extended.shadow_depth;
10225                 short_info->share_mode = extended.share_mode;
10226                 short_info->ref_count = extended.ref_count;
10227         }
10228
10229         if (not_in_kdp) {
10230                 vm_map_unlock_read(curr_map);
10231         }
10232
10233         return KERN_SUCCESS;
10234 }
10235
10236 /*
10237  *      vm_region:
10238  *
10239  *      User call to obtain information about a region in
10240  *      a task's address map. Currently, only one flavor is
10241  *      supported.
10242  *
10243  *      XXX The reserved and behavior fields cannot be filled
10244  *          in until the vm merge from the IK is completed, and
10245  *          vm_reserve is implemented.
10246  */
10247
10248 kern_return_t
10249 vm_map_region(
10250         vm_map_t                 map,
10251         vm_map_offset_t *address,               /* IN/OUT */
10252         vm_map_size_t           *size,                  /* OUT */
10253         vm_region_flavor_t       flavor,                /* IN */
10254         vm_region_info_t         info,                  /* OUT */
10255         mach_msg_type_number_t  *count, /* IN/OUT */
10256         mach_port_t             *object_name)           /* OUT */
10257 {
10258         vm_map_entry_t          tmp_entry;
10259         vm_map_entry_t          entry;
10260         vm_map_offset_t         start;
10261
10262         if (map == VM_MAP_NULL)
10263                 return(KERN_INVALID_ARGUMENT);
10264
10265         switch (flavor) {
10266
10267         case VM_REGION_BASIC_INFO:
10268                 /* legacy for old 32-bit objects info */
10269         {
10270                 vm_region_basic_info_t  basic;
10271
10272                 if (*count < VM_REGION_BASIC_INFO_COUNT)
10273                         return(KERN_INVALID_ARGUMENT);
10274
10275                 basic = (vm_region_basic_info_t) info;
10276                 *count = VM_REGION_BASIC_INFO_COUNT;
10277
10278                 vm_map_lock_read(map);
10279
10280                 start = *address;
10281                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10282                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10283                                 vm_map_unlock_read(map);
10284                                 return(KERN_INVALID_ADDRESS);
10285                         }
10286                 } else {
10287                         entry = tmp_entry;
10288                 }
10289
10290                 start = entry->vme_start;
10291
10292                 basic->offset = (uint32_t)entry->offset;
10293                 basic->protection = entry->protection;
10294                 basic->inheritance = entry->inheritance;
10295                 basic->max_protection = entry->max_protection;
10296                 basic->behavior = entry->behavior;
10297                 basic->user_wired_count = entry->user_wired_count;
10298                 basic->reserved = entry->is_sub_map;
10299                 *address = start;
10300                 *size = (entry->vme_end - start);
10301
10302                 if (object_name) *object_name = IP_NULL;
10303                 if (entry->is_sub_map) {
10304                         basic->shared = FALSE;
10305                 } else {
10306                         basic->shared = entry->is_shared;
10307                 }
10308
10309                 vm_map_unlock_read(map);
10310                 return(KERN_SUCCESS);
10311         }
10312
10313         case VM_REGION_BASIC_INFO_64:
10314         {
10315                 vm_region_basic_info_64_t       basic;
10316
10317                 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
10318                         return(KERN_INVALID_ARGUMENT);
10319
10320                 basic = (vm_region_basic_info_64_t) info;
10321                 *count = VM_REGION_BASIC_INFO_COUNT_64;
10322
10323                 vm_map_lock_read(map);
10324
10325                 start = *address;
10326                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10327                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10328                                 vm_map_unlock_read(map);
10329                                 return(KERN_INVALID_ADDRESS);
10330                         }
10331                 } else {
10332                         entry = tmp_entry;
10333                 }
10334
10335                 start = entry->vme_start;
10336
10337                 basic->offset = entry->offset;
10338                 basic->protection = entry->protection;
10339                 basic->inheritance = entry->inheritance;
10340                 basic->max_protection = entry->max_protection;
10341                 basic->behavior = entry->behavior;
10342                 basic->user_wired_count = entry->user_wired_count;
10343                 basic->reserved = entry->is_sub_map;
10344                 *address = start;
10345                 *size = (entry->vme_end - start);
10346
10347                 if (object_name) *object_name = IP_NULL;
10348                 if (entry->is_sub_map) {
10349                         basic->shared = FALSE;
10350                 } else {
10351                         basic->shared = entry->is_shared;
10352                 }
10353
10354                 vm_map_unlock_read(map);
10355                 return(KERN_SUCCESS);
10356         }
10357         case VM_REGION_EXTENDED_INFO:
10358                 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
10359                         return(KERN_INVALID_ARGUMENT);
10360                 /*fallthru*/
10361         case VM_REGION_EXTENDED_INFO__legacy:
10362                 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
10363                         return KERN_INVALID_ARGUMENT;
10364
10365         {
10366                 vm_region_extended_info_t       extended;
10367                 mach_msg_type_number_t original_count;
10368
10369                 extended = (vm_region_extended_info_t) info;
10370
10371                 vm_map_lock_read(map);
10372
10373                 start = *address;
10374                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10375                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10376                                 vm_map_unlock_read(map);
10377                                 return(KERN_INVALID_ADDRESS);
10378                         }
10379                 } else {
10380                         entry = tmp_entry;
10381                 }
10382                 start = entry->vme_start;
10383
10384                 extended->protection = entry->protection;
10385                 extended->user_tag = entry->alias;
10386                 extended->pages_resident = 0;
10387                 extended->pages_swapped_out = 0;
10388                 extended->pages_shared_now_private = 0;
10389                 extended->pages_dirtied = 0;
10390                 extended->external_pager = 0;
10391                 extended->shadow_depth = 0;
10392
10393                 original_count = *count;
10394                 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
10395                         *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
10396                 } else {
10397                         extended->pages_reusable = 0;
10398                         *count = VM_REGION_EXTENDED_INFO_COUNT;
10399                 }
10400
10401                 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
10402
10403                 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
10404                         extended->share_mode = SM_PRIVATE;
10405
10406                 if (object_name)
10407                         *object_name = IP_NULL;
10408                 *address = start;
10409                 *size = (entry->vme_end - start);
10410
10411                 vm_map_unlock_read(map);
10412                 return(KERN_SUCCESS);
10413         }
10414         case VM_REGION_TOP_INFO:
10415         {
10416                 vm_region_top_info_t    top;
10417
10418                 if (*count < VM_REGION_TOP_INFO_COUNT)
10419                         return(KERN_INVALID_ARGUMENT);
10420
10421                 top = (vm_region_top_info_t) info;
10422                 *count = VM_REGION_TOP_INFO_COUNT;
10423
10424                 vm_map_lock_read(map);
10425
10426                 start = *address;
10427                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10428                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10429                                 vm_map_unlock_read(map);
10430                                 return(KERN_INVALID_ADDRESS);
10431                         }
10432                 } else {
10433                         entry = tmp_entry;
10434
10435                 }
10436                 start = entry->vme_start;
10437
10438                 top->private_pages_resident = 0;
10439                 top->shared_pages_resident = 0;
10440
10441                 vm_map_region_top_walk(entry, top);
10442
10443                 if (object_name)
10444                         *object_name = IP_NULL;
10445                 *address = start;
10446                 *size = (entry->vme_end - start);
10447
10448                 vm_map_unlock_read(map);
10449                 return(KERN_SUCCESS);
10450         }
10451         default:
10452                 return(KERN_INVALID_ARGUMENT);
10453         }
10454 }
10455
10456 #define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
10457         MIN((entry_size),                                               \
10458             ((obj)->all_reusable ?                                      \
10459              (obj)->wired_page_count :                                  \
10460              (obj)->resident_page_count - (obj)->reusable_page_count))
10461
10462 void
10463 vm_map_region_top_walk(
10464         vm_map_entry_t             entry,
10465         vm_region_top_info_t       top)
10466 {
10467
10468         if (entry->object.vm_object == 0 || entry->is_sub_map) {
10469                 top->share_mode = SM_EMPTY;
10470                 top->ref_count = 0;
10471                 top->obj_id = 0;
10472                 return;
10473         }
10474
10475         {
10476                 struct  vm_object *obj, *tmp_obj;
10477                 int             ref_count;
10478                 uint32_t        entry_size;
10479
10480                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
10481
10482                 obj = entry->object.vm_object;
10483
10484                 vm_object_lock(obj);
10485
10486                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10487                         ref_count--;
10488
10489                 assert(obj->reusable_page_count <= obj->resident_page_count);
10490                 if (obj->shadow) {
10491                         if (ref_count == 1)
10492                                 top->private_pages_resident =
10493                                         OBJ_RESIDENT_COUNT(obj, entry_size);
10494                         else
10495                                 top->shared_pages_resident =
10496                                         OBJ_RESIDENT_COUNT(obj, entry_size);
10497                         top->ref_count  = ref_count;
10498                         top->share_mode = SM_COW;
10499
10500                         while ((tmp_obj = obj->shadow)) {
10501                                 vm_object_lock(tmp_obj);
10502                                 vm_object_unlock(obj);
10503                                 obj = tmp_obj;
10504
10505                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10506                                         ref_count--;
10507
10508                                 assert(obj->reusable_page_count <= obj->resident_page_count);
10509                                 top->shared_pages_resident +=
10510                                         OBJ_RESIDENT_COUNT(obj, entry_size);
10511                                 top->ref_count += ref_count - 1;
10512                         }
10513                 } else {
10514                         if (entry->superpage_size) {
10515                                 top->share_mode = SM_LARGE_PAGE;
10516                                 top->shared_pages_resident = 0;
10517                                 top->private_pages_resident = entry_size;
10518                         } else if (entry->needs_copy) {
10519                                 top->share_mode = SM_COW;
10520                                 top->shared_pages_resident =
10521                                         OBJ_RESIDENT_COUNT(obj, entry_size);
10522                         } else {
10523                                 if (ref_count == 1 ||
10524                                     (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
10525                                         top->share_mode = SM_PRIVATE;
10526                                                 top->private_pages_resident =
10527                                                         OBJ_RESIDENT_COUNT(obj,
10528                                                                            entry_size);
10529                                 } else {
10530                                         top->share_mode = SM_SHARED;
10531                                         top->shared_pages_resident =
10532                                                 OBJ_RESIDENT_COUNT(obj,
10533                                                                   entry_size);
10534                                 }
10535                         }
10536                         top->ref_count = ref_count;
10537                 }
10538                 /* XXX K64: obj_id will be truncated */
10539                 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
10540
10541                 vm_object_unlock(obj);
10542         }
10543 }
10544
10545 void
10546 vm_map_region_walk(
10547         vm_map_t                        map,
10548         vm_map_offset_t                 va,
10549         vm_map_entry_t                  entry,
10550         vm_object_offset_t              offset,
10551         vm_object_size_t                range,
10552         vm_region_extended_info_t       extended,
10553         boolean_t                       look_for_pages,
10554         mach_msg_type_number_t count)
10555 {
10556         register struct vm_object *obj, *tmp_obj;
10557         register vm_map_offset_t       last_offset;
10558         register int               i;
10559         register int               ref_count;
10560         struct vm_object        *shadow_object;
10561         int                     shadow_depth;
10562
10563         if ((entry->object.vm_object == 0) ||
10564             (entry->is_sub_map) ||
10565             (entry->object.vm_object->phys_contiguous &&
10566              !entry->superpage_size)) {
10567                 extended->share_mode = SM_EMPTY;
10568                 extended->ref_count = 0;
10569                 return;
10570         }
10571
10572         if (entry->superpage_size) {
10573                 extended->shadow_depth = 0;
10574                 extended->share_mode = SM_LARGE_PAGE;
10575                 extended->ref_count = 1;
10576                 extended->external_pager = 0;
10577                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
10578                 extended->shadow_depth = 0;
10579                 return;
10580         }
10581
10582         {
10583                 obj = entry->object.vm_object;
10584
10585                 vm_object_lock(obj);
10586
10587                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10588                         ref_count--;
10589
10590                 if (look_for_pages) {
10591                         for (last_offset = offset + range;
10592                              offset < last_offset;
10593                              offset += PAGE_SIZE_64, va += PAGE_SIZE) {
10594                                         vm_map_region_look_for_page(map, va, obj,
10595                                                                     offset, ref_count,
10596                                                                     0, extended, count);
10597                         }
10598                 } else {
10599                         shadow_object = obj->shadow;
10600                         shadow_depth = 0;
10601
10602                         if ( !(obj->pager_trusted) && !(obj->internal))
10603                                 extended->external_pager = 1;
10604
10605                         if (shadow_object != VM_OBJECT_NULL) {
10606                                 vm_object_lock(shadow_object);
10607                                 for (;
10608                                      shadow_object != VM_OBJECT_NULL;
10609                                      shadow_depth++) {
10610                                         vm_object_t     next_shadow;
10611
10612                                         if ( !(shadow_object->pager_trusted) &&
10613                                              !(shadow_object->internal))
10614                                                 extended->external_pager = 1;
10615
10616                                         next_shadow = shadow_object->shadow;
10617                                         if (next_shadow) {
10618                                                 vm_object_lock(next_shadow);
10619                                         }
10620                                         vm_object_unlock(shadow_object);
10621                                         shadow_object = next_shadow;
10622                                 }
10623                         }
10624                         extended->shadow_depth = shadow_depth;
10625                 }
10626
10627                 if (extended->shadow_depth || entry->needs_copy)
10628                         extended->share_mode = SM_COW;
10629                 else {
10630                         if (ref_count == 1)
10631                                 extended->share_mode = SM_PRIVATE;
10632                         else {
10633                                 if (obj->true_share)
10634                                         extended->share_mode = SM_TRUESHARED;
10635                                 else
10636                                         extended->share_mode = SM_SHARED;
10637                         }
10638                 }
10639                 extended->ref_count = ref_count - extended->shadow_depth;
10640
10641                 for (i = 0; i < extended->shadow_depth; i++) {
10642                         if ((tmp_obj = obj->shadow) == 0)
10643                                 break;
10644                         vm_object_lock(tmp_obj);
10645                         vm_object_unlock(obj);
10646
10647                         if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
10648                                 ref_count--;
10649
10650                         extended->ref_count += ref_count;
10651                         obj = tmp_obj;
10652                 }
10653                 vm_object_unlock(obj);
10654
10655                 if (extended->share_mode == SM_SHARED) {
10656                         register vm_map_entry_t      cur;
10657                         register vm_map_entry_t      last;
10658                         int      my_refs;
10659
10660                         obj = entry->object.vm_object;
10661                         last = vm_map_to_entry(map);
10662                         my_refs = 0;
10663
10664                         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10665                                 ref_count--;
10666                         for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
10667                                 my_refs += vm_map_region_count_obj_refs(cur, obj);
10668
10669                         if (my_refs == ref_count)
10670                                 extended->share_mode = SM_PRIVATE_ALIASED;
10671                         else if (my_refs > 1)
10672                                 extended->share_mode = SM_SHARED_ALIASED;
10673                 }
10674         }
10675 }
10676
10677
10678 /* object is locked on entry and locked on return */
10679
10680
10681 static void
10682 vm_map_region_look_for_page(
10683         __unused vm_map_t               map,
10684         __unused vm_map_offset_t        va,
10685         vm_object_t                     object,
10686         vm_object_offset_t              offset,
10687         int                             max_refcnt,
10688         int                             depth,
10689         vm_region_extended_info_t       extended,
10690         mach_msg_type_number_t count)
10691 {
10692         register vm_page_t      p;
10693         register vm_object_t    shadow;
10694         register int            ref_count;
10695         vm_object_t             caller_object;
10696         kern_return_t           kr;
10697         shadow = object->shadow;
10698         caller_object = object;
10699
10700
10701         while (TRUE) {
10702
10703                 if ( !(object->pager_trusted) && !(object->internal))
10704                         extended->external_pager = 1;
10705
10706                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
10707                         if (shadow && (max_refcnt == 1))
10708                                 extended->pages_shared_now_private++;
10709
10710                         if (!p->fictitious &&
10711                             (p->dirty || pmap_is_modified(p->phys_page)))
10712                                 extended->pages_dirtied++;
10713                         else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
10714                                 if (p->reusable || p->object->all_reusable) {
10715                                         extended->pages_reusable++;
10716                                 }
10717                         }
10718
10719                         extended->pages_resident++;
10720
10721                         if(object != caller_object)
10722                                 vm_object_unlock(object);
10723
10724                         return;
10725                 }
10726 #if     MACH_PAGEMAP
10727                 if (object->existence_map) {
10728                         if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
10729
10730                                 extended->pages_swapped_out++;
10731
10732                                 if(object != caller_object)
10733                                         vm_object_unlock(object);
10734
10735                                 return;
10736                         }
10737                 } else
10738 #endif /* MACH_PAGEMAP */
10739                 if (object->internal &&
10740                     object->alive &&
10741                     !object->terminating &&
10742                     object->pager_ready) {
10743
10744                         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
10745                                 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
10746                                                                   offset)
10747                                     == VM_EXTERNAL_STATE_EXISTS) {
10748                                         /* the pager has that page */
10749                                         extended->pages_swapped_out++;
10750                                         if (object != caller_object)
10751                                                 vm_object_unlock(object);
10752                                         return;
10753                                 }
10754                         } else {
10755                                 memory_object_t pager;
10756
10757                                 vm_object_paging_begin(object);
10758                                 pager = object->pager;
10759                                 vm_object_unlock(object);
10760
10761                                 kr = memory_object_data_request(
10762                                         pager,
10763                                         offset + object->paging_offset,
10764                                         0, /* just poke the pager */
10765                                         VM_PROT_READ,
10766                                         NULL);
10767
10768                                 vm_object_lock(object);
10769                                 vm_object_paging_end(object);
10770
10771                                 if (kr == KERN_SUCCESS) {
10772                                         /* the pager has that page */
10773                                         extended->pages_swapped_out++;
10774                                         if (object != caller_object)
10775                                                 vm_object_unlock(object);
10776                                         return;
10777                                 }
10778                         }
10779                 }
10780
10781                 if (shadow) {
10782                         vm_object_lock(shadow);
10783
10784                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
10785                                 ref_count--;
10786
10787                         if (++depth > extended->shadow_depth)
10788                                 extended->shadow_depth = depth;
10789
10790                         if (ref_count > max_refcnt)
10791                                 max_refcnt = ref_count;
10792
10793                         if(object != caller_object)
10794                                 vm_object_unlock(object);
10795
10796                         offset = offset + object->vo_shadow_offset;
10797                         object = shadow;
10798                         shadow = object->shadow;
10799                         continue;
10800                 }
10801                 if(object != caller_object)
10802                         vm_object_unlock(object);
10803                 break;
10804         }
10805 }
10806
10807 static int
10808 vm_map_region_count_obj_refs(
10809         vm_map_entry_t    entry,
10810         vm_object_t       object)
10811 {
10812         register int ref_count;
10813         register vm_object_t chk_obj;
10814         register vm_object_t tmp_obj;
10815
10816         if (entry->object.vm_object == 0)
10817                 return(0);
10818
10819         if (entry->is_sub_map)
10820                 return(0);
10821         else {
10822                 ref_count = 0;
10823
10824                 chk_obj = entry->object.vm_object;
10825                 vm_object_lock(chk_obj);
10826
10827                 while (chk_obj) {
10828                         if (chk_obj == object)
10829                                 ref_count++;
10830                         tmp_obj = chk_obj->shadow;
10831                         if (tmp_obj)
10832                                 vm_object_lock(tmp_obj);
10833                         vm_object_unlock(chk_obj);
10834
10835                         chk_obj = tmp_obj;
10836                 }
10837         }
10838         return(ref_count);
10839 }
10840
10841
10842 /*
10843  *      Routine:        vm_map_simplify
10844  *
10845  *      Description:
10846  *              Attempt to simplify the map representation in
10847  *              the vicinity of the given starting address.
10848  *      Note:
10849  *              This routine is intended primarily to keep the
10850  *              kernel maps more compact -- they generally don't
10851  *              benefit from the "expand a map entry" technology
10852  *              at allocation time because the adjacent entry
10853  *              is often wired down.
10854  */
10855 void
10856 vm_map_simplify_entry(
10857         vm_map_t        map,
10858         vm_map_entry_t  this_entry)
10859 {
10860         vm_map_entry_t  prev_entry;
10861
10862         counter(c_vm_map_simplify_entry_called++);
10863
10864         prev_entry = this_entry->vme_prev;
10865
10866         if ((this_entry != vm_map_to_entry(map)) &&
10867             (prev_entry != vm_map_to_entry(map)) &&
10868
10869             (prev_entry->vme_end == this_entry->vme_start) &&
10870
10871             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10872
10873             (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10874             ((prev_entry->offset + (prev_entry->vme_end -
10875                                     prev_entry->vme_start))
10876              == this_entry->offset) &&
10877
10878             (prev_entry->map_aligned == this_entry->map_aligned) &&
10879             (prev_entry->inheritance == this_entry->inheritance) &&
10880             (prev_entry->protection == this_entry->protection) &&
10881             (prev_entry->max_protection == this_entry->max_protection) &&
10882             (prev_entry->behavior == this_entry->behavior) &&
10883             (prev_entry->alias == this_entry->alias) &&
10884             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10885             (prev_entry->no_cache == this_entry->no_cache) &&
10886             (prev_entry->wired_count == this_entry->wired_count) &&
10887             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10888
10889             (prev_entry->needs_copy == this_entry->needs_copy) &&
10890             (prev_entry->permanent == this_entry->permanent) &&
10891
10892             (prev_entry->use_pmap == FALSE) &&
10893             (this_entry->use_pmap == FALSE) &&
10894             (prev_entry->in_transition == FALSE) &&
10895             (this_entry->in_transition == FALSE) &&
10896             (prev_entry->needs_wakeup == FALSE) &&
10897             (this_entry->needs_wakeup == FALSE) &&
10898             (prev_entry->is_shared == FALSE) &&
10899             (this_entry->is_shared == FALSE)
10900                 ) {
10901                 vm_map_store_entry_unlink(map, prev_entry);
10902                 assert(prev_entry->vme_start < this_entry->vme_end);
10903                 if (prev_entry->map_aligned)
10904                         assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
10905                                                    VM_MAP_PAGE_MASK(map)));
10906                 this_entry->vme_start = prev_entry->vme_start;
10907                 this_entry->offset = prev_entry->offset;
10908                 if (prev_entry->is_sub_map) {
10909                         vm_map_deallocate(prev_entry->object.sub_map);
10910                 } else {
10911                         vm_object_deallocate(prev_entry->object.vm_object);
10912                 }
10913                 vm_map_entry_dispose(map, prev_entry);
10914                 SAVE_HINT_MAP_WRITE(map, this_entry);
10915                 counter(c_vm_map_simplified++);
10916         }
10917 }
10918
10919 void
10920 vm_map_simplify(
10921         vm_map_t        map,
10922         vm_map_offset_t start)
10923 {
10924         vm_map_entry_t  this_entry;
10925
10926         vm_map_lock(map);
10927         if (vm_map_lookup_entry(map, start, &this_entry)) {
10928                 vm_map_simplify_entry(map, this_entry);
10929                 vm_map_simplify_entry(map, this_entry->vme_next);
10930         }
10931         counter(c_vm_map_simplify_called++);
10932         vm_map_unlock(map);
10933 }
10934
10935 static void
10936 vm_map_simplify_range(
10937         vm_map_t        map,
10938         vm_map_offset_t start,
10939         vm_map_offset_t end)
10940 {
10941         vm_map_entry_t  entry;
10942
10943         /*
10944          * The map should be locked (for "write") by the caller.
10945          */
10946
10947         if (start >= end) {
10948                 /* invalid address range */
10949                 return;
10950         }
10951
10952         start = vm_map_trunc_page(start,
10953                                   VM_MAP_PAGE_MASK(map));
10954         end = vm_map_round_page(end,
10955                                 VM_MAP_PAGE_MASK(map));
10956
10957         if (!vm_map_lookup_entry(map, start, &entry)) {
10958                 /* "start" is not mapped and "entry" ends before "start" */
10959                 if (entry == vm_map_to_entry(map)) {
10960                         /* start with first entry in the map */
10961                         entry = vm_map_first_entry(map);
10962                 } else {
10963                         /* start with next entry */
10964                         entry = entry->vme_next;
10965                 }
10966         }
10967
10968         while (entry != vm_map_to_entry(map) &&
10969                entry->vme_start <= end) {
10970                 /* try and coalesce "entry" with its previous entry */
10971                 vm_map_simplify_entry(map, entry);
10972                 entry = entry->vme_next;
10973         }
10974 }
10975
10976
10977 /*
10978  *      Routine:        vm_map_machine_attribute
10979  *      Purpose:
10980  *              Provide machine-specific attributes to mappings,
10981  *              such as cachability etc. for machines that provide
10982  *              them.  NUMA architectures and machines with big/strange
10983  *              caches will use this.
10984  *      Note:
10985  *              Responsibilities for locking and checking are handled here,
10986  *              everything else in the pmap module. If any non-volatile
10987  *              information must be kept, the pmap module should handle
10988  *              it itself. [This assumes that attributes do not
10989  *              need to be inherited, which seems ok to me]
10990  */
10991 kern_return_t
10992 vm_map_machine_attribute(
10993         vm_map_t                        map,
10994         vm_map_offset_t         start,
10995         vm_map_offset_t         end,
10996         vm_machine_attribute_t  attribute,
10997         vm_machine_attribute_val_t* value)              /* IN/OUT */
10998 {
10999         kern_return_t   ret;
11000         vm_map_size_t sync_size;
11001         vm_map_entry_t entry;
11002
11003         if (start < vm_map_min(map) || end > vm_map_max(map))
11004                 return KERN_INVALID_ADDRESS;
11005
11006         /* Figure how much memory we need to flush (in page increments) */
11007         sync_size = end - start;
11008
11009         vm_map_lock(map);
11010
11011         if (attribute != MATTR_CACHE) {
11012                 /* If we don't have to find physical addresses, we */
11013                 /* don't have to do an explicit traversal here.    */
11014                 ret = pmap_attribute(map->pmap, start, end-start,
11015                                      attribute, value);
11016                 vm_map_unlock(map);
11017                 return ret;
11018         }
11019
11020         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
11021
11022         while(sync_size) {
11023                 if (vm_map_lookup_entry(map, start, &entry)) {
11024                         vm_map_size_t   sub_size;
11025                         if((entry->vme_end - start) > sync_size) {
11026                                 sub_size = sync_size;
11027                                 sync_size = 0;
11028                         } else {
11029                                 sub_size = entry->vme_end - start;
11030                                 sync_size -= sub_size;
11031                         }
11032                         if(entry->is_sub_map) {
11033                                 vm_map_offset_t sub_start;
11034                                 vm_map_offset_t sub_end;
11035
11036                                 sub_start = (start - entry->vme_start)
11037                                         + entry->offset;
11038                                 sub_end = sub_start + sub_size;
11039                                 vm_map_machine_attribute(
11040                                         entry->object.sub_map,
11041                                         sub_start,
11042                                         sub_end,
11043                                         attribute, value);
11044                         } else {
11045                                 if(entry->object.vm_object) {
11046                                         vm_page_t               m;
11047                                         vm_object_t             object;
11048                                         vm_object_t             base_object;
11049                                         vm_object_t             last_object;
11050                                         vm_object_offset_t      offset;
11051                                         vm_object_offset_t      base_offset;
11052                                         vm_map_size_t           range;
11053                                         range = sub_size;
11054                                         offset = (start - entry->vme_start)
11055                                                 + entry->offset;
11056                                         base_offset = offset;
11057                                         object = entry->object.vm_object;
11058                                         base_object = object;
11059                                         last_object = NULL;
11060
11061                                         vm_object_lock(object);
11062
11063                                         while (range) {
11064                                                 m = vm_page_lookup(
11065                                                         object, offset);
11066
11067                                                 if (m && !m->fictitious) {
11068                                                         ret =
11069                                                                 pmap_attribute_cache_sync(
11070                                                                         m->phys_page,
11071                                                                         PAGE_SIZE,
11072                                                                         attribute, value);
11073
11074                                                 } else if (object->shadow) {
11075                                                         offset = offset + object->vo_shadow_offset;
11076                                                         last_object = object;
11077                                                         object = object->shadow;
11078                                                         vm_object_lock(last_object->shadow);
11079                                                         vm_object_unlock(last_object);
11080                                                         continue;
11081                                                 }
11082                                                 range -= PAGE_SIZE;
11083
11084                                                 if (base_object != object) {
11085                                                         vm_object_unlock(object);
11086                                                         vm_object_lock(base_object);
11087                                                         object = base_object;
11088                                                 }
11089                                                 /* Bump to the next page */
11090                                                 base_offset += PAGE_SIZE;
11091                                                 offset = base_offset;
11092                                         }
11093                                         vm_object_unlock(object);
11094                                 }
11095                         }
11096                         start += sub_size;
11097                 } else {
11098                         vm_map_unlock(map);
11099                         return KERN_FAILURE;
11100                 }
11101
11102         }
11103
11104         vm_map_unlock(map);
11105
11106         return ret;
11107 }
11108
11109 /*
11110  *      vm_map_behavior_set:
11111  *
11112  *      Sets the paging reference behavior of the specified address
11113  *      range in the target map.  Paging reference behavior affects
11114  *      how pagein operations resulting from faults on the map will be
11115  *      clustered.
11116  */
11117 kern_return_t
11118 vm_map_behavior_set(
11119         vm_map_t        map,
11120         vm_map_offset_t start,
11121         vm_map_offset_t end,
11122         vm_behavior_t   new_behavior)
11123 {
11124         register vm_map_entry_t entry;
11125         vm_map_entry_t  temp_entry;
11126
11127         XPR(XPR_VM_MAP,
11128             "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
11129             map, start, end, new_behavior, 0);
11130
11131         if (start > end ||
11132             start < vm_map_min(map) ||
11133             end > vm_map_max(map)) {
11134                 return KERN_NO_SPACE;
11135         }
11136
11137         switch (new_behavior) {
11138
11139         /*
11140          * This first block of behaviors all set a persistent state on the specified
11141          * memory range.  All we have to do here is to record the desired behavior
11142          * in the vm_map_entry_t's.
11143          */
11144
11145         case VM_BEHAVIOR_DEFAULT:
11146         case VM_BEHAVIOR_RANDOM:
11147         case VM_BEHAVIOR_SEQUENTIAL:
11148         case VM_BEHAVIOR_RSEQNTL:
11149         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
11150                 vm_map_lock(map);
11151
11152                 /*
11153                  *      The entire address range must be valid for the map.
11154                  *      Note that vm_map_range_check() does a
11155                  *      vm_map_lookup_entry() internally and returns the
11156                  *      entry containing the start of the address range if
11157                  *      the entire range is valid.
11158                  */
11159                 if (vm_map_range_check(map, start, end, &temp_entry)) {
11160                         entry = temp_entry;
11161                         vm_map_clip_start(map, entry, start);
11162                 }
11163                 else {
11164                         vm_map_unlock(map);
11165                         return(KERN_INVALID_ADDRESS);
11166                 }
11167
11168                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
11169                         vm_map_clip_end(map, entry, end);
11170                         assert(!entry->use_pmap);
11171
11172                         if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
11173                                 entry->zero_wired_pages = TRUE;
11174                         } else {
11175                                 entry->behavior = new_behavior;
11176                         }
11177                         entry = entry->vme_next;
11178                 }
11179
11180                 vm_map_unlock(map);
11181                 break;
11182
11183         /*
11184          * The rest of these are different from the above in that they cause
11185          * an immediate action to take place as opposed to setting a behavior that
11186          * affects future actions.
11187          */
11188
11189         case VM_BEHAVIOR_WILLNEED:
11190                 return vm_map_willneed(map, start, end);
11191
11192         case VM_BEHAVIOR_DONTNEED:
11193                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
11194
11195         case VM_BEHAVIOR_FREE:
11196                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
11197
11198         case VM_BEHAVIOR_REUSABLE:
11199                 return vm_map_reusable_pages(map, start, end);
11200
11201         case VM_BEHAVIOR_REUSE:
11202                 return vm_map_reuse_pages(map, start, end);
11203
11204         case VM_BEHAVIOR_CAN_REUSE:
11205                 return vm_map_can_reuse(map, start, end);
11206
11207         default:
11208                 return(KERN_INVALID_ARGUMENT);
11209         }
11210
11211         return(KERN_SUCCESS);
11212 }
11213
11214
11215 /*
11216  * Internals for madvise(MADV_WILLNEED) system call.
11217  *
11218  * The present implementation is to do a read-ahead if the mapping corresponds
11219  * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
11220  * and basically ignore the "advice" (which we are always free to do).
11221  */
11222
11223
11224 static kern_return_t
11225 vm_map_willneed(
11226         vm_map_t        map,
11227         vm_map_offset_t start,
11228         vm_map_offset_t end
11229 )
11230 {
11231         vm_map_entry_t                  entry;
11232         vm_object_t                     object;
11233         memory_object_t                 pager;
11234         struct vm_object_fault_info     fault_info;
11235         kern_return_t                   kr;
11236         vm_object_size_t                len;
11237         vm_object_offset_t              offset;
11238
11239         /*
11240          * Fill in static values in fault_info.  Several fields get ignored by the code
11241          * we call, but we'll fill them in anyway since uninitialized fields are bad
11242          * when it comes to future backwards compatibility.
11243          */
11244
11245         fault_info.interruptible = THREAD_UNINT;                /* ignored value */
11246         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
11247         fault_info.no_cache      = FALSE;                       /* ignored value */
11248         fault_info.stealth       = TRUE;
11249         fault_info.io_sync = FALSE;
11250         fault_info.cs_bypass = FALSE;
11251         fault_info.mark_zf_absent = FALSE;
11252         fault_info.batch_pmap_op = FALSE;
11253
11254         /*
11255          * The MADV_WILLNEED operation doesn't require any changes to the
11256          * vm_map_entry_t's, so the read lock is sufficient.
11257          */
11258
11259         vm_map_lock_read(map);
11260
11261         /*
11262          * The madvise semantics require that the address range be fully
11263          * allocated with no holes.  Otherwise, we're required to return
11264          * an error.
11265          */
11266
11267         if (! vm_map_range_check(map, start, end, &entry)) {
11268                 vm_map_unlock_read(map);
11269                 return KERN_INVALID_ADDRESS;
11270         }
11271
11272         /*
11273          * Examine each vm_map_entry_t in the range.
11274          */
11275         for (; entry != vm_map_to_entry(map) && start < end; ) {
11276
11277                 /*
11278                  * The first time through, the start address could be anywhere
11279                  * within the vm_map_entry we found.  So adjust the offset to
11280                  * correspond.  After that, the offset will always be zero to
11281                  * correspond to the beginning of the current vm_map_entry.
11282                  */
11283                 offset = (start - entry->vme_start) + entry->offset;
11284
11285                 /*
11286                  * Set the length so we don't go beyond the end of the
11287                  * map_entry or beyond the end of the range we were given.
11288                  * This range could span also multiple map entries all of which
11289                  * map different files, so make sure we only do the right amount
11290                  * of I/O for each object.  Note that it's possible for there
11291                  * to be multiple map entries all referring to the same object
11292                  * but with different page permissions, but it's not worth
11293                  * trying to optimize that case.
11294                  */
11295                 len = MIN(entry->vme_end - start, end - start);
11296
11297                 if ((vm_size_t) len != len) {
11298                         /* 32-bit overflow */
11299                         len = (vm_size_t) (0 - PAGE_SIZE);
11300                 }
11301                 fault_info.cluster_size = (vm_size_t) len;
11302                 fault_info.lo_offset    = offset;
11303                 fault_info.hi_offset    = offset + len;
11304                 fault_info.user_tag     = entry->alias;
11305
11306                 /*
11307                  * If there's no read permission to this mapping, then just
11308                  * skip it.
11309                  */
11310                 if ((entry->protection & VM_PROT_READ) == 0) {
11311                         entry = entry->vme_next;
11312                         start = entry->vme_start;
11313                         continue;
11314                 }
11315
11316                 /*
11317                  * Find the file object backing this map entry.  If there is
11318                  * none, then we simply ignore the "will need" advice for this
11319                  * entry and go on to the next one.
11320                  */
11321                 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
11322                         entry = entry->vme_next;
11323                         start = entry->vme_start;
11324                         continue;
11325                 }
11326
11327                 /*
11328                  * The data_request() could take a long time, so let's
11329                  * release the map lock to avoid blocking other threads.
11330                  */
11331                 vm_map_unlock_read(map);
11332
11333                 vm_object_paging_begin(object);
11334                 pager = object->pager;
11335                 vm_object_unlock(object);
11336
11337                 /*
11338                  * Get the data from the object asynchronously.
11339                  *
11340                  * Note that memory_object_data_request() places limits on the
11341                  * amount of I/O it will do.  Regardless of the len we
11342                  * specified, it won't do more than MAX_UPL_TRANSFER and it
11343                  * silently truncates the len to that size.  This isn't
11344                  * necessarily bad since madvise shouldn't really be used to
11345                  * page in unlimited amounts of data.  Other Unix variants
11346                  * limit the willneed case as well.  If this turns out to be an
11347                  * issue for developers, then we can always adjust the policy
11348                  * here and still be backwards compatible since this is all
11349                  * just "advice".
11350                  */
11351                 kr = memory_object_data_request(
11352                         pager,
11353                         offset + object->paging_offset,
11354                         0,      /* ignored */
11355                         VM_PROT_READ,
11356                         (memory_object_fault_info_t)&fault_info);
11357
11358                 vm_object_lock(object);
11359                 vm_object_paging_end(object);
11360                 vm_object_unlock(object);
11361
11362                 /*
11363                  * If we couldn't do the I/O for some reason, just give up on
11364                  * the madvise.  We still return success to the user since
11365                  * madvise isn't supposed to fail when the advice can't be
11366                  * taken.
11367                  */
11368                 if (kr != KERN_SUCCESS) {
11369                         return KERN_SUCCESS;
11370                 }
11371
11372                 start += len;
11373                 if (start >= end) {
11374                         /* done */
11375                         return KERN_SUCCESS;
11376                 }
11377
11378                 /* look up next entry */
11379                 vm_map_lock_read(map);
11380                 if (! vm_map_lookup_entry(map, start, &entry)) {
11381                         /*
11382                          * There's a new hole in the address range.
11383                          */
11384                         vm_map_unlock_read(map);
11385                         return KERN_INVALID_ADDRESS;
11386                 }
11387         }
11388
11389         vm_map_unlock_read(map);
11390         return KERN_SUCCESS;
11391 }
11392
11393 static boolean_t
11394 vm_map_entry_is_reusable(
11395         vm_map_entry_t entry)
11396 {
11397         vm_object_t object;
11398
11399         switch (entry->alias) {
11400         case VM_MEMORY_MALLOC:
11401         case VM_MEMORY_MALLOC_SMALL:
11402         case VM_MEMORY_MALLOC_LARGE:
11403         case VM_MEMORY_REALLOC:
11404         case VM_MEMORY_MALLOC_TINY:
11405         case VM_MEMORY_MALLOC_LARGE_REUSABLE:
11406         case VM_MEMORY_MALLOC_LARGE_REUSED:
11407                 /*
11408                  * This is a malloc() memory region: check if it's still
11409                  * in its original state and can be re-used for more
11410                  * malloc() allocations.
11411                  */
11412                 break;
11413         default:
11414                 /*
11415                  * Not a malloc() memory region: let the caller decide if
11416                  * it's re-usable.
11417                  */
11418                 return TRUE;
11419         }
11420
11421         if (entry->is_shared ||
11422             entry->is_sub_map ||
11423             entry->in_transition ||
11424             entry->protection != VM_PROT_DEFAULT ||
11425             entry->max_protection != VM_PROT_ALL ||
11426             entry->inheritance != VM_INHERIT_DEFAULT ||
11427             entry->no_cache ||
11428             entry->permanent ||
11429             entry->superpage_size != FALSE ||
11430             entry->zero_wired_pages ||
11431             entry->wired_count != 0 ||
11432             entry->user_wired_count != 0) {
11433                 return FALSE;
11434         }
11435
11436         object = entry->object.vm_object;
11437         if (object == VM_OBJECT_NULL) {
11438                 return TRUE;
11439         }
11440         if (
11441 #if 0
11442                 /*
11443                  * Let's proceed even if the VM object is potentially
11444                  * shared.
11445                  * We check for this later when processing the actual
11446                  * VM pages, so the contents will be safe if shared.
11447                  *
11448                  * But we can still mark this memory region as "reusable" to
11449                  * acknowledge that the caller did let us know that the memory
11450                  * could be re-used and should not be penalized for holding
11451                  * on to it.  This allows its "resident size" to not include
11452                  * the reusable range.
11453                  */
11454             object->ref_count == 1 &&
11455 #endif
11456             object->wired_page_count == 0 &&
11457             object->copy == VM_OBJECT_NULL &&
11458             object->shadow == VM_OBJECT_NULL &&
11459             object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11460             object->internal &&
11461             !object->true_share &&
11462             object->wimg_bits == VM_WIMG_USE_DEFAULT &&
11463             !object->code_signed) {
11464                 return TRUE;
11465         }
11466         return FALSE;
11467
11468
11469 }
11470
11471 static kern_return_t
11472 vm_map_reuse_pages(
11473         vm_map_t        map,
11474         vm_map_offset_t start,
11475         vm_map_offset_t end)
11476 {
11477         vm_map_entry_t                  entry;
11478         vm_object_t                     object;
11479         vm_object_offset_t              start_offset, end_offset;
11480
11481         /*
11482          * The MADV_REUSE operation doesn't require any changes to the
11483          * vm_map_entry_t's, so the read lock is sufficient.
11484          */
11485
11486         vm_map_lock_read(map);
11487
11488         /*
11489          * The madvise semantics require that the address range be fully
11490          * allocated with no holes.  Otherwise, we're required to return
11491          * an error.
11492          */
11493
11494         if (!vm_map_range_check(map, start, end, &entry)) {
11495                 vm_map_unlock_read(map);
11496                 vm_page_stats_reusable.reuse_pages_failure++;
11497                 return KERN_INVALID_ADDRESS;
11498         }
11499
11500         /*
11501          * Examine each vm_map_entry_t in the range.
11502          */
11503         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11504              entry = entry->vme_next) {
11505                 /*
11506                  * Sanity check on the VM map entry.
11507                  */
11508                 if (! vm_map_entry_is_reusable(entry)) {
11509                         vm_map_unlock_read(map);
11510                         vm_page_stats_reusable.reuse_pages_failure++;
11511                         return KERN_INVALID_ADDRESS;
11512                 }
11513
11514                 /*
11515                  * The first time through, the start address could be anywhere
11516                  * within the vm_map_entry we found.  So adjust the offset to
11517                  * correspond.
11518                  */
11519                 if (entry->vme_start < start) {
11520                         start_offset = start - entry->vme_start;
11521                 } else {
11522                         start_offset = 0;
11523                 }
11524                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
11525                 start_offset += entry->offset;
11526                 end_offset += entry->offset;
11527
11528                 object = entry->object.vm_object;
11529                 if (object != VM_OBJECT_NULL) {
11530                         /* tell pmap to not count this range as "reusable" */
11531                         pmap_reusable(map->pmap,
11532                                       MAX(start, entry->vme_start),
11533                                       MIN(end, entry->vme_end),
11534                                       FALSE);
11535                         vm_object_lock(object);
11536                         vm_object_reuse_pages(object, start_offset, end_offset,
11537                                               TRUE);
11538                         vm_object_unlock(object);
11539                 }
11540
11541                 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
11542                         /*
11543                          * XXX
11544                          * We do not hold the VM map exclusively here.
11545                          * The "alias" field is not that critical, so it's
11546                          * safe to update it here, as long as it is the only
11547                          * one that can be modified while holding the VM map
11548                          * "shared".
11549                          */
11550                         entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
11551                 }
11552         }
11553
11554         vm_map_unlock_read(map);
11555         vm_page_stats_reusable.reuse_pages_success++;
11556         return KERN_SUCCESS;
11557 }
11558
11559
11560 static kern_return_t
11561 vm_map_reusable_pages(
11562         vm_map_t        map,
11563         vm_map_offset_t start,
11564         vm_map_offset_t end)
11565 {
11566         vm_map_entry_t                  entry;
11567         vm_object_t                     object;
11568         vm_object_offset_t              start_offset, end_offset;
11569
11570         /*
11571          * The MADV_REUSABLE operation doesn't require any changes to the
11572          * vm_map_entry_t's, so the read lock is sufficient.
11573          */
11574
11575         vm_map_lock_read(map);
11576
11577         /*
11578          * The madvise semantics require that the address range be fully
11579          * allocated with no holes.  Otherwise, we're required to return
11580          * an error.
11581          */
11582
11583         if (!vm_map_range_check(map, start, end, &entry)) {
11584                 vm_map_unlock_read(map);
11585                 vm_page_stats_reusable.reusable_pages_failure++;
11586                 return KERN_INVALID_ADDRESS;
11587         }
11588
11589         /*
11590          * Examine each vm_map_entry_t in the range.
11591          */
11592         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11593              entry = entry->vme_next) {
11594                 int kill_pages = 0;
11595
11596                 /*
11597                  * Sanity check on the VM map entry.
11598                  */
11599                 if (! vm_map_entry_is_reusable(entry)) {
11600                         vm_map_unlock_read(map);
11601                         vm_page_stats_reusable.reusable_pages_failure++;
11602                         return KERN_INVALID_ADDRESS;
11603                 }
11604
11605                 /*
11606                  * The first time through, the start address could be anywhere
11607                  * within the vm_map_entry we found.  So adjust the offset to
11608                  * correspond.
11609                  */
11610                 if (entry->vme_start < start) {
11611                         start_offset = start - entry->vme_start;
11612                 } else {
11613                         start_offset = 0;
11614                 }
11615                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
11616                 start_offset += entry->offset;
11617                 end_offset += entry->offset;
11618
11619                 object = entry->object.vm_object;
11620                 if (object == VM_OBJECT_NULL)
11621                         continue;
11622
11623
11624                 vm_object_lock(object);
11625                 if (object->ref_count == 1 && !object->shadow)
11626                         kill_pages = 1;
11627                 else
11628                         kill_pages = -1;
11629                 if (kill_pages != -1) {
11630                         /* tell pmap to count this range as "reusable" */
11631                         pmap_reusable(map->pmap,
11632                                       MAX(start, entry->vme_start),
11633                                       MIN(end, entry->vme_end),
11634                                       TRUE);
11635                         vm_object_deactivate_pages(object,
11636                                                    start_offset,
11637                                                    end_offset - start_offset,
11638                                                    kill_pages,
11639                                                    TRUE /*reusable_pages*/);
11640                 } else {
11641                         vm_page_stats_reusable.reusable_pages_shared++;
11642                 }
11643                 vm_object_unlock(object);
11644
11645                 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
11646                     entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
11647                         /*
11648                          * XXX
11649                          * We do not hold the VM map exclusively here.
11650                          * The "alias" field is not that critical, so it's
11651                          * safe to update it here, as long as it is the only
11652                          * one that can be modified while holding the VM map
11653                          * "shared".
11654                          */
11655                         entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
11656                 }
11657         }
11658
11659         vm_map_unlock_read(map);
11660         vm_page_stats_reusable.reusable_pages_success++;
11661         return KERN_SUCCESS;
11662 }
11663
11664
11665 static kern_return_t
11666 vm_map_can_reuse(
11667         vm_map_t        map,
11668         vm_map_offset_t start,
11669         vm_map_offset_t end)
11670 {
11671         vm_map_entry_t                  entry;
11672
11673         /*
11674          * The MADV_REUSABLE operation doesn't require any changes to the
11675          * vm_map_entry_t's, so the read lock is sufficient.
11676          */
11677
11678         vm_map_lock_read(map);
11679
11680         /*
11681          * The madvise semantics require that the address range be fully
11682          * allocated with no holes.  Otherwise, we're required to return
11683          * an error.
11684          */
11685
11686         if (!vm_map_range_check(map, start, end, &entry)) {
11687                 vm_map_unlock_read(map);
11688                 vm_page_stats_reusable.can_reuse_failure++;
11689                 return KERN_INVALID_ADDRESS;
11690         }
11691
11692         /*
11693          * Examine each vm_map_entry_t in the range.
11694          */
11695         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11696              entry = entry->vme_next) {
11697                 /*
11698                  * Sanity check on the VM map entry.
11699                  */
11700                 if (! vm_map_entry_is_reusable(entry)) {
11701                         vm_map_unlock_read(map);
11702                         vm_page_stats_reusable.can_reuse_failure++;
11703                         return KERN_INVALID_ADDRESS;
11704                 }
11705         }
11706
11707         vm_map_unlock_read(map);
11708         vm_page_stats_reusable.can_reuse_success++;
11709         return KERN_SUCCESS;
11710 }
11711
11712
11713 /*
11714  *      Routine:        vm_map_entry_insert
11715  *
11716  *      Descritpion:    This routine inserts a new vm_entry in a locked map.
11717  */
11718 vm_map_entry_t
11719 vm_map_entry_insert(
11720         vm_map_t                map,
11721         vm_map_entry_t          insp_entry,
11722         vm_map_offset_t         start,
11723         vm_map_offset_t         end,
11724         vm_object_t             object,
11725         vm_object_offset_t      offset,
11726         boolean_t               needs_copy,
11727         boolean_t               is_shared,
11728         boolean_t               in_transition,
11729         vm_prot_t               cur_protection,
11730         vm_prot_t               max_protection,
11731         vm_behavior_t           behavior,
11732         vm_inherit_t            inheritance,
11733         unsigned                wired_count,
11734         boolean_t               no_cache,
11735         boolean_t               permanent,
11736         unsigned int            superpage_size,
11737         boolean_t               clear_map_aligned)
11738 {
11739         vm_map_entry_t  new_entry;
11740
11741         assert(insp_entry != (vm_map_entry_t)0);
11742
11743         new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11744
11745         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
11746                 new_entry->map_aligned = TRUE;
11747         } else {
11748                 new_entry->map_aligned = FALSE;
11749         }
11750         if (clear_map_aligned &&
11751             ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
11752                 new_entry->map_aligned = FALSE;
11753         }
11754
11755         new_entry->vme_start = start;
11756         new_entry->vme_end = end;
11757         assert(page_aligned(new_entry->vme_start));
11758         assert(page_aligned(new_entry->vme_end));
11759         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
11760                                    VM_MAP_PAGE_MASK(map)));
11761         if (new_entry->map_aligned) {
11762                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
11763                                            VM_MAP_PAGE_MASK(map)));
11764         }
11765         assert(new_entry->vme_start < new_entry->vme_end);
11766
11767         new_entry->object.vm_object = object;
11768         new_entry->offset = offset;
11769         new_entry->is_shared = is_shared;
11770         new_entry->is_sub_map = FALSE;
11771         new_entry->needs_copy = needs_copy;
11772         new_entry->in_transition = in_transition;
11773         new_entry->needs_wakeup = FALSE;
11774         new_entry->inheritance = inheritance;
11775         new_entry->protection = cur_protection;
11776         new_entry->max_protection = max_protection;
11777         new_entry->behavior = behavior;
11778         new_entry->wired_count = wired_count;
11779         new_entry->user_wired_count = 0;
11780         new_entry->use_pmap = FALSE;
11781         new_entry->alias = 0;
11782         new_entry->zero_wired_pages = FALSE;
11783         new_entry->no_cache = no_cache;
11784         new_entry->permanent = permanent;
11785         if (superpage_size)
11786                 new_entry->superpage_size = TRUE;
11787         else
11788                 new_entry->superpage_size = FALSE;
11789         new_entry->used_for_jit = FALSE;
11790
11791         /*
11792          *      Insert the new entry into the list.
11793          */
11794
11795         vm_map_store_entry_link(map, insp_entry, new_entry);
11796         map->size += end - start;
11797
11798         /*
11799          *      Update the free space hint and the lookup hint.
11800          */
11801
11802         SAVE_HINT_MAP_WRITE(map, new_entry);
11803         return new_entry;
11804 }
11805
11806 /*
11807  *      Routine:        vm_map_remap_extract
11808  *
11809  *      Descritpion:    This routine returns a vm_entry list from a map.
11810  */
11811 static kern_return_t
11812 vm_map_remap_extract(
11813         vm_map_t                map,
11814         vm_map_offset_t         addr,
11815         vm_map_size_t           size,
11816         boolean_t               copy,
11817         struct vm_map_header    *map_header,
11818         vm_prot_t               *cur_protection,
11819         vm_prot_t               *max_protection,
11820         /* What, no behavior? */
11821         vm_inherit_t            inheritance,
11822         boolean_t               pageable)
11823 {
11824         kern_return_t           result;
11825         vm_map_size_t           mapped_size;
11826         vm_map_size_t           tmp_size;
11827         vm_map_entry_t          src_entry;     /* result of last map lookup */
11828         vm_map_entry_t          new_entry;
11829         vm_object_offset_t      offset;
11830         vm_map_offset_t         map_address;
11831         vm_map_offset_t         src_start;     /* start of entry to map */
11832         vm_map_offset_t         src_end;       /* end of region to be mapped */
11833         vm_object_t             object;
11834         vm_map_version_t        version;
11835         boolean_t               src_needs_copy;
11836         boolean_t               new_entry_needs_copy;
11837
11838         assert(map != VM_MAP_NULL);
11839         assert(size != 0);
11840         assert(size == vm_map_round_page(size, PAGE_MASK));
11841         assert(inheritance == VM_INHERIT_NONE ||
11842                inheritance == VM_INHERIT_COPY ||
11843                inheritance == VM_INHERIT_SHARE);
11844
11845         /*
11846          *      Compute start and end of region.
11847          */
11848         src_start = vm_map_trunc_page(addr, PAGE_MASK);
11849         src_end = vm_map_round_page(src_start + size, PAGE_MASK);
11850
11851
11852         /*
11853          *      Initialize map_header.
11854          */
11855         map_header->links.next = (struct vm_map_entry *)&map_header->links;
11856         map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11857         map_header->nentries = 0;
11858         map_header->entries_pageable = pageable;
11859         map_header->page_shift = PAGE_SHIFT;
11860
11861         vm_map_store_init( map_header );
11862
11863         *cur_protection = VM_PROT_ALL;
11864         *max_protection = VM_PROT_ALL;
11865
11866         map_address = 0;
11867         mapped_size = 0;
11868         result = KERN_SUCCESS;
11869
11870         /*
11871          *      The specified source virtual space might correspond to
11872          *      multiple map entries, need to loop on them.
11873          */
11874         vm_map_lock(map);
11875         while (mapped_size != size) {
11876                 vm_map_size_t   entry_size;
11877
11878                 /*
11879                  *      Find the beginning of the region.
11880                  */
11881                 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11882                         result = KERN_INVALID_ADDRESS;
11883                         break;
11884                 }
11885
11886                 if (src_start < src_entry->vme_start ||
11887                     (mapped_size && src_start != src_entry->vme_start)) {
11888                         result = KERN_INVALID_ADDRESS;
11889                         break;
11890                 }
11891
11892                 tmp_size = size - mapped_size;
11893                 if (src_end > src_entry->vme_end)
11894                         tmp_size -= (src_end - src_entry->vme_end);
11895
11896                 entry_size = (vm_map_size_t)(src_entry->vme_end -
11897                                              src_entry->vme_start);
11898
11899                 if(src_entry->is_sub_map) {
11900                         vm_map_reference(src_entry->object.sub_map);
11901                         object = VM_OBJECT_NULL;
11902                 } else {
11903                         object = src_entry->object.vm_object;
11904
11905                         if (object == VM_OBJECT_NULL) {
11906                                 object = vm_object_allocate(entry_size);
11907                                 src_entry->offset = 0;
11908                                 src_entry->object.vm_object = object;
11909                         } else if (object->copy_strategy !=
11910                                    MEMORY_OBJECT_COPY_SYMMETRIC) {
11911                                 /*
11912                                  *      We are already using an asymmetric
11913                                  *      copy, and therefore we already have
11914                                  *      the right object.
11915                                  */
11916                                 assert(!src_entry->needs_copy);
11917                         } else if (src_entry->needs_copy || object->shadowed ||
11918                                    (object->internal && !object->true_share &&
11919                                     !src_entry->is_shared &&
11920                                     object->vo_size > entry_size)) {
11921
11922                                 vm_object_shadow(&src_entry->object.vm_object,
11923                                                  &src_entry->offset,
11924                                                  entry_size);
11925
11926                                 if (!src_entry->needs_copy &&
11927                                     (src_entry->protection & VM_PROT_WRITE)) {
11928                                         vm_prot_t prot;
11929
11930                                         prot = src_entry->protection & ~VM_PROT_WRITE;
11931
11932                                         if (override_nx(map, src_entry->alias) && prot)
11933                                                 prot |= VM_PROT_EXECUTE;
11934
11935                                         if(map->mapped_in_other_pmaps) {
11936                                                 vm_object_pmap_protect(
11937                                                         src_entry->object.vm_object,
11938                                                         src_entry->offset,
11939                                                         entry_size,
11940                                                         PMAP_NULL,
11941                                                         src_entry->vme_start,
11942                                                         prot);
11943                                         } else {
11944                                                 pmap_protect(vm_map_pmap(map),
11945                                                              src_entry->vme_start,
11946                                                              src_entry->vme_end,
11947                                                              prot);
11948                                         }
11949                                 }
11950
11951                                 object = src_entry->object.vm_object;
11952                                 src_entry->needs_copy = FALSE;
11953                         }
11954
11955
11956                         vm_object_lock(object);
11957                         vm_object_reference_locked(object); /* object ref. for new entry */
11958                         if (object->copy_strategy ==
11959                             MEMORY_OBJECT_COPY_SYMMETRIC) {
11960                                 object->copy_strategy =
11961                                         MEMORY_OBJECT_COPY_DELAY;
11962                         }
11963                         vm_object_unlock(object);
11964                 }
11965
11966                 offset = src_entry->offset + (src_start - src_entry->vme_start);
11967
11968                 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11969                 vm_map_entry_copy(new_entry, src_entry);
11970                 new_entry->use_pmap = FALSE; /* clr address space specifics */
11971
11972                 new_entry->map_aligned = FALSE;
11973
11974                 new_entry->vme_start = map_address;
11975                 new_entry->vme_end = map_address + tmp_size;
11976                 assert(new_entry->vme_start < new_entry->vme_end);
11977                 new_entry->inheritance = inheritance;
11978                 new_entry->offset = offset;
11979
11980                 /*
11981                  * The new region has to be copied now if required.
11982                  */
11983         RestartCopy:
11984                 if (!copy) {
11985                         /*
11986                          * Cannot allow an entry describing a JIT
11987                          * region to be shared across address spaces.
11988                          */
11989                         if (src_entry->used_for_jit == TRUE) {
11990                                 result = KERN_INVALID_ARGUMENT;
11991                                 break;
11992                         }
11993                         src_entry->is_shared = TRUE;
11994                         new_entry->is_shared = TRUE;
11995                         if (!(new_entry->is_sub_map))
11996                                 new_entry->needs_copy = FALSE;
11997
11998                 } else if (src_entry->is_sub_map) {
11999                         /* make this a COW sub_map if not already */
12000                         new_entry->needs_copy = TRUE;
12001                         object = VM_OBJECT_NULL;
12002                 } else if (src_entry->wired_count == 0 &&
12003                            vm_object_copy_quickly(&new_entry->object.vm_object,
12004                                                   new_entry->offset,
12005                                                   (new_entry->vme_end -
12006                                                    new_entry->vme_start),
12007                                                   &src_needs_copy,
12008                                                   &new_entry_needs_copy)) {
12009
12010                         new_entry->needs_copy = new_entry_needs_copy;
12011                         new_entry->is_shared = FALSE;
12012
12013                         /*
12014                          * Handle copy_on_write semantics.
12015                          */
12016                         if (src_needs_copy && !src_entry->needs_copy) {
12017                                 vm_prot_t prot;
12018
12019                                 prot = src_entry->protection & ~VM_PROT_WRITE;
12020
12021                                 if (override_nx(map, src_entry->alias) && prot)
12022                                         prot |= VM_PROT_EXECUTE;
12023
12024                                 vm_object_pmap_protect(object,
12025                                                        offset,
12026                                                        entry_size,
12027                                                        ((src_entry->is_shared
12028                                                          || map->mapped_in_other_pmaps) ?
12029                                                         PMAP_NULL : map->pmap),
12030                                                        src_entry->vme_start,
12031                                                        prot);
12032
12033                                 src_entry->needs_copy = TRUE;
12034                         }
12035                         /*
12036                          * Throw away the old object reference of the new entry.
12037                          */
12038                         vm_object_deallocate(object);
12039
12040                 } else {
12041                         new_entry->is_shared = FALSE;
12042
12043                         /*
12044                          * The map can be safely unlocked since we
12045                          * already hold a reference on the object.
12046                          *
12047                          * Record the timestamp of the map for later
12048                          * verification, and unlock the map.
12049                          */
12050                         version.main_timestamp = map->timestamp;
12051                         vm_map_unlock(map);     /* Increments timestamp once! */
12052
12053                         /*
12054                          * Perform the copy.
12055                          */
12056                         if (src_entry->wired_count > 0) {
12057                                 vm_object_lock(object);
12058                                 result = vm_object_copy_slowly(
12059                                         object,
12060                                         offset,
12061                                         entry_size,
12062                                         THREAD_UNINT,
12063                                         &new_entry->object.vm_object);
12064
12065                                 new_entry->offset = 0;
12066                                 new_entry->needs_copy = FALSE;
12067                         } else {
12068                                 result = vm_object_copy_strategically(
12069                                         object,
12070                                         offset,
12071                                         entry_size,
12072                                         &new_entry->object.vm_object,
12073                                         &new_entry->offset,
12074                                         &new_entry_needs_copy);
12075
12076                                 new_entry->needs_copy = new_entry_needs_copy;
12077                         }
12078
12079                         /*
12080                          * Throw away the old object reference of the new entry.
12081                          */
12082                         vm_object_deallocate(object);
12083
12084                         if (result != KERN_SUCCESS &&
12085                             result != KERN_MEMORY_RESTART_COPY) {
12086                                 _vm_map_entry_dispose(map_header, new_entry);
12087                                 break;
12088                         }
12089
12090                         /*
12091                          * Verify that the map has not substantially
12092                          * changed while the copy was being made.
12093                          */
12094
12095                         vm_map_lock(map);
12096                         if (version.main_timestamp + 1 != map->timestamp) {
12097                                 /*
12098                                  * Simple version comparison failed.
12099                                  *
12100                                  * Retry the lookup and verify that the
12101                                  * same object/offset are still present.
12102                                  */
12103                                 vm_object_deallocate(new_entry->
12104                                                      object.vm_object);
12105                                 _vm_map_entry_dispose(map_header, new_entry);
12106                                 if (result == KERN_MEMORY_RESTART_COPY)
12107                                         result = KERN_SUCCESS;
12108                                 continue;
12109                         }
12110
12111                         if (result == KERN_MEMORY_RESTART_COPY) {
12112                                 vm_object_reference(object);
12113                                 goto RestartCopy;
12114                         }
12115                 }
12116
12117                 _vm_map_store_entry_link(map_header,
12118                                    map_header->links.prev, new_entry);
12119
12120                 /*Protections for submap mapping are irrelevant here*/
12121                 if( !src_entry->is_sub_map ) {
12122                         *cur_protection &= src_entry->protection;
12123                         *max_protection &= src_entry->max_protection;
12124                 }
12125                 map_address += tmp_size;
12126                 mapped_size += tmp_size;
12127                 src_start += tmp_size;
12128
12129         } /* end while */
12130
12131         vm_map_unlock(map);
12132         if (result != KERN_SUCCESS) {
12133                 /*
12134                  * Free all allocated elements.
12135                  */
12136                 for (src_entry = map_header->links.next;
12137                      src_entry != (struct vm_map_entry *)&map_header->links;
12138                      src_entry = new_entry) {
12139                         new_entry = src_entry->vme_next;
12140                         _vm_map_store_entry_unlink(map_header, src_entry);
12141                         if (src_entry->is_sub_map) {
12142                                 vm_map_deallocate(src_entry->object.sub_map);
12143                         } else {
12144                                 vm_object_deallocate(src_entry->object.vm_object);
12145                         }
12146                         _vm_map_entry_dispose(map_header, src_entry);
12147                 }
12148         }
12149         return result;
12150 }
12151
12152 /*
12153  *      Routine:        vm_remap
12154  *
12155  *                      Map portion of a task's address space.
12156  *                      Mapped region must not overlap more than
12157  *                      one vm memory object. Protections and
12158  *                      inheritance attributes remain the same
12159  *                      as in the original task and are out parameters.
12160  *                      Source and Target task can be identical
12161  *                      Other attributes are identical as for vm_map()
12162  */
12163 kern_return_t
12164 vm_map_remap(
12165         vm_map_t                target_map,
12166         vm_map_address_t        *address,
12167         vm_map_size_t           size,
12168         vm_map_offset_t         mask,
12169         int                     flags,
12170         vm_map_t                src_map,
12171         vm_map_offset_t         memory_address,
12172         boolean_t               copy,
12173         vm_prot_t               *cur_protection,
12174         vm_prot_t               *max_protection,
12175         vm_inherit_t            inheritance)
12176 {
12177         kern_return_t           result;
12178         vm_map_entry_t          entry;
12179         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
12180         vm_map_entry_t          new_entry;
12181         struct vm_map_header    map_header;
12182         vm_map_offset_t         offset_in_mapping;
12183
12184         if (target_map == VM_MAP_NULL)
12185                 return KERN_INVALID_ARGUMENT;
12186
12187         switch (inheritance) {
12188         case VM_INHERIT_NONE:
12189         case VM_INHERIT_COPY:
12190         case VM_INHERIT_SHARE:
12191                 if (size != 0 && src_map != VM_MAP_NULL)
12192                         break;
12193                 /*FALL THRU*/
12194         default:
12195                 return KERN_INVALID_ARGUMENT;
12196         }
12197
12198         /*
12199          * If the user is requesting that we return the address of the
12200          * first byte of the data (rather than the base of the page),
12201          * then we use different rounding semantics: specifically,
12202          * we assume that (memory_address, size) describes a region
12203          * all of whose pages we must cover, rather than a base to be truncated
12204          * down and a size to be added to that base.  So we figure out
12205          * the highest page that the requested region includes and make
12206          * sure that the size will cover it.
12207          *
12208          * The key example we're worried about it is of the form:
12209          *
12210          *              memory_address = 0x1ff0, size = 0x20
12211          *
12212          * With the old semantics, we round down the memory_address to 0x1000
12213          * and round up the size to 0x1000, resulting in our covering *only*
12214          * page 0x1000.  With the new semantics, we'd realize that the region covers
12215          * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
12216          * 0x1000 and page 0x2000 in the region we remap.
12217          */
12218         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12219                 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
12220                 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
12221         } else {
12222                 size = vm_map_round_page(size, PAGE_MASK);
12223         }
12224
12225         result = vm_map_remap_extract(src_map, memory_address,
12226                                       size, copy, &map_header,
12227                                       cur_protection,
12228                                       max_protection,
12229                                       inheritance,
12230                                       target_map->hdr.entries_pageable);
12231
12232         if (result != KERN_SUCCESS) {
12233                 return result;
12234         }
12235
12236         /*
12237          * Allocate/check a range of free virtual address
12238          * space for the target
12239          */
12240         *address = vm_map_trunc_page(*address,
12241                                      VM_MAP_PAGE_MASK(target_map));
12242         vm_map_lock(target_map);
12243         result = vm_map_remap_range_allocate(target_map, address, size,
12244                                              mask, flags, &insp_entry);
12245
12246         for (entry = map_header.links.next;
12247              entry != (struct vm_map_entry *)&map_header.links;
12248              entry = new_entry) {
12249                 new_entry = entry->vme_next;
12250                 _vm_map_store_entry_unlink(&map_header, entry);
12251                 if (result == KERN_SUCCESS) {
12252                         entry->vme_start += *address;
12253                         entry->vme_end += *address;
12254                         assert(!entry->map_aligned);
12255                         vm_map_store_entry_link(target_map, insp_entry, entry);
12256                         insp_entry = entry;
12257                 } else {
12258                         if (!entry->is_sub_map) {
12259                                 vm_object_deallocate(entry->object.vm_object);
12260                         } else {
12261                                 vm_map_deallocate(entry->object.sub_map);
12262                         }
12263                         _vm_map_entry_dispose(&map_header, entry);
12264                 }
12265         }
12266
12267         if( target_map->disable_vmentry_reuse == TRUE) {
12268                 if( target_map->highest_entry_end < insp_entry->vme_end ){
12269                         target_map->highest_entry_end = insp_entry->vme_end;
12270                 }
12271         }
12272
12273         if (result == KERN_SUCCESS) {
12274                 target_map->size += size;
12275                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
12276         }
12277         vm_map_unlock(target_map);
12278
12279         if (result == KERN_SUCCESS && target_map->wiring_required)
12280                 result = vm_map_wire(target_map, *address,
12281                                      *address + size, *cur_protection, TRUE);
12282
12283         /*
12284          * If requested, return the address of the data pointed to by the
12285          * request, rather than the base of the resulting page.
12286          */
12287         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12288                 *address += offset_in_mapping;
12289         }
12290
12291         return result;
12292 }
12293
12294 /*
12295  *      Routine:        vm_map_remap_range_allocate
12296  *
12297  *      Description:
12298  *              Allocate a range in the specified virtual address map.
12299  *              returns the address and the map entry just before the allocated
12300  *              range
12301  *
12302  *      Map must be locked.
12303  */
12304
12305 static kern_return_t
12306 vm_map_remap_range_allocate(
12307         vm_map_t                map,
12308         vm_map_address_t        *address,       /* IN/OUT */
12309         vm_map_size_t           size,
12310         vm_map_offset_t         mask,
12311         int                     flags,
12312         vm_map_entry_t          *map_entry)     /* OUT */
12313 {
12314         vm_map_entry_t  entry;
12315         vm_map_offset_t start;
12316         vm_map_offset_t end;
12317         kern_return_t   kr;
12318
12319 StartAgain: ;
12320
12321         start = *address;
12322
12323         if (flags & VM_FLAGS_ANYWHERE)
12324         {
12325                 /*
12326                  *      Calculate the first possible address.
12327                  */
12328
12329                 if (start < map->min_offset)
12330                         start = map->min_offset;
12331                 if (start > map->max_offset)
12332                         return(KERN_NO_SPACE);
12333
12334                 /*
12335                  *      Look for the first possible address;
12336                  *      if there's already something at this
12337                  *      address, we have to start after it.
12338                  */
12339
12340                 if( map->disable_vmentry_reuse == TRUE) {
12341                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
12342                 } else {
12343                         assert(first_free_is_valid(map));
12344                         if (start == map->min_offset) {
12345                                 if ((entry = map->first_free) != vm_map_to_entry(map))
12346                                         start = entry->vme_end;
12347                         } else {
12348                                 vm_map_entry_t  tmp_entry;
12349                                 if (vm_map_lookup_entry(map, start, &tmp_entry))
12350                                         start = tmp_entry->vme_end;
12351                                 entry = tmp_entry;
12352                         }
12353                         start = vm_map_round_page(start,
12354                                                   VM_MAP_PAGE_MASK(map));
12355                 }
12356
12357                 /*
12358                  *      In any case, the "entry" always precedes
12359                  *      the proposed new region throughout the
12360                  *      loop:
12361                  */
12362
12363                 while (TRUE) {
12364                         register vm_map_entry_t next;
12365
12366                         /*
12367                          *      Find the end of the proposed new region.
12368                          *      Be sure we didn't go beyond the end, or
12369                          *      wrap around the address.
12370                          */
12371
12372                         end = ((start + mask) & ~mask);
12373                         end = vm_map_round_page(end,
12374                                                 VM_MAP_PAGE_MASK(map));
12375                         if (end < start)
12376                                 return(KERN_NO_SPACE);
12377                         start = end;
12378                         end += size;
12379
12380                         if ((end > map->max_offset) || (end < start)) {
12381                                 if (map->wait_for_space) {
12382                                         if (size <= (map->max_offset -
12383                                                      map->min_offset)) {
12384                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
12385                                                 vm_map_unlock(map);
12386                                                 thread_block(THREAD_CONTINUE_NULL);
12387                                                 vm_map_lock(map);
12388                                                 goto StartAgain;
12389                                         }
12390                                 }
12391
12392                                 return(KERN_NO_SPACE);
12393                         }
12394
12395                         /*
12396                          *      If there are no more entries, we must win.
12397                          */
12398
12399                         next = entry->vme_next;
12400                         if (next == vm_map_to_entry(map))
12401                                 break;
12402
12403                         /*
12404                          *      If there is another entry, it must be
12405                          *      after the end of the potential new region.
12406                          */
12407
12408                         if (next->vme_start >= end)
12409                                 break;
12410
12411                         /*
12412                          *      Didn't fit -- move to the next entry.
12413                          */
12414
12415                         entry = next;
12416                         start = entry->vme_end;
12417                 }
12418                 *address = start;
12419         } else {
12420                 vm_map_entry_t          temp_entry;
12421
12422                 /*
12423                  *      Verify that:
12424                  *              the address doesn't itself violate
12425                  *              the mask requirement.
12426                  */
12427
12428                 if ((start & mask) != 0)
12429                         return(KERN_NO_SPACE);
12430
12431
12432                 /*
12433                  *      ...     the address is within bounds
12434                  */
12435
12436                 end = start + size;
12437
12438                 if ((start < map->min_offset) ||
12439                     (end > map->max_offset) ||
12440                     (start >= end)) {
12441                         return(KERN_INVALID_ADDRESS);
12442                 }
12443
12444                 /*
12445                  * If we're asked to overwrite whatever was mapped in that
12446                  * range, first deallocate that range.
12447                  */
12448                 if (flags & VM_FLAGS_OVERWRITE) {
12449                         vm_map_t zap_map;
12450
12451                         /*
12452                          * We use a "zap_map" to avoid having to unlock
12453                          * the "map" in vm_map_delete(), which would compromise
12454                          * the atomicity of the "deallocate" and then "remap"
12455                          * combination.
12456                          */
12457                         zap_map = vm_map_create(PMAP_NULL,
12458                                                 start,
12459                                                 end,
12460                                                 map->hdr.entries_pageable);
12461                         if (zap_map == VM_MAP_NULL) {
12462                                 return KERN_RESOURCE_SHORTAGE;
12463                         }
12464                         vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
12465
12466                         kr = vm_map_delete(map, start, end,
12467                                            VM_MAP_REMOVE_SAVE_ENTRIES,
12468                                            zap_map);
12469                         if (kr == KERN_SUCCESS) {
12470                                 vm_map_destroy(zap_map,
12471                                                VM_MAP_REMOVE_NO_PMAP_CLEANUP);
12472                                 zap_map = VM_MAP_NULL;
12473                         }
12474                 }
12475
12476                 /*
12477                  *      ...     the starting address isn't allocated
12478                  */
12479
12480                 if (vm_map_lookup_entry(map, start, &temp_entry))
12481                         return(KERN_NO_SPACE);
12482
12483                 entry = temp_entry;
12484
12485                 /*
12486                  *      ...     the next region doesn't overlap the
12487                  *              end point.
12488                  */
12489
12490                 if ((entry->vme_next != vm_map_to_entry(map)) &&
12491                     (entry->vme_next->vme_start < end))
12492                         return(KERN_NO_SPACE);
12493         }
12494         *map_entry = entry;
12495         return(KERN_SUCCESS);
12496 }
12497
12498 /*
12499  *      vm_map_switch:
12500  *
12501  *      Set the address map for the current thread to the specified map
12502  */
12503
12504 vm_map_t
12505 vm_map_switch(
12506         vm_map_t        map)
12507 {
12508         int             mycpu;
12509         thread_t        thread = current_thread();
12510         vm_map_t        oldmap = thread->map;
12511
12512         mp_disable_preemption();
12513         mycpu = cpu_number();
12514
12515         /*
12516          *      Deactivate the current map and activate the requested map
12517          */
12518         PMAP_SWITCH_USER(thread, map, mycpu);
12519
12520         mp_enable_preemption();
12521         return(oldmap);
12522 }
12523
12524
12525 /*
12526  *      Routine:        vm_map_write_user
12527  *
12528  *      Description:
12529  *              Copy out data from a kernel space into space in the
12530  *              destination map. The space must already exist in the
12531  *              destination map.
12532  *              NOTE:  This routine should only be called by threads
12533  *              which can block on a page fault. i.e. kernel mode user
12534  *              threads.
12535  *
12536  */
12537 kern_return_t
12538 vm_map_write_user(
12539         vm_map_t                map,
12540         void                    *src_p,
12541         vm_map_address_t        dst_addr,
12542         vm_size_t               size)
12543 {
12544         kern_return_t   kr = KERN_SUCCESS;
12545
12546         if(current_map() == map) {
12547                 if (copyout(src_p, dst_addr, size)) {
12548                         kr = KERN_INVALID_ADDRESS;
12549                 }
12550         } else {
12551                 vm_map_t        oldmap;
12552
12553                 /* take on the identity of the target map while doing */
12554                 /* the transfer */
12555
12556                 vm_map_reference(map);
12557                 oldmap = vm_map_switch(map);
12558                 if (copyout(src_p, dst_addr, size)) {
12559                         kr = KERN_INVALID_ADDRESS;
12560                 }
12561                 vm_map_switch(oldmap);
12562                 vm_map_deallocate(map);
12563         }
12564         return kr;
12565 }
12566
12567 /*
12568  *      Routine:        vm_map_read_user
12569  *
12570  *      Description:
12571  *              Copy in data from a user space source map into the
12572  *              kernel map. The space must already exist in the
12573  *              kernel map.
12574  *              NOTE:  This routine should only be called by threads
12575  *              which can block on a page fault. i.e. kernel mode user
12576  *              threads.
12577  *
12578  */
12579 kern_return_t
12580 vm_map_read_user(
12581         vm_map_t                map,
12582         vm_map_address_t        src_addr,
12583         void                    *dst_p,
12584         vm_size_t               size)
12585 {
12586         kern_return_t   kr = KERN_SUCCESS;
12587
12588         if(current_map() == map) {
12589                 if (copyin(src_addr, dst_p, size)) {
12590                         kr = KERN_INVALID_ADDRESS;
12591                 }
12592         } else {
12593                 vm_map_t        oldmap;
12594
12595                 /* take on the identity of the target map while doing */
12596                 /* the transfer */
12597
12598                 vm_map_reference(map);
12599                 oldmap = vm_map_switch(map);
12600                 if (copyin(src_addr, dst_p, size)) {
12601                         kr = KERN_INVALID_ADDRESS;
12602                 }
12603                 vm_map_switch(oldmap);
12604                 vm_map_deallocate(map);
12605         }
12606         return kr;
12607 }
12608
12609
12610 /*
12611  *      vm_map_check_protection:
12612  *
12613  *      Assert that the target map allows the specified
12614  *      privilege on the entire address region given.
12615  *      The entire region must be allocated.
12616  */
12617 boolean_t
12618 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
12619                         vm_map_offset_t end, vm_prot_t protection)
12620 {
12621         vm_map_entry_t entry;
12622         vm_map_entry_t tmp_entry;
12623
12624         vm_map_lock(map);
12625
12626         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
12627         {
12628                 vm_map_unlock(map);
12629                 return (FALSE);
12630         }
12631
12632         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12633                 vm_map_unlock(map);
12634                 return(FALSE);
12635         }
12636
12637         entry = tmp_entry;
12638
12639         while (start < end) {
12640                 if (entry == vm_map_to_entry(map)) {
12641                         vm_map_unlock(map);
12642                         return(FALSE);
12643                 }
12644
12645                 /*
12646                  *      No holes allowed!
12647                  */
12648
12649                 if (start < entry->vme_start) {
12650                         vm_map_unlock(map);
12651                         return(FALSE);
12652                 }
12653
12654                 /*
12655                  * Check protection associated with entry.
12656                  */
12657
12658                 if ((entry->protection & protection) != protection) {
12659                         vm_map_unlock(map);
12660                         return(FALSE);
12661                 }
12662
12663                 /* go to next entry */
12664
12665                 start = entry->vme_end;
12666                 entry = entry->vme_next;
12667         }
12668         vm_map_unlock(map);
12669         return(TRUE);
12670 }
12671
12672 kern_return_t
12673 vm_map_purgable_control(
12674         vm_map_t                map,
12675         vm_map_offset_t         address,
12676         vm_purgable_t           control,
12677         int                     *state)
12678 {
12679         vm_map_entry_t          entry;
12680         vm_object_t             object;
12681         kern_return_t           kr;
12682
12683         /*
12684          * Vet all the input parameters and current type and state of the
12685          * underlaying object.  Return with an error if anything is amiss.
12686          */
12687         if (map == VM_MAP_NULL)
12688                 return(KERN_INVALID_ARGUMENT);
12689
12690         if (control != VM_PURGABLE_SET_STATE &&
12691             control != VM_PURGABLE_GET_STATE &&
12692             control != VM_PURGABLE_PURGE_ALL)
12693                 return(KERN_INVALID_ARGUMENT);
12694
12695         if (control == VM_PURGABLE_PURGE_ALL) {
12696                 vm_purgeable_object_purge_all();
12697                 return KERN_SUCCESS;
12698         }
12699
12700         if (control == VM_PURGABLE_SET_STATE &&
12701             (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
12702              ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
12703                 return(KERN_INVALID_ARGUMENT);
12704
12705         vm_map_lock_read(map);
12706
12707         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12708
12709                 /*
12710                  * Must pass a valid non-submap address.
12711                  */
12712                 vm_map_unlock_read(map);
12713                 return(KERN_INVALID_ADDRESS);
12714         }
12715
12716         if ((entry->protection & VM_PROT_WRITE) == 0) {
12717                 /*
12718                  * Can't apply purgable controls to something you can't write.
12719                  */
12720                 vm_map_unlock_read(map);
12721                 return(KERN_PROTECTION_FAILURE);
12722         }
12723
12724         object = entry->object.vm_object;
12725         if (object == VM_OBJECT_NULL) {
12726                 /*
12727                  * Object must already be present or it can't be purgable.
12728                  */
12729                 vm_map_unlock_read(map);
12730                 return KERN_INVALID_ARGUMENT;
12731         }
12732
12733         vm_object_lock(object);
12734
12735 #if 00
12736         if (entry->offset != 0 ||
12737             entry->vme_end - entry->vme_start != object->vo_size) {
12738                 /*
12739                  * Can only apply purgable controls to the whole (existing)
12740                  * object at once.
12741                  */
12742                 vm_map_unlock_read(map);
12743                 vm_object_unlock(object);
12744                 return KERN_INVALID_ARGUMENT;
12745         }
12746 #endif
12747
12748         vm_map_unlock_read(map);
12749
12750         kr = vm_object_purgable_control(object, control, state);
12751
12752         vm_object_unlock(object);
12753
12754         return kr;
12755 }
12756
12757 kern_return_t
12758 vm_map_page_query_internal(
12759         vm_map_t        target_map,
12760         vm_map_offset_t offset,
12761         int             *disposition,
12762         int             *ref_count)
12763 {
12764         kern_return_t                   kr;
12765         vm_page_info_basic_data_t       info;
12766         mach_msg_type_number_t          count;
12767
12768         count = VM_PAGE_INFO_BASIC_COUNT;
12769         kr = vm_map_page_info(target_map,
12770                               offset,
12771                               VM_PAGE_INFO_BASIC,
12772                               (vm_page_info_t) &info,
12773                               &count);
12774         if (kr == KERN_SUCCESS) {
12775                 *disposition = info.disposition;
12776                 *ref_count = info.ref_count;
12777         } else {
12778                 *disposition = 0;
12779                 *ref_count = 0;
12780         }
12781
12782         return kr;
12783 }
12784
12785 kern_return_t
12786 vm_map_page_info(
12787         vm_map_t                map,
12788         vm_map_offset_t         offset,
12789         vm_page_info_flavor_t   flavor,
12790         vm_page_info_t          info,
12791         mach_msg_type_number_t  *count)
12792 {
12793         vm_map_entry_t          map_entry;
12794         vm_object_t             object;
12795         vm_page_t               m;
12796         kern_return_t           kr;
12797         kern_return_t           retval = KERN_SUCCESS;
12798         boolean_t               top_object;
12799         int                     disposition;
12800         int                     ref_count;
12801         vm_page_info_basic_t    basic_info;
12802         int                     depth;
12803         vm_map_offset_t         offset_in_page;
12804
12805         switch (flavor) {
12806         case VM_PAGE_INFO_BASIC:
12807                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12808                         /*
12809                          * The "vm_page_info_basic_data" structure was not
12810                          * properly padded, so allow the size to be off by
12811                          * one to maintain backwards binary compatibility...
12812                          */
12813                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12814                                 return KERN_INVALID_ARGUMENT;
12815                 }
12816                 break;
12817         default:
12818                 return KERN_INVALID_ARGUMENT;
12819         }
12820
12821         disposition = 0;
12822         ref_count = 0;
12823         top_object = TRUE;
12824         depth = 0;
12825
12826         retval = KERN_SUCCESS;
12827         offset_in_page = offset & PAGE_MASK;
12828         offset = vm_map_trunc_page(offset, PAGE_MASK);
12829
12830         vm_map_lock_read(map);
12831
12832         /*
12833          * First, find the map entry covering "offset", going down
12834          * submaps if necessary.
12835          */
12836         for (;;) {
12837                 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12838                         vm_map_unlock_read(map);
12839                         return KERN_INVALID_ADDRESS;
12840                 }
12841                 /* compute offset from this map entry's start */
12842                 offset -= map_entry->vme_start;
12843                 /* compute offset into this map entry's object (or submap) */
12844                 offset += map_entry->offset;
12845
12846                 if (map_entry->is_sub_map) {
12847                         vm_map_t sub_map;
12848
12849                         sub_map = map_entry->object.sub_map;
12850                         vm_map_lock_read(sub_map);
12851                         vm_map_unlock_read(map);
12852
12853                         map = sub_map;
12854
12855                         ref_count = MAX(ref_count, map->ref_count);
12856                         continue;
12857                 }
12858                 break;
12859         }
12860
12861         object = map_entry->object.vm_object;
12862         if (object == VM_OBJECT_NULL) {
12863                 /* no object -> no page */
12864                 vm_map_unlock_read(map);
12865                 goto done;
12866         }
12867
12868         vm_object_lock(object);
12869         vm_map_unlock_read(map);
12870
12871         /*
12872          * Go down the VM object shadow chain until we find the page
12873          * we're looking for.
12874          */
12875         for (;;) {
12876                 ref_count = MAX(ref_count, object->ref_count);
12877
12878                 m = vm_page_lookup(object, offset);
12879
12880                 if (m != VM_PAGE_NULL) {
12881                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12882                         break;
12883                 } else {
12884 #if MACH_PAGEMAP
12885                         if (object->existence_map) {
12886                                 if (vm_external_state_get(object->existence_map,
12887                                                           offset) ==
12888                                     VM_EXTERNAL_STATE_EXISTS) {
12889                                         /*
12890                                          * this page has been paged out
12891                                          */
12892                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12893                                         break;
12894                                 }
12895                         } else
12896 #endif
12897                         if (object->internal &&
12898                             object->alive &&
12899                             !object->terminating &&
12900                             object->pager_ready) {
12901
12902                                 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12903                                         if (VM_COMPRESSOR_PAGER_STATE_GET(
12904                                                     object,
12905                                                     offset)
12906                                             == VM_EXTERNAL_STATE_EXISTS) {
12907                                                 /* the pager has that page */
12908                                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12909                                                 break;
12910                                         }
12911                                 } else {
12912                                         memory_object_t pager;
12913
12914                                         vm_object_paging_begin(object);
12915                                         pager = object->pager;
12916                                         vm_object_unlock(object);
12917
12918                                         /*
12919                                          * Ask the default pager if
12920                                          * it has this page.
12921                                          */
12922                                         kr = memory_object_data_request(
12923                                                 pager,
12924                                                 offset + object->paging_offset,
12925                                                 0, /* just poke the pager */
12926                                                 VM_PROT_READ,
12927                                                 NULL);
12928
12929                                         vm_object_lock(object);
12930                                         vm_object_paging_end(object);
12931
12932                                         if (kr == KERN_SUCCESS) {
12933                                                 /* the default pager has it */
12934                                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12935                                                 break;
12936                                         }
12937                                 }
12938                         }
12939
12940                         if (object->shadow != VM_OBJECT_NULL) {
12941                                 vm_object_t shadow;
12942
12943                                 offset += object->vo_shadow_offset;
12944                                 shadow = object->shadow;
12945
12946                                 vm_object_lock(shadow);
12947                                 vm_object_unlock(object);
12948
12949                                 object = shadow;
12950                                 top_object = FALSE;
12951                                 depth++;
12952                         } else {
12953 //                              if (!object->internal)
12954 //                                      break;
12955 //                              retval = KERN_FAILURE;
12956 //                              goto done_with_object;
12957                                 break;
12958                         }
12959                 }
12960         }
12961         /* The ref_count is not strictly accurate, it measures the number   */
12962         /* of entities holding a ref on the object, they may not be mapping */
12963         /* the object or may not be mapping the section holding the         */
12964         /* target page but its still a ball park number and though an over- */
12965         /* count, it picks up the copy-on-write cases                       */
12966
12967         /* We could also get a picture of page sharing from pmap_attributes */
12968         /* but this would under count as only faulted-in mappings would     */
12969         /* show up.                                                         */
12970
12971         if (top_object == TRUE && object->shadow)
12972                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12973
12974         if (! object->internal)
12975                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12976
12977         if (m == VM_PAGE_NULL)
12978                 goto done_with_object;
12979
12980         if (m->fictitious) {
12981                 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12982                 goto done_with_object;
12983         }
12984         if (m->dirty || pmap_is_modified(m->phys_page))
12985                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12986
12987         if (m->reference || pmap_is_referenced(m->phys_page))
12988                 disposition |= VM_PAGE_QUERY_PAGE_REF;
12989
12990         if (m->speculative)
12991                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12992
12993         if (m->cs_validated)
12994                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12995         if (m->cs_tainted)
12996                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12997
12998 done_with_object:
12999         vm_object_unlock(object);
13000 done:
13001
13002         switch (flavor) {
13003         case VM_PAGE_INFO_BASIC:
13004                 basic_info = (vm_page_info_basic_t) info;
13005                 basic_info->disposition = disposition;
13006                 basic_info->ref_count = ref_count;
13007                 basic_info->object_id = (vm_object_id_t) (uintptr_t)
13008                         VM_KERNEL_ADDRPERM(object);
13009                 basic_info->offset =
13010                         (memory_object_offset_t) offset + offset_in_page;
13011                 basic_info->depth = depth;
13012                 break;
13013         }
13014
13015         return retval;
13016 }
13017
13018 /*
13019  *      vm_map_msync
13020  *
13021  *      Synchronises the memory range specified with its backing store
13022  *      image by either flushing or cleaning the contents to the appropriate
13023  *      memory manager engaging in a memory object synchronize dialog with
13024  *      the manager.  The client doesn't return until the manager issues
13025  *      m_o_s_completed message.  MIG Magically converts user task parameter
13026  *      to the task's address map.
13027  *
13028  *      interpretation of sync_flags
13029  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
13030  *                                pages to manager.
13031  *
13032  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
13033  *                              - discard pages, write dirty or precious
13034  *                                pages back to memory manager.
13035  *
13036  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
13037  *                              - write dirty or precious pages back to
13038  *                                the memory manager.
13039  *
13040  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
13041  *                                is a hole in the region, and we would
13042  *                                have returned KERN_SUCCESS, return
13043  *                                KERN_INVALID_ADDRESS instead.
13044  *
13045  *      NOTE
13046  *      The memory object attributes have not yet been implemented, this
13047  *      function will have to deal with the invalidate attribute
13048  *
13049  *      RETURNS
13050  *      KERN_INVALID_TASK               Bad task parameter
13051  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
13052  *      KERN_SUCCESS                    The usual.
13053  *      KERN_INVALID_ADDRESS            There was a hole in the region.
13054  */
13055
13056 kern_return_t
13057 vm_map_msync(
13058         vm_map_t                map,
13059         vm_map_address_t        address,
13060         vm_map_size_t           size,
13061         vm_sync_t               sync_flags)
13062 {
13063         msync_req_t             msr;
13064         msync_req_t             new_msr;
13065         queue_chain_t           req_q;  /* queue of requests for this msync */
13066         vm_map_entry_t          entry;
13067         vm_map_size_t           amount_left;
13068         vm_object_offset_t      offset;
13069         boolean_t               do_sync_req;
13070         boolean_t               had_hole = FALSE;
13071         memory_object_t         pager;
13072
13073         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
13074             (sync_flags & VM_SYNC_SYNCHRONOUS))
13075                 return(KERN_INVALID_ARGUMENT);
13076
13077         /*
13078          * align address and size on page boundaries
13079          */
13080         size = (vm_map_round_page(address + size,
13081                                   VM_MAP_PAGE_MASK(map)) -
13082                 vm_map_trunc_page(address,
13083                                   VM_MAP_PAGE_MASK(map)));
13084         address = vm_map_trunc_page(address,
13085                                     VM_MAP_PAGE_MASK(map));
13086
13087         if (map == VM_MAP_NULL)
13088                 return(KERN_INVALID_TASK);
13089
13090         if (size == 0)
13091                 return(KERN_SUCCESS);
13092
13093         queue_init(&req_q);
13094         amount_left = size;
13095
13096         while (amount_left > 0) {
13097                 vm_object_size_t        flush_size;
13098                 vm_object_t             object;
13099
13100                 vm_map_lock(map);
13101                 if (!vm_map_lookup_entry(map,
13102                                          vm_map_trunc_page(
13103                                                  address,
13104                                                  VM_MAP_PAGE_MASK(map)),
13105                                          &entry)) {
13106
13107                         vm_map_size_t   skip;
13108
13109                         /*
13110                          * hole in the address map.
13111                          */
13112                         had_hole = TRUE;
13113
13114                         /*
13115                          * Check for empty map.
13116                          */
13117                         if (entry == vm_map_to_entry(map) &&
13118                             entry->vme_next == entry) {
13119                                 vm_map_unlock(map);
13120                                 break;
13121                         }
13122                         /*
13123                          * Check that we don't wrap and that
13124                          * we have at least one real map entry.
13125                          */
13126                         if ((map->hdr.nentries == 0) ||
13127                             (entry->vme_next->vme_start < address)) {
13128                                 vm_map_unlock(map);
13129                                 break;
13130                         }
13131                         /*
13132                          * Move up to the next entry if needed
13133                          */
13134                         skip = (entry->vme_next->vme_start - address);
13135                         if (skip >= amount_left)
13136                                 amount_left = 0;
13137                         else
13138                                 amount_left -= skip;
13139                         address = entry->vme_next->vme_start;
13140                         vm_map_unlock(map);
13141                         continue;
13142                 }
13143
13144                 offset = address - entry->vme_start;
13145
13146                 /*
13147                  * do we have more to flush than is contained in this
13148                  * entry ?
13149                  */
13150                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
13151                         flush_size = entry->vme_end -
13152                                 (entry->vme_start + offset);
13153                 } else {
13154                         flush_size = amount_left;
13155                 }
13156                 amount_left -= flush_size;
13157                 address += flush_size;
13158
13159                 if (entry->is_sub_map == TRUE) {
13160                         vm_map_t        local_map;
13161                         vm_map_offset_t local_offset;
13162
13163                         local_map = entry->object.sub_map;
13164                         local_offset = entry->offset;
13165                         vm_map_unlock(map);
13166                         if (vm_map_msync(
13167                                     local_map,
13168                                     local_offset,
13169                                     flush_size,
13170                                     sync_flags) == KERN_INVALID_ADDRESS) {
13171                                 had_hole = TRUE;
13172                         }
13173                         continue;
13174                 }
13175                 object = entry->object.vm_object;
13176
13177                 /*
13178                  * We can't sync this object if the object has not been
13179                  * created yet
13180                  */
13181                 if (object == VM_OBJECT_NULL) {
13182                         vm_map_unlock(map);
13183                         continue;
13184                 }
13185                 offset += entry->offset;
13186
13187                 vm_object_lock(object);
13188
13189                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
13190                         int kill_pages = 0;
13191                         boolean_t reusable_pages = FALSE;
13192
13193                         if (sync_flags & VM_SYNC_KILLPAGES) {
13194                                 if (object->ref_count == 1 && !object->shadow)
13195                                         kill_pages = 1;
13196                                 else
13197                                         kill_pages = -1;
13198                         }
13199                         if (kill_pages != -1)
13200                                 vm_object_deactivate_pages(object, offset,
13201                                                            (vm_object_size_t)flush_size, kill_pages, reusable_pages);
13202                         vm_object_unlock(object);
13203                         vm_map_unlock(map);
13204                         continue;
13205                 }
13206                 /*
13207                  * We can't sync this object if there isn't a pager.
13208                  * Don't bother to sync internal objects, since there can't
13209                  * be any "permanent" storage for these objects anyway.
13210                  */
13211                 if ((object->pager == MEMORY_OBJECT_NULL) ||
13212                     (object->internal) || (object->private)) {
13213                         vm_object_unlock(object);
13214                         vm_map_unlock(map);
13215                         continue;
13216                 }
13217                 /*
13218                  * keep reference on the object until syncing is done
13219                  */
13220                 vm_object_reference_locked(object);
13221                 vm_object_unlock(object);
13222
13223                 vm_map_unlock(map);
13224
13225                 do_sync_req = vm_object_sync(object,
13226                                              offset,
13227                                              flush_size,
13228                                              sync_flags & VM_SYNC_INVALIDATE,
13229                                              ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
13230                                               (sync_flags & VM_SYNC_ASYNCHRONOUS)),
13231                                              sync_flags & VM_SYNC_SYNCHRONOUS);
13232                 /*
13233                  * only send a m_o_s if we returned pages or if the entry
13234                  * is writable (ie dirty pages may have already been sent back)
13235                  */
13236                 if (!do_sync_req) {
13237                         if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
13238                                 /*
13239                                  * clear out the clustering and read-ahead hints
13240                                  */
13241                                 vm_object_lock(object);
13242
13243                                 object->pages_created = 0;
13244                                 object->pages_used = 0;
13245                                 object->sequential = 0;
13246                                 object->last_alloc = 0;
13247
13248                                 vm_object_unlock(object);
13249                         }
13250                         vm_object_deallocate(object);
13251                         continue;
13252                 }
13253                 msync_req_alloc(new_msr);
13254
13255                 vm_object_lock(object);
13256                 offset += object->paging_offset;
13257
13258                 new_msr->offset = offset;
13259                 new_msr->length = flush_size;
13260                 new_msr->object = object;
13261                 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
13262         re_iterate:
13263
13264                 /*
13265                  * We can't sync this object if there isn't a pager.  The
13266                  * pager can disappear anytime we're not holding the object
13267                  * lock.  So this has to be checked anytime we goto re_iterate.
13268                  */
13269
13270                 pager = object->pager;
13271
13272                 if (pager == MEMORY_OBJECT_NULL) {
13273                         vm_object_unlock(object);
13274                         vm_object_deallocate(object);
13275                         msync_req_free(new_msr);
13276                         new_msr = NULL;
13277                         continue;
13278                 }
13279
13280                 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
13281                         /*
13282                          * need to check for overlapping entry, if found, wait
13283                          * on overlapping msr to be done, then reiterate
13284                          */
13285                         msr_lock(msr);
13286                         if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
13287                             ((offset >= msr->offset &&
13288                               offset < (msr->offset + msr->length)) ||
13289                              (msr->offset >= offset &&
13290                               msr->offset < (offset + flush_size))))
13291                         {
13292                                 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
13293                                 msr_unlock(msr);
13294                                 vm_object_unlock(object);
13295                                 thread_block(THREAD_CONTINUE_NULL);
13296                                 vm_object_lock(object);
13297                                 goto re_iterate;
13298                         }
13299                         msr_unlock(msr);
13300                 }/* queue_iterate */
13301
13302                 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
13303
13304                 vm_object_paging_begin(object);
13305                 vm_object_unlock(object);
13306
13307                 queue_enter(&req_q, new_msr, msync_req_t, req_q);
13308
13309                 (void) memory_object_synchronize(
13310                         pager,
13311                         offset,
13312                         flush_size,
13313                         sync_flags & ~VM_SYNC_CONTIGUOUS);
13314
13315                 vm_object_lock(object);
13316                 vm_object_paging_end(object);
13317                 vm_object_unlock(object);
13318         }/* while */
13319
13320         /*
13321          * wait for memory_object_sychronize_completed messages from pager(s)
13322          */
13323
13324         while (!queue_empty(&req_q)) {
13325                 msr = (msync_req_t)queue_first(&req_q);
13326                 msr_lock(msr);
13327                 while(msr->flag != VM_MSYNC_DONE) {
13328                         assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
13329                         msr_unlock(msr);
13330                         thread_block(THREAD_CONTINUE_NULL);
13331                         msr_lock(msr);
13332                 }/* while */
13333                 queue_remove(&req_q, msr, msync_req_t, req_q);
13334                 msr_unlock(msr);
13335                 vm_object_deallocate(msr->object);
13336                 msync_req_free(msr);
13337         }/* queue_iterate */
13338
13339         /* for proper msync() behaviour */
13340         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
13341                 return(KERN_INVALID_ADDRESS);
13342
13343         return(KERN_SUCCESS);
13344 }/* vm_msync */
13345
13346 /*
13347  *      Routine:        convert_port_entry_to_map
13348  *      Purpose:
13349  *              Convert from a port specifying an entry or a task
13350  *              to a map. Doesn't consume the port ref; produces a map ref,
13351  *              which may be null.  Unlike convert_port_to_map, the
13352  *              port may be task or a named entry backed.
13353  *      Conditions:
13354  *              Nothing locked.
13355  */
13356
13357
13358 vm_map_t
13359 convert_port_entry_to_map(
13360         ipc_port_t      port)
13361 {
13362         vm_map_t map;
13363         vm_named_entry_t        named_entry;
13364         uint32_t        try_failed_count = 0;
13365
13366         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13367                 while(TRUE) {
13368                         ip_lock(port);
13369                         if(ip_active(port) && (ip_kotype(port)
13370                                                == IKOT_NAMED_ENTRY)) {
13371                                 named_entry =
13372                                         (vm_named_entry_t)port->ip_kobject;
13373                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
13374                                         ip_unlock(port);
13375
13376                                         try_failed_count++;
13377                                         mutex_pause(try_failed_count);
13378                                         continue;
13379                                 }
13380                                 named_entry->ref_count++;
13381                                 lck_mtx_unlock(&(named_entry)->Lock);
13382                                 ip_unlock(port);
13383                                 if ((named_entry->is_sub_map) &&
13384                                     (named_entry->protection
13385                                      & VM_PROT_WRITE)) {
13386                                         map = named_entry->backing.map;
13387                                 } else {
13388                                         mach_destroy_memory_entry(port);
13389                                         return VM_MAP_NULL;
13390                                 }
13391                                 vm_map_reference_swap(map);
13392                                 mach_destroy_memory_entry(port);
13393                                 break;
13394                         }
13395                         else
13396                                 return VM_MAP_NULL;
13397                 }
13398         }
13399         else
13400                 map = convert_port_to_map(port);
13401
13402         return map;
13403 }
13404
13405 /*
13406  *      Routine:        convert_port_entry_to_object
13407  *      Purpose:
13408  *              Convert from a port specifying a named entry to an
13409  *              object. Doesn't consume the port ref; produces a map ref,
13410  *              which may be null.
13411  *      Conditions:
13412  *              Nothing locked.
13413  */
13414
13415
13416 vm_object_t
13417 convert_port_entry_to_object(
13418         ipc_port_t      port)
13419 {
13420         vm_object_t             object = VM_OBJECT_NULL;
13421         vm_named_entry_t        named_entry;
13422         uint32_t                try_failed_count = 0;
13423
13424         if (IP_VALID(port) &&
13425             (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13426         try_again:
13427                 ip_lock(port);
13428                 if (ip_active(port) &&
13429                     (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13430                         named_entry = (vm_named_entry_t)port->ip_kobject;
13431                         if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
13432                                 ip_unlock(port);
13433                                 try_failed_count++;
13434                                 mutex_pause(try_failed_count);
13435                                 goto try_again;
13436                         }
13437                         named_entry->ref_count++;
13438                         lck_mtx_unlock(&(named_entry)->Lock);
13439                         ip_unlock(port);
13440                         if (!(named_entry->is_sub_map) &&
13441                             !(named_entry->is_pager) &&
13442                             !(named_entry->is_copy) &&
13443                             (named_entry->protection & VM_PROT_WRITE)) {
13444                                 object = named_entry->backing.object;
13445                                 vm_object_reference(object);
13446                         }
13447                         mach_destroy_memory_entry(port);
13448                 }
13449         }
13450
13451         return object;
13452 }
13453
13454 /*
13455  * Export routines to other components for the things we access locally through
13456  * macros.
13457  */
13458 #undef current_map
13459 vm_map_t
13460 current_map(void)
13461 {
13462         return (current_map_fast());
13463 }
13464
13465 /*
13466  *      vm_map_reference:
13467  *
13468  *      Most code internal to the osfmk will go through a
13469  *      macro defining this.  This is always here for the
13470  *      use of other kernel components.
13471  */
13472 #undef vm_map_reference
13473 void
13474 vm_map_reference(
13475         register vm_map_t       map)
13476 {
13477         if (map == VM_MAP_NULL)
13478                 return;
13479
13480         lck_mtx_lock(&map->s_lock);
13481 #if     TASK_SWAPPER
13482         assert(map->res_count > 0);
13483         assert(map->ref_count >= map->res_count);
13484         map->res_count++;
13485 #endif
13486         map->ref_count++;
13487         lck_mtx_unlock(&map->s_lock);
13488 }
13489
13490 /*
13491  *      vm_map_deallocate:
13492  *
13493  *      Removes a reference from the specified map,
13494  *      destroying it if no references remain.
13495  *      The map should not be locked.
13496  */
13497 void
13498 vm_map_deallocate(
13499         register vm_map_t       map)
13500 {
13501         unsigned int            ref;
13502
13503         if (map == VM_MAP_NULL)
13504                 return;
13505
13506         lck_mtx_lock(&map->s_lock);
13507         ref = --map->ref_count;
13508         if (ref > 0) {
13509                 vm_map_res_deallocate(map);
13510                 lck_mtx_unlock(&map->s_lock);
13511                 return;
13512         }
13513         assert(map->ref_count == 0);
13514         lck_mtx_unlock(&map->s_lock);
13515
13516 #if     TASK_SWAPPER
13517         /*
13518          * The map residence count isn't decremented here because
13519          * the vm_map_delete below will traverse the entire map,
13520          * deleting entries, and the residence counts on objects
13521          * and sharing maps will go away then.
13522          */
13523 #endif
13524
13525         vm_map_destroy(map, VM_MAP_NO_FLAGS);
13526 }
13527
13528
13529 void
13530 vm_map_disable_NX(vm_map_t map)
13531 {
13532         if (map == NULL)
13533                 return;
13534         if (map->pmap == NULL)
13535                 return;
13536
13537         pmap_disable_NX(map->pmap);
13538 }
13539
13540 void
13541 vm_map_disallow_data_exec(vm_map_t map)
13542 {
13543     if (map == NULL)
13544         return;
13545
13546     map->map_disallow_data_exec = TRUE;
13547 }
13548
13549 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
13550  * more descriptive.
13551  */
13552 void
13553 vm_map_set_32bit(vm_map_t map)
13554 {
13555         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
13556 }
13557
13558
13559 void
13560 vm_map_set_64bit(vm_map_t map)
13561 {
13562         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
13563 }
13564
13565 vm_map_offset_t
13566 vm_compute_max_offset(unsigned is64)
13567 {
13568         return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
13569 }
13570
13571 uint64_t
13572 vm_map_get_max_aslr_slide_pages(vm_map_t map)
13573 {
13574         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
13575 }
13576
13577 boolean_t
13578 vm_map_is_64bit(
13579                 vm_map_t map)
13580 {
13581         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
13582 }
13583
13584 boolean_t
13585 vm_map_has_hard_pagezero(
13586                 vm_map_t        map,
13587                 vm_map_offset_t pagezero_size)
13588 {
13589         /*
13590          * XXX FBDP
13591          * We should lock the VM map (for read) here but we can get away
13592          * with it for now because there can't really be any race condition:
13593          * the VM map's min_offset is changed only when the VM map is created
13594          * and when the zero page is established (when the binary gets loaded),
13595          * and this routine gets called only when the task terminates and the
13596          * VM map is being torn down, and when a new map is created via
13597          * load_machfile()/execve().
13598          */
13599         return (map->min_offset >= pagezero_size);
13600 }
13601
13602 void
13603 vm_map_set_4GB_pagezero(vm_map_t map)
13604 {
13605 #pragma unused(map)
13606
13607 }
13608
13609 void
13610 vm_map_clear_4GB_pagezero(vm_map_t map)
13611 {
13612 #pragma unused(map)
13613 }
13614
13615 /*
13616  * Raise a VM map's maximun offset.
13617  */
13618 kern_return_t
13619 vm_map_raise_max_offset(
13620         vm_map_t        map,
13621         vm_map_offset_t new_max_offset)
13622 {
13623         kern_return_t   ret;
13624
13625         vm_map_lock(map);
13626         ret = KERN_INVALID_ADDRESS;
13627
13628         if (new_max_offset >= map->max_offset) {
13629                 if (!vm_map_is_64bit(map)) {
13630                         if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
13631                                 map->max_offset = new_max_offset;
13632                                 ret = KERN_SUCCESS;
13633                         }
13634                 } else {
13635                         if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
13636                                 map->max_offset = new_max_offset;
13637                                 ret = KERN_SUCCESS;
13638                         }
13639                 }
13640         }
13641
13642         vm_map_unlock(map);
13643         return ret;
13644 }
13645
13646
13647 /*
13648  * Raise a VM map's minimum offset.
13649  * To strictly enforce "page zero" reservation.
13650  */
13651 kern_return_t
13652 vm_map_raise_min_offset(
13653         vm_map_t        map,
13654         vm_map_offset_t new_min_offset)
13655 {
13656         vm_map_entry_t  first_entry;
13657
13658         new_min_offset = vm_map_round_page(new_min_offset,
13659                                            VM_MAP_PAGE_MASK(map));
13660
13661         vm_map_lock(map);
13662
13663         if (new_min_offset < map->min_offset) {
13664                 /*
13665                  * Can't move min_offset backwards, as that would expose
13666                  * a part of the address space that was previously, and for
13667                  * possibly good reasons, inaccessible.
13668                  */
13669                 vm_map_unlock(map);
13670                 return KERN_INVALID_ADDRESS;
13671         }
13672
13673         first_entry = vm_map_first_entry(map);
13674         if (first_entry != vm_map_to_entry(map) &&
13675             first_entry->vme_start < new_min_offset) {
13676                 /*
13677                  * Some memory was already allocated below the new
13678                  * minimun offset.  It's too late to change it now...
13679                  */
13680                 vm_map_unlock(map);
13681                 return KERN_NO_SPACE;
13682         }
13683
13684         map->min_offset = new_min_offset;
13685
13686         vm_map_unlock(map);
13687
13688         return KERN_SUCCESS;
13689 }
13690
13691 /*
13692  * Set the limit on the maximum amount of user wired memory allowed for this map.
13693  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
13694  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
13695  * don't have to reach over to the BSD data structures.
13696  */
13697
13698 void
13699 vm_map_set_user_wire_limit(vm_map_t     map,
13700                            vm_size_t    limit)
13701 {
13702         map->user_wire_limit = limit;
13703 }
13704
13705
13706 void vm_map_switch_protect(vm_map_t     map,
13707                            boolean_t    val)
13708 {
13709         vm_map_lock(map);
13710         map->switch_protect=val;
13711         vm_map_unlock(map);
13712 }
13713
13714 /*
13715  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
13716  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
13717  * bump both counters.
13718  */
13719 void
13720 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
13721 {
13722         pmap_t pmap = vm_map_pmap(map);
13723
13724         ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes);
13725         ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
13726 }
13727
13728 void
13729 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
13730 {
13731         pmap_t pmap = vm_map_pmap(map);
13732
13733         ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes);
13734         ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
13735 }
13736
13737 /* Add (generate) code signature for memory range */
13738 #if CONFIG_DYNAMIC_CODE_SIGNING
13739 kern_return_t vm_map_sign(vm_map_t map,
13740                  vm_map_offset_t start,
13741                  vm_map_offset_t end)
13742 {
13743         vm_map_entry_t entry;
13744         vm_page_t m;
13745         vm_object_t object;
13746
13747         /*
13748          * Vet all the input parameters and current type and state of the
13749          * underlaying object.  Return with an error if anything is amiss.
13750          */
13751         if (map == VM_MAP_NULL)
13752                 return(KERN_INVALID_ARGUMENT);
13753
13754         vm_map_lock_read(map);
13755
13756         if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13757                 /*
13758                  * Must pass a valid non-submap address.
13759                  */
13760                 vm_map_unlock_read(map);
13761                 return(KERN_INVALID_ADDRESS);
13762         }
13763
13764         if((entry->vme_start > start) || (entry->vme_end < end)) {
13765                 /*
13766                  * Map entry doesn't cover the requested range. Not handling
13767                  * this situation currently.
13768                  */
13769                 vm_map_unlock_read(map);
13770                 return(KERN_INVALID_ARGUMENT);
13771         }
13772
13773         object = entry->object.vm_object;
13774         if (object == VM_OBJECT_NULL) {
13775                 /*
13776                  * Object must already be present or we can't sign.
13777                  */
13778                 vm_map_unlock_read(map);
13779                 return KERN_INVALID_ARGUMENT;
13780         }
13781
13782         vm_object_lock(object);
13783         vm_map_unlock_read(map);
13784
13785         while(start < end) {
13786                 uint32_t refmod;
13787
13788                 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
13789                 if (m==VM_PAGE_NULL) {
13790                         /* shoud we try to fault a page here? we can probably
13791                          * demand it exists and is locked for this request */
13792                         vm_object_unlock(object);
13793                         return KERN_FAILURE;
13794                 }
13795                 /* deal with special page status */
13796                 if (m->busy ||
13797                     (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13798                         vm_object_unlock(object);
13799                         return KERN_FAILURE;
13800                 }
13801
13802                 /* Page is OK... now "validate" it */
13803                 /* This is the place where we'll call out to create a code
13804                  * directory, later */
13805                 m->cs_validated = TRUE;
13806
13807                 /* The page is now "clean" for codesigning purposes. That means
13808                  * we don't consider it as modified (wpmapped) anymore. But
13809                  * we'll disconnect the page so we note any future modification
13810                  * attempts. */
13811                 m->wpmapped = FALSE;
13812                 refmod = pmap_disconnect(m->phys_page);
13813
13814                 /* Pull the dirty status from the pmap, since we cleared the
13815                  * wpmapped bit */
13816                 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13817                         SET_PAGE_DIRTY(m, FALSE);
13818                 }
13819
13820                 /* On to the next page */
13821                 start += PAGE_SIZE;
13822         }
13823         vm_object_unlock(object);
13824
13825         return KERN_SUCCESS;
13826 }
13827 #endif
13828
13829 #if CONFIG_FREEZE
13830
13831 kern_return_t vm_map_freeze_walk(
13832                 vm_map_t map,
13833                 unsigned int *purgeable_count,
13834                 unsigned int *wired_count,
13835                 unsigned int *clean_count,
13836                 unsigned int *dirty_count,
13837                 unsigned int  dirty_budget,
13838                 boolean_t *has_shared)
13839 {
13840         vm_map_entry_t entry;
13841
13842         vm_map_lock_read(map);
13843
13844         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13845         *has_shared = FALSE;
13846
13847         for (entry = vm_map_first_entry(map);
13848              entry != vm_map_to_entry(map);
13849              entry = entry->vme_next) {
13850                 unsigned int purgeable, clean, dirty, wired;
13851                 boolean_t shared;
13852
13853                 if ((entry->object.vm_object == 0) ||
13854                     (entry->is_sub_map) ||
13855                     (entry->object.vm_object->phys_contiguous)) {
13856                         continue;
13857                 }
13858
13859                 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
13860
13861                 *purgeable_count += purgeable;
13862                 *wired_count += wired;
13863                 *clean_count += clean;
13864                 *dirty_count += dirty;
13865
13866                 if (shared) {
13867                         *has_shared = TRUE;
13868                 }
13869
13870                 /* Adjust pageout budget and finish up if reached */
13871                 if (dirty_budget) {
13872                         dirty_budget -= dirty;
13873                         if (dirty_budget == 0) {
13874                                 break;
13875                         }
13876                 }
13877         }
13878
13879         vm_map_unlock_read(map);
13880
13881         return KERN_SUCCESS;
13882 }
13883
13884 kern_return_t vm_map_freeze(
13885                 vm_map_t map,
13886                 unsigned int *purgeable_count,
13887                 unsigned int *wired_count,
13888                 unsigned int *clean_count,
13889                 unsigned int *dirty_count,
13890                 unsigned int dirty_budget,
13891                 boolean_t *has_shared)
13892 {
13893         vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
13894         kern_return_t   kr = KERN_SUCCESS;
13895         boolean_t       default_freezer_active = TRUE;
13896
13897         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13898         *has_shared = FALSE;
13899
13900         /*
13901          * We need the exclusive lock here so that we can
13902          * block any page faults or lookups while we are
13903          * in the middle of freezing this vm map.
13904          */
13905         vm_map_lock(map);
13906
13907         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13908                 default_freezer_active = FALSE;
13909         }
13910
13911         if (default_freezer_active) {
13912                 if (map->default_freezer_handle == NULL) {
13913                         map->default_freezer_handle = default_freezer_handle_allocate();
13914                 }
13915
13916                 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
13917                         /*
13918                          * Can happen if default_freezer_handle passed in is NULL
13919                          * Or, a table has already been allocated and associated
13920                          * with this handle, i.e. the map is already frozen.
13921                          */
13922                         goto done;
13923                 }
13924         }
13925
13926         for (entry2 = vm_map_first_entry(map);
13927              entry2 != vm_map_to_entry(map);
13928              entry2 = entry2->vme_next) {
13929
13930                 vm_object_t     src_object = entry2->object.vm_object;
13931
13932                 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13933                         /* If eligible, scan the entry, moving eligible pages over to our parent object */
13934                         if (default_freezer_active) {
13935                                 unsigned int purgeable, clean, dirty, wired;
13936                                 boolean_t shared;
13937
13938                                 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
13939                                                                 src_object, map->default_freezer_handle);
13940
13941                                 *purgeable_count += purgeable;
13942                                 *wired_count += wired;
13943                                 *clean_count += clean;
13944                                 *dirty_count += dirty;
13945
13946                                 /* Adjust pageout budget and finish up if reached */
13947                                 if (dirty_budget) {
13948                                         dirty_budget -= dirty;
13949                                         if (dirty_budget == 0) {
13950                                                 break;
13951                                         }
13952                                 }
13953
13954                                 if (shared) {
13955                                         *has_shared = TRUE;
13956                                 }
13957                         } else {
13958                                 /*
13959                                  * To the compressor.
13960                                  */
13961                                 if (entry2->object.vm_object->internal == TRUE) {
13962                                         vm_object_pageout(entry2->object.vm_object);
13963                                 }
13964                         }
13965                 }
13966         }
13967
13968         if (default_freezer_active) {
13969                 /* Finally, throw out the pages to swap */
13970                 default_freezer_pageout(map->default_freezer_handle);
13971         }
13972
13973 done:
13974         vm_map_unlock(map);
13975
13976         return kr;
13977 }
13978
13979 kern_return_t
13980 vm_map_thaw(
13981         vm_map_t map)
13982 {
13983         kern_return_t kr = KERN_SUCCESS;
13984
13985         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13986                 /*
13987                  * We will on-demand thaw in the presence of the compressed pager.
13988                  */
13989                 return kr;
13990         }
13991
13992         vm_map_lock(map);
13993
13994         if (map->default_freezer_handle == NULL) {
13995                 /*
13996                  * This map is not in a frozen state.
13997                  */
13998                 kr = KERN_FAILURE;
13999                 goto out;
14000         }
14001
14002         kr = default_freezer_unpack(map->default_freezer_handle);
14003 out:
14004         vm_map_unlock(map);
14005
14006         return kr;
14007 }
14008 #endif
14009
14010 /*
14011  * vm_map_entry_should_cow_for_true_share:
14012  *
14013  * Determines if the map entry should be clipped and setup for copy-on-write
14014  * to avoid applying "true_share" to a large VM object when only a subset is
14015  * targeted.
14016  *
14017  * For now, we target only the map entries created for the Objective C
14018  * Garbage Collector, which initially have the following properties:
14019  *      - alias == VM_MEMORY_MALLOC
14020  *      - wired_count == 0
14021  *      - !needs_copy
14022  * and a VM object with:
14023  *      - internal
14024  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
14025  *      - !true_share
14026  *      - vo_size == ANON_CHUNK_SIZE
14027  */
14028 boolean_t
14029 vm_map_entry_should_cow_for_true_share(
14030         vm_map_entry_t  entry)
14031 {
14032         vm_object_t     object;
14033
14034         if (entry->is_sub_map) {
14035                 /* entry does not point at a VM object */
14036                 return FALSE;
14037         }
14038
14039         if (entry->needs_copy) {
14040                 /* already set for copy_on_write: done! */
14041                 return FALSE;
14042         }
14043
14044         if (entry->alias != VM_MEMORY_MALLOC) {
14045                 /* not tagged as an ObjectiveC's Garbage Collector entry */
14046                 return FALSE;
14047         }
14048
14049         if (entry->wired_count) {
14050                 /* wired: can't change the map entry... */
14051                 return FALSE;
14052         }
14053
14054         object = entry->object.vm_object;
14055
14056         if (object == VM_OBJECT_NULL) {
14057                 /* no object yet... */
14058                 return FALSE;
14059         }
14060
14061         if (!object->internal) {
14062                 /* not an internal object */
14063                 return FALSE;
14064         }
14065
14066         if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
14067                 /* not the default copy strategy */
14068                 return FALSE;
14069         }
14070
14071         if (object->true_share) {
14072                 /* already true_share: too late to avoid it */
14073                 return FALSE;
14074         }
14075
14076         if (object->vo_size != ANON_CHUNK_SIZE) {
14077                 /* not an object created for the ObjC Garbage Collector */
14078                 return FALSE;
14079         }
14080
14081         /*
14082          * All the criteria match: we have a large object being targeted for "true_share".
14083          * To limit the adverse side-effects linked with "true_share", tell the caller to
14084          * try and avoid setting up the entire object for "true_share" by clipping the
14085          * targeted range and setting it up for copy-on-write.
14086          */
14087         return TRUE;
14088 }
14089
14090 vm_map_offset_t
14091 vm_map_round_page_mask(
14092         vm_map_offset_t offset,
14093         vm_map_offset_t mask)
14094 {
14095         return VM_MAP_ROUND_PAGE(offset, mask);
14096 }
14097
14098 vm_map_offset_t
14099 vm_map_trunc_page_mask(
14100         vm_map_offset_t offset,
14101         vm_map_offset_t mask)
14102 {
14103         return VM_MAP_TRUNC_PAGE(offset, mask);
14104 }
14105
14106 int
14107 vm_map_page_shift(
14108         vm_map_t map)
14109 {
14110         return VM_MAP_PAGE_SHIFT(map);
14111 }
14112
14113 int
14114 vm_map_page_size(
14115         vm_map_t map)
14116 {
14117         return VM_MAP_PAGE_SIZE(map);
14118 }
14119
14120 int
14121 vm_map_page_mask(
14122         vm_map_t map)
14123 {
14124         return VM_MAP_PAGE_MASK(map);
14125 }
14126
14127 kern_return_t
14128 vm_map_set_page_shift(
14129         vm_map_t        map,
14130         int             pageshift)
14131 {
14132         if (map->hdr.nentries != 0) {
14133                 /* too late to change page size */
14134                 return KERN_FAILURE;
14135         }
14136
14137         map->hdr.page_shift = pageshift;
14138
14139         return KERN_SUCCESS;
14140 }
14141
14142 kern_return_t
14143 vm_map_query_volatile(
14144         vm_map_t        map,
14145         mach_vm_size_t  *volatile_virtual_size_p,
14146         mach_vm_size_t  *volatile_resident_size_p,
14147         mach_vm_size_t  *volatile_pmap_size_p)
14148 {
14149         mach_vm_size_t  volatile_virtual_size;
14150         mach_vm_size_t  volatile_resident_count;
14151         mach_vm_size_t  volatile_pmap_count;
14152         mach_vm_size_t  resident_count;
14153         vm_map_entry_t  entry;
14154         vm_object_t     object;
14155
14156         /* map should be locked by caller */
14157
14158         volatile_virtual_size = 0;
14159         volatile_resident_count = 0;
14160         volatile_pmap_count = 0;
14161
14162         for (entry = vm_map_first_entry(map);
14163              entry != vm_map_to_entry(map);
14164              entry = entry->vme_next) {
14165                 if (entry->is_sub_map) {
14166                         continue;
14167                 }
14168                 if (! (entry->protection & VM_PROT_WRITE)) {
14169                         continue;
14170                 }
14171                 object = entry->object.vm_object;
14172                 if (object == VM_OBJECT_NULL) {
14173                         continue;
14174                 }
14175                 if (object->purgable != VM_PURGABLE_VOLATILE) {
14176                         continue;
14177                 }
14178                 if (entry->offset != 0) {
14179                         /*
14180                          * If the map entry has been split and the object now
14181                          * appears several times in the VM map, we don't want
14182                          * to count the object's resident_page_count more than
14183                          * once.  We count it only for the first one, starting
14184                          * at offset 0 and ignore the other VM map entries.
14185                          */
14186                         continue;
14187                 }
14188                 resident_count = object->resident_page_count;
14189                 if ((entry->offset / PAGE_SIZE) >= resident_count) {
14190                         resident_count = 0;
14191                 } else {
14192                         resident_count -= (entry->offset / PAGE_SIZE);
14193                 }
14194
14195                 volatile_virtual_size += entry->vme_end - entry->vme_start;
14196                 volatile_resident_count += resident_count;
14197                 volatile_pmap_count += pmap_query_resident(map->pmap,
14198                                                            entry->vme_start,
14199                                                            entry->vme_end);
14200         }
14201
14202         /* map is still locked on return */
14203
14204         *volatile_virtual_size_p = volatile_virtual_size;
14205         *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
14206         *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
14207
14208         return KERN_SUCCESS;
14209 }